bugguide 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +119 -0
- data/Rakefile +10 -0
- data/bin/bugguide +59 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/bugguide.gemspec +34 -0
- data/lib/bugguide.rb +14 -0
- data/lib/bugguide/exceptions.rb +5 -0
- data/lib/bugguide/photo.rb +110 -0
- data/lib/bugguide/taxon.rb +139 -0
- data/lib/bugguide/version.rb +3 -0
- metadata +165 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 65d0d1fc897a62290d360c53a69170de37e5d945
|
4
|
+
data.tar.gz: 9459ac30d9c3c0fac94ca677ed44e267208879a6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 34f6360407fd0d6f7677d90f5a6ab190b50c1302ddfedf8fba27eba29a93a8a30a64685d48fa23f36644cbc59feb66f2cc71b5bf85c17d5b98af7cf7921587b8
|
7
|
+
data.tar.gz: 430e4e576fcf040e20fed2c6c30a041d858c4f41f36ade7f621205f480a795232091f043540069f91adcc46bc95e8527cac2f6156174d573d87712c367b7badf
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
|
4
|
+
|
5
|
+
We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
|
6
|
+
|
7
|
+
Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
|
8
|
+
|
9
|
+
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
|
10
|
+
|
11
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
|
12
|
+
|
13
|
+
This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Ken-ichi Ueda
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
# BugGuide
|
2
|
+
|
3
|
+
Ruby gem for scraping data from [BugGuide.net](http://bugguide.net), an
|
4
|
+
excellent online community of entomologists sharing information about
|
5
|
+
terrestrial arthropods in North America. Sadly, BugGuide doesn't have an API, so
|
6
|
+
this gem is little more than a scraper focusing on their [advanced search
|
7
|
+
feature](http://bugguide.net/adv_search/bgsearch.php).
|
8
|
+
|
9
|
+
# Installation
|
10
|
+
|
11
|
+
I haven't posted this to rubygems yet, so you can just clone and install
|
12
|
+
locally:
|
13
|
+
|
14
|
+
```bash
|
15
|
+
git clone git@github.com:kueda/bugguide.git
|
16
|
+
cd bugguide
|
17
|
+
gem build bugguide.gemspec
|
18
|
+
gem install bugguide-0.1.2.gem
|
19
|
+
```
|
20
|
+
|
21
|
+
And of course bundler makes it pretty easy:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
gem 'bugguide', git: 'git@github.com:kueda/bugguide.git'
|
25
|
+
```
|
26
|
+
|
27
|
+
# Usage
|
28
|
+
|
29
|
+
## Search taxa
|
30
|
+
```ruby
|
31
|
+
BugGuide::Taxon.search('Apis mellifera').map(&:name)
|
32
|
+
["Apis mellifera", "Apis mellifera carnica", "Apis mellifera ligustica",
|
33
|
+
"Apis mellifera mellifera", "Apis mellifera scutellata"]
|
34
|
+
```
|
35
|
+
|
36
|
+
## Get common names
|
37
|
+
```ruby
|
38
|
+
BugGuide::Taxon.search('Apis mellifera').map(&:common_name)
|
39
|
+
["Western Honey Bee", "Carniolan Honeybee", "Italian Honeybee",
|
40
|
+
"Black Honeybee", "African Honeybee"]
|
41
|
+
```
|
42
|
+
|
43
|
+
## Get classification
|
44
|
+
```ruby
|
45
|
+
BugGuide::Taxon.search('Apis mellifera').first.ancestors.map(&:scientific_name)
|
46
|
+
["Arthropoda", "Hexapoda", "Insecta", "Hymenoptera", "Aculeata", "Apoidea",
|
47
|
+
"Apidae", "Apinae", "Apini", "Apis"]
|
48
|
+
BugGuide::Taxon.search('Apis mellifera').first.ancestors.map(&:rank)
|
49
|
+
["phylum", "subphylum", "class", "order", "no taxon", "no taxon", "family",
|
50
|
+
"subfamily", "tribe", "genus"]
|
51
|
+
```
|
52
|
+
|
53
|
+
Note that `name` is a verbatim name from BugGuide, while `common_name` and
|
54
|
+
`scientific_name` represent attempts to parse out those kinds of names
|
55
|
+
specifically. It's also worth keeping in mind that retrieving things like a
|
56
|
+
classification requires an additional request to BugGuide, so if you're doing it
|
57
|
+
for multiple taxa, maybe cut them some slack and throttle your requests.
|
58
|
+
|
59
|
+
|
60
|
+
## Search photos
|
61
|
+
|
62
|
+
Since this gem is just scraping the Advanced Search results, you will get
|
63
|
+
exceptions if your search returns too many results.
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
taxon = BugGuide::Taxon.search('Epinotia').first
|
67
|
+
BugGuide::Photo.search(taxon: taxon.id).map(&:thumbnail_url)
|
68
|
+
```
|
69
|
+
|
70
|
+
|
71
|
+
For more please check out the specs.
|
72
|
+
|
73
|
+
# Command line tool
|
74
|
+
|
75
|
+
Right now it only generates checklists:
|
76
|
+
|
77
|
+
```bash
|
78
|
+
> bugguide checklist Epinotia -s CA
|
79
|
+
# choose matching taxon
|
80
|
+
Found 82 photos of 15 taxa:
|
81
|
+
|
82
|
+
TAXON ID NAME PHOTO ID
|
83
|
+
54501 Epinotia 854559
|
84
|
+
185131 Epinotia albangulana 388400
|
85
|
+
579472 Epinotia albicapitana 579454
|
86
|
+
262585 Epinotia arctostaphylana 718670
|
87
|
+
452559 Epinotia castaneana 630199
|
88
|
+
725616 Epinotia cercocarpana 529759
|
89
|
+
262473 Epinotia emarginana 718673
|
90
|
+
378960 Epinotia hopkinsana 710406
|
91
|
+
241183 Epinotia kasloana 878601
|
92
|
+
723150 Epinotia nigralbana 546896
|
93
|
+
917995 Epinotia sagittana 917977
|
94
|
+
828578 Epinotia signiferana 1001290
|
95
|
+
472765 Epinotia subplicana 861801
|
96
|
+
261436 Epinotia subviridis 455558
|
97
|
+
481897 Epinotia terracoctana 458366
|
98
|
+
```
|
99
|
+
|
100
|
+
```bash
|
101
|
+
> bugguide checklist Epinotia -s CA -f csv
|
102
|
+
# choose matching taxon
|
103
|
+
TAXON ID,NAME,PHOTO ID
|
104
|
+
54501,Epinotia,854559
|
105
|
+
185131,Epinotia albangulana,388400
|
106
|
+
579472,Epinotia albicapitana,579454
|
107
|
+
262585,Epinotia arctostaphylana,718670
|
108
|
+
452559,Epinotia castaneana,630199
|
109
|
+
725616,Epinotia cercocarpana,529759
|
110
|
+
262473,Epinotia emarginana,718673
|
111
|
+
378960,Epinotia hopkinsana,710406
|
112
|
+
241183,Epinotia kasloana,878601
|
113
|
+
723150,Epinotia nigralbana,546896
|
114
|
+
917995,Epinotia sagittana,917977
|
115
|
+
828578,Epinotia signiferana,1001290
|
116
|
+
472765,Epinotia subplicana,861801
|
117
|
+
261436,Epinotia subviridis,455558
|
118
|
+
481897,Epinotia terracoctana,458366
|
119
|
+
```
|
data/Rakefile
ADDED
data/bin/bugguide
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# Command-line front end for the bugguide gem, built on Commander.
#
# Currently offers a single command, `checklist`, which looks up a taxon by
# name, searches BugGuide for photos of it (optionally filtered by state,
# county and contributor) and prints one row per distinct taxon found.
require 'commander/import'
require 'bugguide'

program :name, 'BugGuide'
# Report the gem's own version instead of a separately hard-coded number.
program :version, BugGuide::VERSION
program :description, 'Command-line tool for scraping data from http://bugguide.net'

command :checklist do |c|
  c.syntax = "bugguide checklist [taxon name]"
  c.option "-s", '--state STATE', String, '2-letter code of a US state or CA province'
  c.option "-c", '--county COUNTY', String, "US county, full name, e.g. Alameda, Middlesex."
  # The action below has always forwarded opts.user to the search; without
  # this declaration there was no way to actually supply it.
  c.option "-u", '--user USER', String, "BugGuide user ID of the photo contributor"
  c.option "-f", '--format table|csv', String, "Output format"
  c.action do |args, opts|
    # Accept unquoted multi-word names, e.g. `bugguide checklist Apis mellifera`.
    taxon_name = args.join(' ')
    abort "You must specify a taxon name" if taxon_name.blank?

    taxa = BugGuide::Taxon.search(taxon_name)
    if taxa.size == 0
      abort "No matching taxa found"
    elsif taxa.size == 1
      taxon = taxa.first
    else
      puts "Multiple taxon results, choose one (hit ENTER to choose the first)"
      taxa.each_with_index do |candidate, i|
        say "[#{i}] #{candidate.common_name} (#{candidate.scientific_name}) http://bugguide.net/node/view/#{candidate.id}"
      end
      choice = ask "Choice: "
      taxon = choice.blank? ? taxa.first : taxa[choice.to_i]
    end
    # An out-of-range choice indexes past the end of the array and yields nil.
    abort "You must choose a taxon" if taxon.blank?

    photos = begin
      BugGuide::Photo.search(
        taxon: taxon.id,
        user: opts.user,
        state: opts.state,
        county: opts.county
      )
    rescue BugGuide::TooManyResultsException
      abort "Too many results. Try refining your query."
    end

    # One representative photo (the first encountered) per scientific name,
    # listed alphabetically.
    unique_photos = photos.uniq { |p| p.taxon.scientific_name }.
      sort_by { |p| p.taxon.scientific_name }

    if opts.format == 'csv'
      puts ["TAXON ID", "NAME", "PHOTO ID"].join(',')
      unique_photos.each do |p|
        puts [p.taxon.id, p.taxon.scientific_name, p.id].join(',')
      end
    else
      # Width of the NAME column: the longest name plus two spaces of padding
      # (just the padding when there are no photos at all).
      name_width = photos.map { |p| p.taxon.scientific_name.size }.max.to_i + 2
      puts "Found #{photos.size} photos of #{unique_photos.size} taxa:"
      puts
      puts ["TAXON ID".ljust(10), "NAME".ljust(name_width), "PHOTO ID"].join(' ')
      unique_photos.each do |p|
        puts [p.taxon.id.to_s.ljust(10), p.taxon.scientific_name.ljust(name_width), p.id].join(' ')
      end
    end
  end
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "bugguide"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/bugguide.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
#
# Gem specification for bugguide.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'bugguide/version'

Gem::Specification.new do |spec|
  spec.name          = "bugguide"
  spec.version       = BugGuide::VERSION
  spec.authors       = ["Ken-ichi Ueda"]
  spec.email         = ["kenichi.ueda@gmail.com"]

  spec.summary       = "Library to read North American insect data from BugGuide.net."
  spec.description   = %q{
    BugGuide.net is a website for sharing insect photos from North America.
    Over the years it has accrued many experts and a great deal of excellent
    data, but not an API. This gem is little more than a scraper for
    conveniently extracting data from the website.
  }
  spec.homepage      = "https://github.com/kueda/bugguide"
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "bin"
  # Only the CLI is meant for end users. bin/console and bin/setup are
  # development helpers; listing them here would install generically named
  # `console` and `setup` commands onto the user's PATH.
  spec.executables   = ["bugguide"]
  spec.require_paths = ["lib"]

  # NOTE: "~> 0" means ">= 0, < 1", which no released version of these
  # libraries satisfies, so explicit floors are used instead.
  # NOTE(review): the floors below are estimates based on the versions
  # current when this gem was released -- confirm against the versions the
  # test suite is actually run with.
  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "m", "~> 1.3"
  spec.add_runtime_dependency 'nokogiri', ">= 1.6"
  spec.add_runtime_dependency 'activesupport', ">= 4.0"
  spec.add_runtime_dependency 'commander', ">= 4.0"
end
|
data/lib/bugguide.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'active_support/core_ext/array/grouping'
|
5
|
+
require 'active_support/core_ext/object/blank'
|
6
|
+
require 'active_support/core_ext/hash/keys'
|
7
|
+
|
8
|
+
require 'bugguide/version'
|
9
|
+
require 'bugguide/taxon'
|
10
|
+
require 'bugguide/photo'
|
11
|
+
require 'bugguide/exceptions'
|
12
|
+
|
13
|
+
module BugGuide
|
14
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
#encoding: utf-8
#
# Represents a single photo on BugGuide. Several methods are intended for
# compatibility with the DarwinCore Simple Multimedia extension
# (http://rs.gbif.org/terms/1.0/Multimedia).
#
class BugGuide::Photo
  # Endpoint of BugGuide's Advanced Search.
  SEARCH_URL = "http://bugguide.net/adv_search/bgsearch.php"

  # Search options that are sent as a single `name=value` pair.
  SCALAR_PARAMS = %w(user taxon description county city_location adult immature male female representative).freeze

  attr_accessor :thumbnail_url, :id, :url, :title, :date, :state, :county, :city_location, :taxon

  # @param options [Hash] attribute name => value; each pair is assigned
  #   through the matching writer, so unknown keys raise NoMethodError.
  def initialize(options = {})
    options.each do |k,v|
      send("#{k}=", v)
    end
  end

  # Search for photos. This method depends on BugGuide's Advanced Search
  # functionality, which will bail if your search returns too many results, so
  # this will raise an exception in that case that you should be prepared to
  # deal with.
  #
  # Keys may be Symbols or Strings; the hash passed in is not modified.
  #
  # @param options [Hash] You need to choose enough to filter the results adequately
  #   user: user ID
  #   taxon: ancestor taxon ID
  #   description: free text search in the description of the photo
  #   month: numerical month of the year, 1-12, or an array of them
  #   location: two-letter US state or Canadian province code, e.g. AK, BC, WA, OR,
  #     CA, etc. You can specify multiple states as an array of these codes.
  #   state: accepted as a synonym for location; both are merged
  #   county: County or region name. No controlled vocabulary, so you can use "Madera"
  #     but also "Sierra"
  #   city_location: City or location name. Like county it's free text.
  #   adult: Boolean
  #   immature: Boolean
  #   male: Boolean
  #   female: Boolean
  #   representative: Boolean. It's not clear to me what this means on BugGuide.
  #   headers: Hash of HTTP request headers handed to open-uri
  # @return [Array<BugGuide::Photo>] one photo per result row
  # @raise [BugGuide::NoParametersException] if options is blank
  # @raise [BugGuide::TooManyResultsException] if BugGuide refuses the search
  #   because it matches too many photos
  def self.search(options = {})
    raise BugGuide::NoParametersException if options.blank?
    # Work on a string-keyed copy so the caller's hash is left untouched.
    options = options.stringify_keys
    # Must be looked up by its string key: the keys were just stringified.
    headers = options['headers'] || {}
    url = "#{SEARCH_URL}?#{search_query(options)}"
    photos = []
    # puts "fetching #{url}"
    URI.parse(url).open(headers) do |response|
      html = Nokogiri::HTML(response.read.encode('UTF-8'))
      if html.to_s =~ /Too many results \(\d+\)/
        raise BugGuide::TooManyResultsException
      end
      html.css('body > table tr').each do |tr|
        next if tr.css('th').size > 0 # header row
        photos << from_row(tr)
      end
    end
    photos
  end

  # Builds the URL-encoded query string for a search from string-keyed options.
  def self.search_query(options)
    pairs = []
    SCALAR_PARAMS.each do |param|
      value = options[param]
      # `false` is blank in ActiveSupport but is a meaningful filter here.
      next if value != false && value.blank?
      value = (value ? 1 : 0) if value == true || value == false
      pairs << [param, value]
    end
    states = [options['state'], options['location']].flatten.compact.uniq
    states.each { |state| pairs << ['location[]', state] }
    unless options['month'].blank?
      [options['month']].flatten.each { |month| pairs << ['month[]', month] }
    end
    # Encodes every key and value individually, so characters such as `&`
    # or `=` inside a value cannot break the query apart.
    URI.encode_www_form(pairs)
  end
  private_class_method :search_query

  # Builds a photo (and its taxon) from one result row of the search table.
  # Cells are addressed by their position among the row's child nodes.
  def self.from_row(tr)
    cells = tr.children
    photo_link = cells[1].css('a')[0]
    taxon_link = cells[7].css('a')[0]
    new(
      thumbnail_url: tr.css('img')[0][:src],
      id: cells[1].text.to_i,
      url: photo_link[:href],
      title: cells[2].text,
      date: cells[3].text,
      state: cells[4].text,
      county: cells[5].text,
      city_location: cells[6].text,
      taxon: BugGuide::Taxon.new(
        name: cells[7].text,
        # The taxon's ID is the trailing run of digits in its link.
        id: taxon_link[:href].to_s[/\d+$/, 0],
        url: taxon_link[:href]
      )
    )
  end
  private_class_method :from_row

  # DarwinCore Simple Multimedia mapping
  # http://rs.gbif.org/terms/1.0/Multimedia

  # DarwinCore Simple Multimedia identifier
  alias_method :identifier, :id

  # DarwinCore Simple Multimedia references (basically just a URL)
  alias_method :references, :url

  # DarwinCore Simple Multimedia date created
  alias_method :created, :date

  # DarwinCore Simple Multimedia media type
  def type
    "StillImage"
  end

  # DarwinCore Simple Multimedia media format, aka MIME type
  def format
    "image/jpeg"
  end

  # DarwinCore Simple Multimedia publisher, always BugGuide in this case
  def publisher
    "BugGuide"
  end
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
#encoding: utf-8
#
# Represents a single taxon on BugGuide
#
# One thing to keep in mind is that this will generally be instantiated from
# search results, so for certain methods, like `ancestors`, it will need to
# perform an additional request to retrieve the relevant data.
#
# Several methods are aliases intended for compatibility with DarwinCore
# term names (taxonID, scientificName, vernacularName, taxonRank,
# higherClassification).
#
class BugGuide::Taxon
  NAME_PATTERN = /[\w\s\-\'\.]+/
  attr_accessor :id, :name, :scientific_name, :common_name, :url

  # @param options [Hash] attribute name => value, assigned in hash order
  #   through the matching writers. Order matters: `name` only fills in
  #   scientific/common names that are still unset, while the explicit
  #   `scientific_name` / `common_name` writers overwrite.
  def initialize(options = {})
    options.each do |k,v|
      send("#{k}=", v)
    end
    self.url ||= "http://bugguide.net/node/view/#{id}"
  end

  # Sets the verbatim name and, where they are not already set, derives the
  # scientific and common names from it. A nil name is ignored.
  def name=(new_name)
    return if new_name.nil?
    if new_name =~ /subgenus/i
      # Drop the word "subgenus" and keep everything before the first "("
      self.scientific_name ||= new_name.gsub(/subgenus/i, '')[/[^\(]+/, 0]
    elsif (matches = new_name.match(/group .*\((#{NAME_PATTERN})\)/i))
      # "... group ... (Name)": the parenthesised part is the scientific name
      self.scientific_name ||= matches[1]
    elsif (matches = new_name.match(/(#{NAME_PATTERN}) \((#{NAME_PATTERN})\)/))
      # "Scientific name (Common name)"
      self.scientific_name ||= matches[1]
      self.common_name ||= matches[2]
    else
      self.scientific_name ||= new_name[/[^\(]+/, 0]
    end
    @name = new_name.strip
  end

  # Stores the stripped scientific name; nil is ignored.
  def scientific_name=(new_name)
    @scientific_name = new_name.strip if new_name
  end

  # Stores the stripped common name; nil is ignored.
  def common_name=(new_name)
    @common_name = new_name.strip if new_name
  end

  # Stores the rank in lower case. BugGuide's "No Taxon" placeholder (and nil)
  # are stored as nil.
  def rank=(new_rank)
    @rank = new_rank && new_rank.downcase
    @rank = nil if @rank == 'no taxon'
  end

  # Taxonomic rank, e.g. kingdom, phylum, order, etc. When no rank has been
  # stored yet it is read from the last breadcrumb link of the taxonomy page,
  # which requires a request to BugGuide. Returns nil if the page has no such
  # link or the link carries no title.
  def rank
    return @rank if @rank
    link = taxonomy_html.css('.bgpage-roots a').last
    @rank = link && link['title'] && link['title'].downcase
  end

  # All ancestor taxa of this taxon, or its classification if you prefer that
  # terminology. Requires a request to BugGuide the first time it is called.
  #
  # @return [Array<BugGuide::Taxon>]
  def ancestors
    return @ancestors if @ancestors
    # Non-breaking space, which is normalised to a plain space in the names.
    nbsp = "\u00A0"
    @ancestors = taxonomy_html.css('.bgpage-roots a').map do |a|
      # Only breadcrumb links that point at a taxon's tree page count
      next unless a['href'] =~ /node\/view\/\d+\/tree/
      t = BugGuide::Taxon.new(
        id: a['href'].split('/')[-2],
        name: a.text.gsub(nbsp, ' '),
        url: a['href'],
        rank: a['title']
      )
      if (name_matches = t.name.match(/(#{NAME_PATTERN})\s+\((#{NAME_PATTERN})\)/))
        # "Common name (Scientific name)"
        t.common_name = name_matches[1]
        t.scientific_name = name_matches[2]
      elsif (name_matches = t.name.match(/(#{NAME_PATTERN})\s+\-\s+(#{NAME_PATTERN})/))
        # "Scientific name - Common name"
        t.common_name = name_matches[2]
        t.scientific_name = name_matches[1]
      end
      # The breadcrumb ends with this taxon itself, which is not an ancestor
      next if t.scientific_name == scientific_name
      t
    end.compact
  end

  # HTML source of the taxon's taxonomy page on BugGuide as a Nokogiri
  # document. Fetched once and cached on the instance.
  def taxonomy_html
    return @taxonomy_html if @taxonomy_html
    URI.parse("http://bugguide.net/node/view/#{id}/tree").open do |response|
      @taxonomy_html = Nokogiri::HTML(response.read)
    end
  end

  # Search for taxa, returns matching BugGuide::Taxon instances
  #
  # @param name [String] taxon name to look for
  # @param options [Hash]
  #   headers: Hash of HTTP request headers handed to open-uri
  # @return [Array<BugGuide::Taxon>]
  def self.search(name, options = {})
    # For reference, http://bugguide.net/adv_search/taxon.php?q=Sphecidae returns
    # pipe-separated rows, one per line, e.g.
    #   117327||Apoid Wasps (Apoidea)- traditional Sphecidae|2302
    #   135|Sphecidae|Thread-waisted Wasps|2700
    url = "http://bugguide.net/adv_search/taxon.php?q=#{URI.encode_www_form_component(name)}"
    headers = options[:headers] || options['headers'] || {}
    taxa = []
    # A single request, closed automatically when the block returns.
    URI.parse(url).open(headers) do |f|
      f.read.split("\n").each do |row|
        fields = row.split('|').compact.map(&:strip)
        next if fields.empty? # blank line
        taxa << BugGuide::Taxon.new(
          id: fields[0],
          name: fields[1],
          scientific_name: fields[1],
          common_name: fields[2]
        )
      end
    end
    taxa
  end

  # Find a single BugGuide taxon given its node ID. Names are read from the
  # taxon's taxonomy page, so this performs a request to BugGuide.
  def self.find(id)
    taxon = BugGuide::Taxon.new(id: id)
    taxon.name = taxon.taxonomy_html.css('.node-title h1').text
    taxon.scientific_name = taxon.taxonomy_html.css('.node-title i').text
    # The common name is whatever follows the last "-" in the title
    taxon.common_name = taxon.taxonomy_html.css('.node-title').text.split('-').last
    taxon
  end

  # DarwinCore mapping

  # DarwinCore-compliant taxon ID
  alias_method :taxonID, :id

  # DarwinCore-compliant scientific name
  alias_method :scientificName, :scientific_name

  # DarwinCore-compliant common name
  alias_method :vernacularName, :common_name

  # DarwinCore-compliant rank
  alias_method :taxonRank, :rank

  # DarwinCore-compliant taxonomic classification: the scientific names of
  # all ancestors joined with " | "
  def higherClassification
    ancestors.map(&:scientific_name).join(' | ')
  end

end
|
metadata
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bugguide
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ken-ichi Ueda
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-10-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.10'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.10'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: m
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: nokogiri
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: commander
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: "\n BugGuide.net is a website for sharing insect photos from North
|
112
|
+
America.\n Over the years it as acrued many experts and a great deal of excellent\n
|
113
|
+
\ data, but not an API. This gem is little more than a scraper for\n conveniently
|
114
|
+
extracting data from the website.\n "
|
115
|
+
email:
|
116
|
+
- kenichi.ueda@gmail.com
|
117
|
+
executables:
|
118
|
+
- bugguide
|
119
|
+
- console
|
120
|
+
- setup
|
121
|
+
extensions: []
|
122
|
+
extra_rdoc_files: []
|
123
|
+
files:
|
124
|
+
- ".gitignore"
|
125
|
+
- ".travis.yml"
|
126
|
+
- CODE_OF_CONDUCT.md
|
127
|
+
- Gemfile
|
128
|
+
- LICENSE.txt
|
129
|
+
- README.md
|
130
|
+
- Rakefile
|
131
|
+
- bin/bugguide
|
132
|
+
- bin/console
|
133
|
+
- bin/setup
|
134
|
+
- bugguide.gemspec
|
135
|
+
- lib/bugguide.rb
|
136
|
+
- lib/bugguide/exceptions.rb
|
137
|
+
- lib/bugguide/photo.rb
|
138
|
+
- lib/bugguide/taxon.rb
|
139
|
+
- lib/bugguide/version.rb
|
140
|
+
homepage: https://github.com/kueda/bugguide
|
141
|
+
licenses:
|
142
|
+
- MIT
|
143
|
+
metadata: {}
|
144
|
+
post_install_message:
|
145
|
+
rdoc_options: []
|
146
|
+
require_paths:
|
147
|
+
- lib
|
148
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
|
+
requirements:
|
155
|
+
- - ">="
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '0'
|
158
|
+
requirements: []
|
159
|
+
rubyforge_project:
|
160
|
+
rubygems_version: 2.4.6
|
161
|
+
signing_key:
|
162
|
+
specification_version: 4
|
163
|
+
summary: Library to read North American insect data from BugGuide.net.
|
164
|
+
test_files: []
|
165
|
+
has_rdoc:
|