taxonifi 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +24 -7
- data/README.rdoc +5 -6
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/assessor/row_assessor.rb +25 -18
- data/lib/export/format/base.rb +96 -1
- data/lib/export/format/obo_nomenclature.rb +71 -0
- data/lib/export/format/prolog.rb +59 -0
- data/lib/export/format/species_file.rb +303 -193
- data/lib/lumper/clump.rb +112 -0
- data/lib/lumper/lumper.rb +71 -45
- data/lib/lumper/lumps/parent_child_name_collection.rb +79 -15
- data/lib/models/author_year.rb +1 -2
- data/lib/models/base.rb +56 -51
- data/lib/models/collection.rb +16 -1
- data/lib/models/name.rb +56 -15
- data/lib/models/name_collection.rb +70 -19
- data/lib/models/ref.rb +17 -0
- data/lib/models/ref_collection.rb +2 -1
- data/lib/models/shared_class_methods.rb +29 -0
- data/lib/models/species_name.rb +14 -12
- data/lib/splitter/parser.rb +1 -2
- data/lib/splitter/tokens.rb +1 -1
- data/lib/taxonifi.rb +12 -0
- data/lib/utils/array.rb +17 -0
- data/lib/utils/hash.rb +17 -0
- data/taxonifi.gemspec +116 -0
- data/test/file_fixtures/Fossil.csv +11 -0
- data/test/file_fixtures/Lygaeoidea.csv +1 -1
- data/test/file_fixtures/names.csv +1 -0
- data/test/helper.rb +14 -0
- data/test/test_export_prolog.rb +14 -0
- data/test/test_exporter.rb +23 -0
- data/test/test_lumper_clump.rb +75 -0
- data/test/test_lumper_names.rb +67 -9
- data/test/test_lumper_parent_child_name_collection.rb +47 -3
- data/test/test_lumper_refs.rb +22 -7
- data/test/test_obo_nomenclature.rb +14 -0
- data/test/test_parser.rb +4 -2
- data/test/test_splitter_tokens.rb +9 -0
- data/test/test_taxonifi_accessor.rb +21 -15
- data/test/test_taxonifi_base.rb +25 -0
- data/test/test_taxonifi_name.rb +41 -4
- data/test/test_taxonifi_name_collection.rb +54 -17
- data/test/test_taxonifi_species_name.rb +1 -1
- metadata +34 -5
data/lib/models/collection.rb
CHANGED
@@ -2,12 +2,27 @@ module Taxonifi
|
|
2
2
|
class CollectionError < StandardError; end
|
3
3
|
module Model
|
4
4
|
|
5
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'shared_class_methods'))
|
6
|
+
|
5
7
|
# The base class that all collection classes are derived from.
|
6
8
|
class Collection
|
9
|
+
include Taxonifi::Model::SharedClassMethods
|
10
|
+
|
11
|
+
# A Hash indexing object by id like {Integer => SomeBaseSubclass}
|
7
12
|
attr_accessor :by_id_index
|
13
|
+
|
14
|
+
# An Integer representing the current free id to be used for an object accessioned into the collection. Not used in non-indexed collections.
|
8
15
|
attr_accessor :current_free_id
|
16
|
+
|
17
|
+
# An Array, the collection.
|
9
18
|
attr_accessor :collection
|
10
19
|
|
20
|
+
# Returns an array of (downcased) strings representing the prefixes of the Collection based subclasses, like
|
21
|
+
# ['name', 'geog', 'ref'] etc.
|
22
|
+
def self.subclass_prefixes
|
23
|
+
self.subclasses.collect{|c| c.to_s.split("::").last}.collect{|n| n.gsub(/Collection/, "").downcase}
|
24
|
+
end
|
25
|
+
|
11
26
|
def initialize(options = {})
|
12
27
|
opts = {
|
13
28
|
:initial_id => 0
|
@@ -15,6 +30,7 @@ module Taxonifi
|
|
15
30
|
raise CollectionError, "Can not start with an initial_id of nil." if opts[:initial_id].nil?
|
16
31
|
@collection = []
|
17
32
|
@by_id_index = {}
|
33
|
+
# @by_row_index = {}
|
18
34
|
@current_free_id = opts[:initial_id]
|
19
35
|
true
|
20
36
|
end
|
@@ -50,7 +66,6 @@ module Taxonifi
|
|
50
66
|
return obj
|
51
67
|
end
|
52
68
|
|
53
|
-
|
54
69
|
# Return an array of ancestor (parent) ids.
|
55
70
|
# TODO: deprecate?
|
56
71
|
# More or less identical to Taxonifi::Name.ancestor_ids except
|
data/lib/models/name.rb
CHANGED
@@ -6,25 +6,27 @@ module Taxonifi
|
|
6
6
|
|
7
7
|
# String
|
8
8
|
attr_accessor :name
|
9
|
+
|
9
10
|
# String
|
10
11
|
attr_accessor :rank
|
12
|
+
|
13
|
+
# String
|
14
|
+
attr_accessor :author
|
15
|
+
|
11
16
|
# String, authors as originally read
|
12
17
|
attr_accessor :year
|
18
|
+
|
13
19
|
# Boolean, true if parens present (i.e. _not_ in original combination)
|
14
20
|
attr_accessor :parens
|
21
|
+
|
15
22
|
# A Taxonifi::Model::Name
|
16
23
|
attr_accessor :parent
|
17
|
-
|
18
|
-
|
19
|
-
# General purpose relationship, typically used to indicate synonymy. A Taxonifi::Model::Name
|
24
|
+
|
25
|
+
# A Taxonifi::Model::Name General purpose relationship, typically used to indicate synonymy.
|
20
26
|
attr_accessor :related_name
|
21
27
|
|
22
28
|
# Array, contains properties assignable in Taxonifi::Model::Name#new()
|
23
|
-
ATTRIBUTES = [:name, :rank, :year, :parens, :parent, :author, :related_name]
|
24
|
-
|
25
|
-
ATTRIBUTES.each do |a|
|
26
|
-
attr_accessor a
|
27
|
-
end
|
29
|
+
@@ATTRIBUTES = [:name, :rank, :year, :parens, :parent, :author, :related_name]
|
28
30
|
|
29
31
|
# optionally parsed/index
|
30
32
|
attr_accessor :authors
|
@@ -33,11 +35,13 @@ module Taxonifi
|
|
33
35
|
attr_accessor :author_year_index
|
34
36
|
|
35
37
|
def initialize(options = {})
|
38
|
+
super
|
36
39
|
opts = {
|
37
40
|
id: nil
|
38
41
|
}.merge!(options)
|
42
|
+
|
39
43
|
@parent = nil
|
40
|
-
build(ATTRIBUTES, opts)
|
44
|
+
build(@@ATTRIBUTES, opts)
|
41
45
|
add_author_year(opts[:author_year]) if !opts[:author_year].nil? && opts[:author_year].size > 0
|
42
46
|
@parent = opts[:parent] if (!opts[:parent].nil? && opts[:parent].class == Taxonifi::Model::Name)
|
43
47
|
@id = opts[:id] # if !opts[:id].nil? && opts[:id].size != 0
|
@@ -67,6 +71,22 @@ module Taxonifi
|
|
67
71
|
@rank = r
|
68
72
|
end
|
69
73
|
|
74
|
+
# Return a string indicating at what level this name
|
75
|
+
# is indexed within a NameCollection.
|
76
|
+
# TODO: Family group extension; ICZN specific
|
77
|
+
def index_rank
|
78
|
+
case rank
|
79
|
+
when 'species', 'subspecies'
|
80
|
+
'species_group'
|
81
|
+
when 'genus', 'subgenus'
|
82
|
+
'genus_group'
|
83
|
+
when nil, ""
|
84
|
+
'unknown'
|
85
|
+
else
|
86
|
+
rank.downcase
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
70
90
|
# Set the parent (a Taxonifi::Model::Name)
|
71
91
|
def parent=(parent)
|
72
92
|
if @rank.nil?
|
@@ -89,16 +109,30 @@ module Taxonifi
|
|
89
109
|
# TODO: rename to reflect parens
|
90
110
|
def author_year
|
91
111
|
au = author_year_string
|
92
|
-
if
|
93
|
-
|
94
|
-
else
|
95
|
-
au.size == 0 ? nil : au
|
96
|
-
end
|
112
|
+
return nil if au.nil?
|
113
|
+
(self.parens == true) ? "(#{au})" : au
|
97
114
|
end
|
98
115
|
|
99
116
|
# Return the author year string.
|
100
117
|
def author_year_string
|
101
118
|
au = [self.author, self.year].compact.join(", ")
|
119
|
+
return nil if au.size == 0
|
120
|
+
au
|
121
|
+
end
|
122
|
+
|
123
|
+
# Return a Taxonifi::Model::Name representing the finest genus_group_parent.
|
124
|
+
# TODO: ICZN specific(?)
|
125
|
+
def genus_group_parent
|
126
|
+
[ parent_at_rank('subgenus'), parent_at_rank('genus')].compact.first
|
127
|
+
end
|
128
|
+
|
129
|
+
# Returns just the name and author year, no parens, no parents.
|
130
|
+
# Like:
|
131
|
+
# foo Smith, 1927
|
132
|
+
# Foo Smith, 1927
|
133
|
+
# Fooidae
|
134
|
+
def name_author_year_string
|
135
|
+
[name, author_year_string].compact.join(" ")
|
102
136
|
end
|
103
137
|
|
104
138
|
# Return the name of a parent at a given rank.
|
@@ -139,7 +173,7 @@ module Taxonifi
|
|
139
173
|
def nomenclator_name
|
140
174
|
case @rank
|
141
175
|
when 'species', 'subspecies'
|
142
|
-
[parent_name_at_rank('genus'), (parent_name_at_rank('subgenus') ? "({parent_name_at_rank('subgenus')})" : nil), parent_name_at_rank('species'),
|
176
|
+
[parent_name_at_rank('genus'), (parent_name_at_rank('subgenus') ? "(#{parent_name_at_rank('subgenus')})" : nil), parent_name_at_rank('species'), parent_name_at_rank('subspecies')].compact.join(" ")
|
143
177
|
when 'subgenus'
|
144
178
|
[parent_name_at_rank('genus'), "(#{@name})"].compact.join(" ")
|
145
179
|
else
|
@@ -150,6 +184,7 @@ module Taxonifi
|
|
150
184
|
# Return a dashed "vector" of ids representing the ancestor parent closure, like:
|
151
185
|
# 0-1-14-29g-45s-99-100.
|
152
186
|
# Postfixed g means "genus", postfixed s means "subgenus". As per SpeciesFile usage.
|
187
|
+
# TODO: !! malformed because the valid name is not injected. Note that this can be generated internally post import.
|
153
188
|
def parent_ids_sf_style
|
154
189
|
ids = []
|
155
190
|
(ancestors.push self).each do |a|
|
@@ -176,6 +211,12 @@ module Taxonifi
|
|
176
211
|
@author_year_index = Taxonifi::Model::AuthorYear.new(people: @authors, year: @year).compact_index
|
177
212
|
end
|
178
213
|
|
214
|
+
# Return a String of Prolog rules representing this Name
|
215
|
+
def prologify
|
216
|
+
"false"
|
217
|
+
|
218
|
+
end
|
219
|
+
|
179
220
|
end
|
180
221
|
|
181
222
|
# ICZN specific subclassing of a taxonomic name.
|
@@ -5,16 +5,25 @@ module Taxonifi
|
|
5
5
|
# A collection of taxonomic names.
|
6
6
|
class NameCollection < Taxonifi::Model::Collection
|
7
7
|
|
8
|
+
# A by-name (string index)
|
8
9
|
attr_accessor :by_name_index
|
10
|
+
|
11
|
+
# A Taxonifi::Model::RefCollection, optionally generated from Author/Year strings
|
9
12
|
attr_accessor :ref_collection
|
10
13
|
|
14
|
+
# An optional collection of existing combinations of species names, as represented by
|
15
|
+
# individual arrays of Taxonifi::Model::Names. Note you can not use a Taxonifi::Model::SpeciesName
|
16
|
+
# for this purpose because getting/setting names therein will affect other combinations
|
17
|
+
attr_accessor :combinations
|
18
|
+
|
11
19
|
def initialize(options = {})
|
12
20
|
super
|
13
|
-
@
|
14
|
-
|
15
|
-
|
21
|
+
@by_name_index = {'genus_group' => {}, 'species_group' => {} } # "foo => [1,2,3]"
|
22
|
+
Taxonifi::RANKS[0..-5].inject(@by_name_index){|hsh, v| hsh.merge!(v => {})} # Lumping species and genus group names
|
23
|
+
|
16
24
|
@by_name_index['unknown'] = {} # unranked names get dumped in here
|
17
25
|
@ref_collection = nil
|
26
|
+
@combinations = []
|
18
27
|
true
|
19
28
|
end
|
20
29
|
|
@@ -33,7 +42,7 @@ module Taxonifi
|
|
33
42
|
RANKS[highest - 1]
|
34
43
|
end
|
35
44
|
|
36
|
-
#
|
45
|
+
# Returns an Array of the names objects in the collection at a rank.
|
37
46
|
# TODO: Should index this on add_object
|
38
47
|
def names_at_rank(rank)
|
39
48
|
raise if !RANKS.include?(rank)
|
@@ -45,18 +54,27 @@ module Taxonifi
|
|
45
54
|
end
|
46
55
|
|
47
56
|
# Returns id of matching existing name
|
48
|
-
# or false if there
|
49
|
-
#
|
57
|
+
# or false if there is no match.
|
58
|
+
# !! assumes parent is set
|
59
|
+
# Matches against name, year, and all parents (by id).
|
60
|
+
#
|
61
|
+
# !! nominotypic names are considered to be the same (species and generic). See
|
62
|
+
# @combinations to instantiate these
|
63
|
+
#
|
64
|
+
# TODO: This is likely already overly ICZN flavoured.
|
50
65
|
def name_exists?(name = Taxonifi::Model::Name)
|
51
|
-
#
|
52
|
-
rank = name.
|
53
|
-
|
54
|
-
if by_name_index[rank][name.
|
55
|
-
|
56
|
-
|
66
|
+
# species/genus group names are lumped together for indexing purposes
|
67
|
+
rank = name.index_rank
|
68
|
+
|
69
|
+
if by_name_index[rank][name.name_author_year_string]
|
70
|
+
by_name_index[rank][name.name_author_year_string].each do |id|
|
71
|
+
full_parent_vector = parent_id_vector(name.parent.id)
|
72
|
+
return id if full_parent_vector == parent_id_vector(id) # this hits species/genus group names
|
73
|
+
|
57
74
|
vector = parent_id_vector(id)
|
58
|
-
vector.
|
59
|
-
|
75
|
+
next if vector.last != name.parent.id # can stop looking at this possiblity
|
76
|
+
vector.pop # compare just parents
|
77
|
+
if vector == full_parent_vector
|
60
78
|
exists = true
|
61
79
|
return id
|
62
80
|
end
|
@@ -108,6 +126,12 @@ module Taxonifi
|
|
108
126
|
end
|
109
127
|
end
|
110
128
|
|
129
|
+
# Return an array of the names in the collection
|
130
|
+
def name_string_array
|
131
|
+
collection.collect{|n| n.display_name}
|
132
|
+
end
|
133
|
+
|
134
|
+
|
111
135
|
# Take the author/years of these names and generate a reference collection.
|
112
136
|
# Start the ids assigned to the references with initial_id.
|
113
137
|
def generate_ref_collection(initial_id = 0)
|
@@ -131,19 +155,46 @@ module Taxonifi
|
|
131
155
|
def ref_collection=(ref_collection)
|
132
156
|
@ref_collection = ref_collection if ref_collection.class == Taxonifi::Model::RefCollection
|
133
157
|
end
|
158
|
+
|
159
|
+
# Return an Array of "homonyms" within the rank
|
160
|
+
# provided. Useful for finding mismatched upper hierarchies,
|
161
|
+
# if nc is a name_collection:
|
162
|
+
#
|
163
|
+
# homonyms = nc.homonyms_at_rank('genus')
|
164
|
+
# homonyms.keys.sort.each do |n|
|
165
|
+
# puts "#{n} (#{homonyms[n].size}) :"
|
166
|
+
# homonyms[n].each do |p|
|
167
|
+
# puts " #{p.ancestors.collect{|i| i.name}.join(",")}"
|
168
|
+
# end
|
169
|
+
# end
|
170
|
+
#
|
171
|
+
def homonyms_at_rank(rank)
|
172
|
+
raise if !RANKS.include?(rank)
|
173
|
+
uniques = {}
|
174
|
+
names_at_rank(rank).each do |n|
|
175
|
+
uniques.merge!(n.name => []) if !uniques[n.name]
|
176
|
+
uniques[n.name].push n
|
177
|
+
end
|
178
|
+
uniques.delete_if{|k| uniques[k].size < 2}
|
179
|
+
uniques
|
180
|
+
end
|
134
181
|
|
135
182
|
protected
|
136
183
|
|
137
184
|
# Index the object by name into the
|
138
185
|
# @by_name_index variable (this looks like:
|
139
186
|
# {"Foo bar" => [1,2,93]})
|
140
|
-
|
141
|
-
|
187
|
+
# Pass a Taxonifi::Name
|
188
|
+
def index_by_name(name)
|
189
|
+
rank = name.rank
|
190
|
+
rank = 'species_group' if %w{species subspecies variety}.include?(rank)
|
191
|
+
rank = 'genus_group' if %w{genus subgenus}.include?(rank)
|
142
192
|
rank ||= 'unknown'
|
143
|
-
by_name_index[rank][obj.name] ||= []
|
144
|
-
by_name_index[rank][obj.name].push obj.id
|
145
|
-
end
|
146
193
|
|
194
|
+
by_name_index[rank][name.name_author_year_string] ||= []
|
195
|
+
by_name_index[rank][name.name_author_year_string].push name.id
|
196
|
+
end
|
147
197
|
end
|
198
|
+
|
148
199
|
end
|
149
200
|
end
|
data/lib/models/ref.rb
CHANGED
@@ -48,6 +48,7 @@ module Taxonifi
|
|
48
48
|
|
49
49
|
# If :author_year is passed it is broken down into People + year.
|
50
50
|
def initialize(options = {})
|
51
|
+
super
|
51
52
|
opts = {
|
52
53
|
}.merge!(options)
|
53
54
|
@parent = nil
|
@@ -80,6 +81,22 @@ module Taxonifi
|
|
80
81
|
@author_year_index = Taxonifi::Model::AuthorYear.new(people: @authors, year: @year).compact_index
|
81
82
|
end
|
82
83
|
|
84
|
+
# Return a single String value representing the page
|
85
|
+
# data available for this reference.
|
86
|
+
def page_string
|
87
|
+
str = ''
|
88
|
+
if @pg_start.nil?
|
89
|
+
str = [@pages].compact.join
|
90
|
+
else
|
91
|
+
if @pg_end.nil?
|
92
|
+
str = [@pg_start, @pages].compact.join("; ")
|
93
|
+
else
|
94
|
+
str = ["#{@pg_start}-#{@pg_end}", @pages].compact.join("; ")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
str.strip
|
98
|
+
end
|
99
|
+
|
83
100
|
end
|
84
101
|
end
|
85
102
|
end
|
@@ -28,11 +28,12 @@ module Taxonifi
|
|
28
28
|
# The object at a given row.
|
29
29
|
# TODO: inherit from Collection?
|
30
30
|
def object_from_row(row_number)
|
31
|
+
return nil if row_number.nil?
|
31
32
|
@row_index[row_number]
|
32
33
|
end
|
33
34
|
|
34
35
|
# Incrementally (re-)assigns the id of every associated author (Person)
|
35
|
-
# This is only
|
36
|
+
# This is only useful if you assume every author is unique.
|
36
37
|
def enumerate_authors(initial_id = 0)
|
37
38
|
i = initial_id
|
38
39
|
collection.each do |r|
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Taxonifi::Model::SharedClassMethods
|
2
|
+
def self.included(base)
|
3
|
+
base.class_eval do
|
4
|
+
|
5
|
+
# Return an array of the classes derived from the base class.
|
6
|
+
# TODO: DRY with collection code.
|
7
|
+
def self.subclasses
|
8
|
+
classes = []
|
9
|
+
ObjectSpace.each_object do |klass|
|
10
|
+
next unless Module === klass
|
11
|
+
classes << klass if self > klass
|
12
|
+
end
|
13
|
+
classes
|
14
|
+
end
|
15
|
+
|
16
|
+
# Determines identity based ONLY
|
17
|
+
# on attributes in ATTRIBUTES.
|
18
|
+
def identical?(obj)
|
19
|
+
raise Taxonifi::ModelError, "Objects are not comparible." if obj.class != self.class
|
20
|
+
self.class::ATTRIBUTES.each do |a|
|
21
|
+
next if a == :id # don't compare
|
22
|
+
return false if obj.send(a) != self.send(a)
|
23
|
+
end
|
24
|
+
return true
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/models/species_name.rb
CHANGED
@@ -65,20 +65,22 @@ module Taxonifi
|
|
65
65
|
end
|
66
66
|
|
67
67
|
# Return a string representation of the species name.
|
68
|
+
# Because we build parent relationships on setters
|
69
|
+
# this is the same as the last name's display_name
|
68
70
|
def display_name
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
strs.push n.name
|
76
|
-
end
|
77
|
-
end
|
78
|
-
strs.push self.names.last.author_year
|
79
|
-
txt = strs.compact.join(" ")
|
80
|
-
txt
|
71
|
+
names.last.display_name
|
72
|
+
end
|
73
|
+
|
74
|
+
# Returns true if this combination contains a nominotypic subspecies name
|
75
|
+
def nominotypical_species
|
76
|
+
names.species && names.subspecies && (names.species.name == names.subspecies.name)
|
81
77
|
end
|
78
|
+
|
79
|
+
# Returns true if this combination contains a nominotypic subgenus
|
80
|
+
def nominotypical_genus
|
81
|
+
names.genus && names.subgenus && (names.genus.name == names.subgenus.name)
|
82
|
+
end
|
83
|
+
|
82
84
|
end
|
83
85
|
end
|
84
86
|
end
|
data/lib/splitter/parser.rb
CHANGED
@@ -34,7 +34,6 @@ class Taxonifi::Splitter::Parser
|
|
34
34
|
t = @lexer.pop(Taxonifi::Splitter::Tokens::Quadrinomial)
|
35
35
|
ranks = %w{genus subgenus species subspecies}
|
36
36
|
names = {}
|
37
|
-
last_parent = nil
|
38
37
|
ranks.each do |r|
|
39
38
|
names.merge!(r: nil)
|
40
39
|
@builder.send("#{r}=", Taxonifi::Model::Name.new(:name => t.send(r), rank: r) ) if t.send(r)
|
@@ -44,7 +43,7 @@ class Taxonifi::Splitter::Parser
|
|
44
43
|
t = @lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)
|
45
44
|
@builder.names.last.author = t.authors
|
46
45
|
@builder.names.last.year = t.year
|
47
|
-
@builder.names.last.parens =
|
46
|
+
@builder.names.last.parens = t.parens
|
48
47
|
@builder.names.last.derive_authors_year
|
49
48
|
end
|
50
49
|
|
data/lib/splitter/tokens.rb
CHANGED
data/lib/taxonifi.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'csv'
|
2
|
+
require 'fileutils'
|
2
3
|
|
3
4
|
# Everything in Taxonifi is in here.
|
4
5
|
module Taxonifi
|
@@ -7,8 +8,12 @@ module Taxonifi
|
|
7
8
|
RANKS = %w{
|
8
9
|
kingdom
|
9
10
|
phylum
|
11
|
+
superclass
|
10
12
|
class
|
13
|
+
subclass
|
11
14
|
infraclass
|
15
|
+
cohort
|
16
|
+
superorder
|
12
17
|
order
|
13
18
|
suborder
|
14
19
|
infraorder
|
@@ -21,6 +26,7 @@ module Taxonifi
|
|
21
26
|
subgenus
|
22
27
|
species
|
23
28
|
subspecies
|
29
|
+
variety
|
24
30
|
}
|
25
31
|
|
26
32
|
|
@@ -33,4 +39,10 @@ module Taxonifi
|
|
33
39
|
require file
|
34
40
|
end
|
35
41
|
|
42
|
+
Dir.glob( File.expand_path(File.join(File.dirname(__FILE__), "utils/*.rb") )) do |file|
|
43
|
+
require file
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
|
36
48
|
end
|
data/lib/utils/array.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../taxonifi'))
|
2
|
+
|
3
|
+
# Generic Array methods
|
4
|
+
module Taxonifi::Utils
|
5
|
+
module Array
|
6
|
+
|
7
|
+
# Return an Array of length size of empty Arrays
|
8
|
+
def self.build_array_of_empty_arrays(size)
|
9
|
+
a = []
|
10
|
+
(0..(size-1)).each do |i|
|
11
|
+
a[i] = []
|
12
|
+
end
|
13
|
+
a
|
14
|
+
end
|
15
|
+
|
16
|
+
end # end Taxonifi::Utils::Array Module
|
17
|
+
end
|
data/lib/utils/hash.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../taxonifi'))
|
2
|
+
|
3
|
+
# Generic Hash methods
|
4
|
+
module Taxonifi::Utils
|
5
|
+
module Hash
|
6
|
+
|
7
|
+
# Return a Hash that maps each of the given keys to an empty Hash
|
8
|
+
def self.build_hash_of_hashes_with_keys(keys)
|
9
|
+
h = {}
|
10
|
+
keys.each do |k|
|
11
|
+
h[k] = {}
|
12
|
+
end
|
13
|
+
h
|
14
|
+
end
|
15
|
+
|
16
|
+
end # end Taxonifi::Utils::Array Module
|
17
|
+
end
|
data/taxonifi.gemspec
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "taxonifi"
|
8
|
+
s.version = "0.2.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["mjy"]
|
12
|
+
s.date = "2013-03-27"
|
13
|
+
s.description = "Taxonifi contains simple models and utilties of use in for parsing lists of taxonomic name (life) related metadata"
|
14
|
+
s.email = "diapriid@gmail.com"
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
"Gemfile",
|
22
|
+
"Gemfile.lock",
|
23
|
+
"LICENSE.txt",
|
24
|
+
"README.rdoc",
|
25
|
+
"Rakefile",
|
26
|
+
"VERSION",
|
27
|
+
"lib/assessor/assessor.rb",
|
28
|
+
"lib/assessor/base.rb",
|
29
|
+
"lib/assessor/row_assessor.rb",
|
30
|
+
"lib/export/export.rb",
|
31
|
+
"lib/export/format/base.rb",
|
32
|
+
"lib/export/format/obo_nomenclature.rb",
|
33
|
+
"lib/export/format/prolog.rb",
|
34
|
+
"lib/export/format/species_file.rb",
|
35
|
+
"lib/lumper/clump.rb",
|
36
|
+
"lib/lumper/lumper.rb",
|
37
|
+
"lib/lumper/lumps/parent_child_name_collection.rb",
|
38
|
+
"lib/models/author_year.rb",
|
39
|
+
"lib/models/base.rb",
|
40
|
+
"lib/models/collection.rb",
|
41
|
+
"lib/models/generic_object.rb",
|
42
|
+
"lib/models/geog.rb",
|
43
|
+
"lib/models/geog_collection.rb",
|
44
|
+
"lib/models/name.rb",
|
45
|
+
"lib/models/name_collection.rb",
|
46
|
+
"lib/models/person.rb",
|
47
|
+
"lib/models/ref.rb",
|
48
|
+
"lib/models/ref_collection.rb",
|
49
|
+
"lib/models/shared_class_methods.rb",
|
50
|
+
"lib/models/species_name.rb",
|
51
|
+
"lib/splitter/builder.rb",
|
52
|
+
"lib/splitter/lexer.rb",
|
53
|
+
"lib/splitter/parser.rb",
|
54
|
+
"lib/splitter/splitter.rb",
|
55
|
+
"lib/splitter/tokens.rb",
|
56
|
+
"lib/taxonifi.rb",
|
57
|
+
"lib/utils/array.rb",
|
58
|
+
"lib/utils/hash.rb",
|
59
|
+
"taxonifi.gemspec",
|
60
|
+
"test/file_fixtures/Fossil.csv",
|
61
|
+
"test/file_fixtures/Lygaeoidea.csv",
|
62
|
+
"test/file_fixtures/names.csv",
|
63
|
+
"test/helper.rb",
|
64
|
+
"test/test_export_prolog.rb",
|
65
|
+
"test/test_exporter.rb",
|
66
|
+
"test/test_lumper_clump.rb",
|
67
|
+
"test/test_lumper_geogs.rb",
|
68
|
+
"test/test_lumper_hierarchical_collection.rb",
|
69
|
+
"test/test_lumper_names.rb",
|
70
|
+
"test/test_lumper_parent_child_name_collection.rb",
|
71
|
+
"test/test_lumper_refs.rb",
|
72
|
+
"test/test_obo_nomenclature.rb",
|
73
|
+
"test/test_parser.rb",
|
74
|
+
"test/test_splitter.rb",
|
75
|
+
"test/test_splitter_tokens.rb",
|
76
|
+
"test/test_taxonifi.rb",
|
77
|
+
"test/test_taxonifi_accessor.rb",
|
78
|
+
"test/test_taxonifi_base.rb",
|
79
|
+
"test/test_taxonifi_geog.rb",
|
80
|
+
"test/test_taxonifi_name.rb",
|
81
|
+
"test/test_taxonifi_name_collection.rb",
|
82
|
+
"test/test_taxonifi_ref.rb",
|
83
|
+
"test/test_taxonifi_ref_collection.rb",
|
84
|
+
"test/test_taxonifi_species_name.rb"
|
85
|
+
]
|
86
|
+
s.homepage = "http://github.com/SpeciesFile/taxonifi"
|
87
|
+
s.licenses = ["MIT"]
|
88
|
+
s.require_paths = ["lib"]
|
89
|
+
s.rubygems_version = "1.8.25"
|
90
|
+
s.summary = "A general purpose framework for scripted handling of taxonomic names"
|
91
|
+
|
92
|
+
if s.respond_to? :specification_version then
|
93
|
+
s.specification_version = 3
|
94
|
+
|
95
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
96
|
+
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
97
|
+
s.add_development_dependency(%q<bundler>, ["> 1.0.0"])
|
98
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
|
99
|
+
s.add_development_dependency(%q<activerecord>, ["= 3.2.8"])
|
100
|
+
s.add_development_dependency(%q<debugger>, [">= 0"])
|
101
|
+
else
|
102
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
103
|
+
s.add_dependency(%q<bundler>, ["> 1.0.0"])
|
104
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
105
|
+
s.add_dependency(%q<activerecord>, ["= 3.2.8"])
|
106
|
+
s.add_dependency(%q<debugger>, [">= 0"])
|
107
|
+
end
|
108
|
+
else
|
109
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
110
|
+
s.add_dependency(%q<bundler>, ["> 1.0.0"])
|
111
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
112
|
+
s.add_dependency(%q<activerecord>, ["= 3.2.8"])
|
113
|
+
s.add_dependency(%q<debugger>, [">= 0"])
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
Phylum,Superclass,Class,Subclass,Infraclass,Cohort,Superorder,Order,Suborder,Infraorder,Superfamily,Family,Subfamily,Genus,Subgenus,Species,Subspecies,Variety,Author_Date,Authors,Year,Title,Publication,Volume_Number,Pages,Cited_Page,Litho_Age_Range,Stage,Epoch,Subperiod,Period,Era,Fossil_Preservation,Group,Formation,Member,Locality,County,State,Country,Continent,Synonyms,Synonym_Notes,In_Carpenter?
|
2
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Polyneoptera,Orthopterida,Orthoptera,Ensifera,,Grylloidea,Gryllidae,Trigonidiinae,Abanaxipha,,incongrua,,,"Vickery & Poinar, 1994","Vickery, V.R., & Poinar, Jr., G.O.",1994,Crickets (Grylloptera: Grylloidea) in Dominican amber.,The Canadian Entomologist,126(1),13-22.,17,20.5 - 16.4,Burdigalian,Miocene Early,Neogene,Tertiary,Cenozoic,Amber,Wealden,Wessex,,Dominica,,,Dominican Republic,South America,,,
|
3
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Polyneoptera,Orthopterida,Orthoptera,Ensifera,,Grylloidea,Gryllidae,Trigonidiinae,Abanaxipha,,longispina,,,"Vickery & Poinar, 1994","Vickery, V.R., & Poinar, Jr., G.O.",1994,Crickets (Grylloptera: Grylloidea) in Dominican amber.,The Canadian Entomologist,126(1),13-22.,17,20.5 - 16.4,Burdigalian,Miocene Early,Neogene,Tertiary,Cenozoic,Amber,Wealden,Wessex,,Dominica,,,Dominican Republic,South America,,,
|
4
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,,Metapterygota,Palaeodictyopterida,Palaeodictyoptera,,,,Spilapteridae,,Abaptilon,,sibericum $,,,"Zalessky, 1946","Zalessky,",1946,,,,,58,354 - 296,,,,Carboniferous,Palaeozoic,,,,,Russia,,,Russia,Asia,,,Yes
|
5
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Holometabola,Panorpida,Diptera,Brachycera,,Phoroidea,Phoridae,,Abaristophora,,domicamberae,,,"Disney, 1996","Disney, R.H.L., & Ross, A.J.",1996,"Abaristophora & Puliciphora (Diptera, Phoridae) from Dominical amber and revisionary notes on modern species.",European Journal of Entomology,93,127-135.,128,20.5 - 16.4,Burdigalian,Miocene Early,Neogene,Tertiary,Cenozoic,Amber,Wealden,Wessex,,Dominica,,,Dominican Republic,South America,,,
|
6
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Polyneoptera,,Grylloblattodea,Grylloblattina,,,Liomopteridae,,Abashevia,,suchovi,,,"Sharov, 1961","Sharov, A.G.",1961,Order Paraplecoptera. IN Rohdendorf et.al. Paleozoic insects of the Kuznetsk basin.,Trudy paleontologicheskogo instituta akadamii nauk SSSR,85,"164-224, text-fig. 93-164, pl. 19.",194,545 - 251,,,,,Palaeozoic,,,,,Kuznetsk Basin,,,Russia,Asia,,,Yes
|
7
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Holometabola,,Coleoptera,,,,,,Abax,,durhamensis,,,"Lesne, 1926","Lesne, P.",1926,Nouvelles donnees sur la faunule coleopterologique Pliocene de Castle Eden (Angleterre Septle),Encyclopedie entomologique (series B) Coleoptera,2(1),1-15.,3,5.3 - 1.8,,Pliocene,Neogene,Tertiary,Cenozoic,,,,,Castle Eden,Durham,,UK,Europe,,,
|
8
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Holometabola,,Coleoptera,Polyphaga,,Tenebrionoidea,Melandryidae,,Abderina,,helm(s)i,,,"Seidlitz, 1898","Seidlitz, G.C.M.",1898,Coleoptera,IN Erichsons Naturgeschichte der Insekten Deutschlands Berlin,5(2),305-680.,577,54.8 - 33.7,,Eocene,Palaeogene,Tertiary,Cenozoic,Amber,Wealden,Wessex,,Baltic,,,Baltic,Europe,,,Yes
|
9
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Holometabola,,Coleoptera,Polyphaga,,Cantharoidea,Cleridae,,Aberrokorynetes,,abludens,,,"Winkler, 1990","Winkler, J.",1990,Two new genera of fossil Korynetinae from Baltic Amber (Coleoptera Cleridae).,Acta Universitatis Carolinae - Biologica,34,371-381.,377,54.8 - 33.7,,Eocene,Palaeogene,Tertiary,Cenozoic,Amber,Wealden,Wessex,,Baltic,,,Baltic,Europe,,,
|
10
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Holometabola,,Hymenoptera,Symphyta,,Tenthredinoidea,Cimbicidae,,Abia,,duplicata =,,,"Giebel, 1856","Giebel, C.G.A.",1856,"Fauna der Vorwelt, Bd II. Die Insecten und Spinnen der Vorwelt.",F.U. Brodhaus. Leipzig,,1-511.,264,150.7 - 140,Tithonian - Berriasian,,,Jurassic / Cretaceous,Mesozoic,Lower Purbeck beds,Purbeck,Lulworth,,Durlston Bay,Dorset,England,UK,Europe,Osmylopsis duplicata,,
|
11
|
+
Arthropoda,Hexapoda,Insecta,Pterygota,Neoptera,Holometabola,,Hymenoptera,Symphyta,,Tenthredinoidea,Cimbicidae,,Abia,,kochi =,,,"Geinitz, 1887","Geinitz, F.E.",1887,Neue Aufschlusse der Flozformation Mecklenburgs. IX Beitrag zur Geologie Mecklenburgs. IV Jura.,Archiv des Vereins der Freunde naturgeschichte Mecklenburg,41,"143-216, pl. 4-6",200,189.6 - 180.1,,Liassic Late,,Jurassic Early,Mesozoic,,,,,Dobbertin,Mecklenburg,,Germany,Europe,Solenoptilon kochi,,
|