taxonifi 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +24 -7
- data/README.rdoc +5 -6
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/assessor/row_assessor.rb +25 -18
- data/lib/export/format/base.rb +96 -1
- data/lib/export/format/obo_nomenclature.rb +71 -0
- data/lib/export/format/prolog.rb +59 -0
- data/lib/export/format/species_file.rb +303 -193
- data/lib/lumper/clump.rb +112 -0
- data/lib/lumper/lumper.rb +71 -45
- data/lib/lumper/lumps/parent_child_name_collection.rb +79 -15
- data/lib/models/author_year.rb +1 -2
- data/lib/models/base.rb +56 -51
- data/lib/models/collection.rb +16 -1
- data/lib/models/name.rb +56 -15
- data/lib/models/name_collection.rb +70 -19
- data/lib/models/ref.rb +17 -0
- data/lib/models/ref_collection.rb +2 -1
- data/lib/models/shared_class_methods.rb +29 -0
- data/lib/models/species_name.rb +14 -12
- data/lib/splitter/parser.rb +1 -2
- data/lib/splitter/tokens.rb +1 -1
- data/lib/taxonifi.rb +12 -0
- data/lib/utils/array.rb +17 -0
- data/lib/utils/hash.rb +17 -0
- data/taxonifi.gemspec +116 -0
- data/test/file_fixtures/Fossil.csv +11 -0
- data/test/file_fixtures/Lygaeoidea.csv +1 -1
- data/test/file_fixtures/names.csv +1 -0
- data/test/helper.rb +14 -0
- data/test/test_export_prolog.rb +14 -0
- data/test/test_exporter.rb +23 -0
- data/test/test_lumper_clump.rb +75 -0
- data/test/test_lumper_names.rb +67 -9
- data/test/test_lumper_parent_child_name_collection.rb +47 -3
- data/test/test_lumper_refs.rb +22 -7
- data/test/test_obo_nomenclature.rb +14 -0
- data/test/test_parser.rb +4 -2
- data/test/test_splitter_tokens.rb +9 -0
- data/test/test_taxonifi_accessor.rb +21 -15
- data/test/test_taxonifi_base.rb +25 -0
- data/test/test_taxonifi_name.rb +41 -4
- data/test/test_taxonifi_name_collection.rb +54 -17
- data/test/test_taxonifi_species_name.rb +1 -1
- metadata +34 -5
data/lib/lumper/clump.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
# require File.expand_path(File.join(File.dirname(__FILE__), '../taxonifi'))
|
2
|
+
|
3
|
+
# A Clump is a "C"ollection of lump derivatives and the relationships between these derivatives!
|
4
|
+
# It's used to define relationships among objects derived, for example, between single rows of data
|
5
|
+
module Taxonifi
  module Lumper
    # Namespace declared next to Clump. Nothing in this file is defined inside it.
    module Clumps
    end

    # A Clump keeps a set of named collections together with (at most) one
    # CSV, so that relationships between the collections can be defined later.
    class Clump
      # Hash of collection name => collection object.
      attr_accessor :collections

      # Integer used to build the next default collection name ("collectionN").
      # (The spelling is part of the public interface and is kept as is.)
      attr_accessor :annonymous_collection_index

      # The CSV attached to this clump, or nil when there is none.
      attr_accessor :csv

      # csv - an optional CSV to attach immediately.
      def initialize(csv = nil)
        @collections = {}
        @annonymous_collection_index = 0
        @csv = csv
      end

      # Attach a CSV, but only when none is attached yet.
      #
      # Returns the csv that was stored, or false when a CSV is already present.
      def add_csv(csv)
        return false unless @csv.nil?
        @csv = csv
      end

      # Detach the current CSV.
      #
      # Returns true when a CSV was removed, false when there was nothing to remove.
      def remove_csv
        return false if @csv.nil?
        @csv = nil
        true
      end

      # Add a new collection of the requested kind to this clump.
      #
      # options - Hash, passed on to the collection constructor:
      #           :collection - :name (default) or :ref
      #           :name       - key to store the collection under (defaults to
      #                         the next "collectionN" name)
      #
      # The CSV is only checked for presence here; this method does not read it.
      #
      # Returns true when the collection was added, false when it was not
      # (e.g. the name is already taken).
      # Raises RuntimeError when no CSV is attached or the kind is not supported.
      def get_from_csv(options = {})
        opts = { collection: :name }.merge(options)
        raise 'No CSV has been added to this Clump.' if @csv.nil?

        kind = opts[:collection]
        unless Taxonifi::Model::Collection.subclass_prefixes.include?(kind.to_s)
          raise "Unknown collection type: #{kind.inspect}."
        end

        case kind
        when :name
          add_name_collection(opts)
        when :ref
          add_ref_collection(opts)
        else
          # A known prefix that this class does not know how to build.
          raise "Collection type #{kind.inspect} can not be built by a Clump."
        end
      end

      # The default name the next unnamed collection will be stored under.
      def next_available_collection_name
        "collection#{annonymous_collection_index}"
      end

      # Move on to the next default collection name. Always returns true.
      def increment_annonymous_collection_index
        @annonymous_collection_index += 1
        true
      end

      # Create a Taxonifi::Model::RefCollection from options and store it.
      # Returns true on success, false otherwise.
      def add_ref_collection(options)
        build_and_add_collection(Taxonifi::Model::RefCollection, options)
      end

      # Create a Taxonifi::Model::NameCollection from options and store it.
      # Returns true on success, false otherwise.
      def add_name_collection(options)
        build_and_add_collection(Taxonifi::Model::NameCollection, options)
      end

      # Store collection under name.
      #
      # Returns false when either argument is nil or the name is already in
      # use, otherwise the (truthy) updated collections Hash.
      def add_collection(name = nil, collection = nil)
        return false if name.nil? || collection.nil?
        return false if @collections.key?(name)
        @collections.merge!(name => collection)
      end

      # Not implemented yet; does nothing and returns nil.
      def link(collection1, collection2, link_method)
      end

      # Stub: returns the default settings for linking a NameCollection and a
      # RefCollection. The options argument is not consulted yet.
      def self.link_name_collection_and_ref_collection(options = {})
        {
          :nc => Taxonifi::Model::NameCollection.new,
          :rc => Taxonifi::Model::RefCollection.new,
          :by => :row_number
        }
      end

      # Should ultimately make this a Redis hook

      # variable indices b/w data

      private

      # Shared body of add_ref_collection / add_name_collection: instantiate
      # klass with the merged options and register it under opts[:name].
      def build_and_add_collection(klass, options)
        opts = { :name => next_available_collection_name }.merge(options)
        return false unless add_collection(opts[:name], klass.new(opts))

        # Only consume a default name when it was the one actually used.
        increment_annonymous_collection_index if opts[:name] == next_available_collection_name
        true
      end
    end
  end
end
|
data/lib/lumper/lumper.rb
CHANGED
@@ -29,7 +29,7 @@ module Taxonifi::Lumper
|
|
29
29
|
quad_author_year: QUAD + AUTHOR_YEAR,
|
30
30
|
names: Taxonifi::RANKS + AUTHOR_YEAR,
|
31
31
|
higher: Taxonifi::RANKS - [QUAD + AUTHOR_YEAR],
|
32
|
-
species: ['species', 'subspecies'],
|
32
|
+
species: ['species', 'subspecies', 'variety'],
|
33
33
|
genera: ['genus', 'subgenus'],
|
34
34
|
citation_basic: %w{authors year title publication volume number pages pg_start pg_end},
|
35
35
|
citation_small: %w{authors year title publication volume_number pages},
|
@@ -37,6 +37,8 @@ module Taxonifi::Lumper
|
|
37
37
|
eol_basic: %w{identifier parent child rank synonyms}
|
38
38
|
}
|
39
39
|
|
40
|
+
# Authors, Year, Title, Publication, Volume_Number Pages Cited_Page
|
41
|
+
|
40
42
|
# Lumps for which all columns are represented
|
41
43
|
# TODO: This is really an assessor method
|
42
44
|
def self.available_lumps(columns)
|
@@ -55,21 +57,26 @@ module Taxonifi::Lumper
|
|
55
57
|
intersections
|
56
58
|
end
|
57
59
|
|
60
|
+
|
58
61
|
# Return a Taxonifi::Model::NameCollection from a csv file.
|
59
|
-
def self.create_name_collection(
|
62
|
+
def self.create_name_collection(options = {})
|
63
|
+
opts = {
|
64
|
+
:csv => [],
|
65
|
+
:initial_id => 0,
|
66
|
+
:capture_related_fields => true # Stores other column values in (column_header => value) pairs in Name.related
|
67
|
+
}.merge!(options)
|
68
|
+
|
69
|
+
csv = opts[:csv]
|
60
70
|
raise Taxonifi::Lumper::LumperError, 'Something that is not a CSV::Table was passed to Lumper.create_name_collection.' if csv.class != CSV::Table
|
61
|
-
|
62
|
-
|
71
|
+
|
72
|
+
nc = Taxonifi::Model::NameCollection.new(:initial_id => opts[:initial_id])
|
63
73
|
row_size = csv.size
|
64
74
|
|
65
75
|
# The row index contains a vector of parent ids like
|
66
76
|
# [0, 4, 29]
|
67
77
|
# This implies that Name with #id 29 has Parent with #id 4
|
68
78
|
# Initialize an empty index.
|
69
|
-
row_index =
|
70
|
-
(0..(row_size-1)).each do |i|
|
71
|
-
row_index[i] = []
|
72
|
-
end
|
79
|
+
row_index = Taxonifi::Utils::Array.build_array_of_empty_arrays(row_size)
|
73
80
|
|
74
81
|
# The name_index keeps track of unique name per rank like
|
75
82
|
# :genus => {'Foo' => [0,2]}
|
@@ -77,27 +84,30 @@ module Taxonifi::Lumper
|
|
77
84
|
# name collection, with id 0, and id 2.
|
78
85
|
name_index = {}
|
79
86
|
|
87
|
+
has_ref_fields = ([:citation_basic, :citation_small] & Taxonifi::Lumper.intersecting_lumps(csv.headers)).size > 0
|
88
|
+
unused_fields = csv.headers - Taxonifi::Lumper::LUMPS[:names]
|
89
|
+
|
90
|
+
|
80
91
|
# First pass, create and index names
|
81
92
|
Taxonifi::Assessor::RowAssessor.rank_headers(csv.headers).each do |rank|
|
82
93
|
name_index[rank] = {}
|
83
94
|
csv.each_with_index do |row, i|
|
84
|
-
|
85
|
-
|
95
|
+
shares_rank = (rank == Taxonifi::Assessor::RowAssessor.lump_name_rank(row).to_s)
|
86
96
|
name = row[rank]
|
87
97
|
|
88
98
|
if !name.nil? # cell has data
|
89
99
|
n = nil # a Name if necessary
|
90
|
-
name_id = nil # index the new or existing
|
100
|
+
name_id = nil # index the new or existing Name
|
91
101
|
|
92
|
-
if name_index[rank][name] # name (
|
102
|
+
if name_index[rank][name] # A matching name (String) has been previously added
|
103
|
+
exists = false
|
93
104
|
|
94
|
-
exists = false
|
95
105
|
name_index[rank][name].each do |id|
|
96
106
|
# Compare vectors of parent_ids for name presence
|
97
107
|
if nc.parent_id_vector(id) == row_index[i]
|
98
108
|
exists = true
|
99
109
|
name_id = id
|
100
|
-
break
|
110
|
+
break
|
101
111
|
end
|
102
112
|
end
|
103
113
|
|
@@ -109,7 +119,8 @@ module Taxonifi::Lumper
|
|
109
119
|
n = Taxonifi::Model::Name.new()
|
110
120
|
end # end name exists
|
111
121
|
|
112
|
-
|
122
|
+
|
123
|
+
# Populate the new name if created. Previously matched names are not affected.
|
113
124
|
if !n.nil?
|
114
125
|
n.rank = rank
|
115
126
|
n.name = name
|
@@ -119,17 +130,26 @@ module Taxonifi::Lumper
|
|
119
130
|
# Name/year needs to be standardized / cased out
|
120
131
|
# headers are overlapping at times
|
121
132
|
|
122
|
-
if
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
133
|
+
# Check to see if metadata (e.g. author year) apply to this rank, attach if so.
|
134
|
+
if shares_rank
|
135
|
+
if row['author_year']
|
136
|
+
builder = Taxonifi::Splitter::Builder.build_author_year(row['author_year'])
|
137
|
+
n.author = builder.people
|
138
|
+
n.year = builder.year
|
139
|
+
n.parens = !builder.parens
|
140
|
+
end
|
141
|
+
|
142
|
+
n.related.merge!(:link_to_ref_from_row => i) if has_ref_fields
|
143
|
+
n.related.merge!(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
|
127
144
|
end
|
128
145
|
|
129
146
|
name_id = nc.add_object(n).id
|
130
|
-
# Add the name to the index of unique names
|
131
147
|
name_index[rank][name] ||= []
|
132
148
|
name_index[rank][name].push name_id
|
149
|
+
|
150
|
+
$DEBUG && $stderr.puts("added #{nc.collection.size - 1} | #{n.name} | #{n.rank} | #{n.parent ? n.parent.name : '-'} | #{n.parent ? n.parent.id : '-'}")
|
151
|
+
else
|
152
|
+
$DEBUG && $stderr.puts("already present #{rank} | #{name}")
|
133
153
|
end
|
134
154
|
|
135
155
|
# build a by row vector of parent child relationships
|
@@ -138,15 +158,22 @@ module Taxonifi::Lumper
|
|
138
158
|
|
139
159
|
end
|
140
160
|
end
|
141
|
-
|
142
161
|
nc
|
143
162
|
end
|
144
163
|
|
145
164
|
# Return a Taxonifi::Model::RefCollection from a CSV file.
|
146
|
-
def self.create_ref_collection(
|
165
|
+
def self.create_ref_collection(options = {})
|
166
|
+
opts = {
|
167
|
+
:csv => nil,
|
168
|
+
:inital_id => 1,
|
169
|
+
:capture_related_fields => true # Stores other column values in (column_header => value) pairs in Ref.related
|
170
|
+
}.merge!(options)
|
171
|
+
csv = opts[:csv]
|
172
|
+
|
147
173
|
raise Taxonifi::Lumper::LumperError, 'Something that is not a CSV::Table was passed to Lumper.create_ref_collection.' if csv.class != CSV::Table
|
148
|
-
rc = Taxonifi::Model::RefCollection.new
|
149
|
-
|
174
|
+
rc = Taxonifi::Model::RefCollection.new(opts)
|
175
|
+
|
176
|
+
unused_fields = csv.headers - (Taxonifi::Lumper::LUMPS[:citation_basic] | Taxonifi::Lumper::LUMPS[:citation_small])
|
150
177
|
|
151
178
|
ref_index = {}
|
152
179
|
csv.each_with_index do |row, i|
|
@@ -178,30 +205,41 @@ module Taxonifi::Lumper
|
|
178
205
|
|
179
206
|
if row['pages'] && !row['pages'].empty?
|
180
207
|
# If our regex doesn't match dump the field into pages
|
208
|
+
lexer = Taxonifi::Splitter::Lexer.new(row['pages'], :pages)
|
181
209
|
begin
|
182
|
-
|
183
|
-
t = lexer.pop(Taxonifi::Splitter::Tokens::Pages)
|
210
|
+
if t = lexer.pop(Taxonifi::Splitter::Tokens::Pages)
|
184
211
|
r.pg_start = t.pg_start
|
185
212
|
r.pg_end = t.pg_end
|
213
|
+
r.pages = t.remainder
|
214
|
+
else
|
215
|
+
r.pages = row['pages']
|
216
|
+
end
|
186
217
|
rescue
|
187
218
|
r.pages = row['pages']
|
188
219
|
end
|
189
220
|
end
|
221
|
+
|
222
|
+
r.related.merge!(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
|
190
223
|
|
191
224
|
# Do some indexing.
|
192
225
|
ref_str = r.compact_string
|
193
226
|
if !ref_index.keys.include?(ref_str)
|
194
227
|
ref_id = rc.add_object(r).id
|
195
228
|
ref_index.merge!(ref_str => ref_id)
|
229
|
+
# puts "#{i} : #{ref_id}"
|
196
230
|
rc.row_index[i] = r
|
197
231
|
else
|
198
|
-
rc.row_index[i] = ref_index[ref_str]
|
232
|
+
rc.row_index[i] = rc.object_by_id(ref_index[ref_str])
|
233
|
+
# puts "#{i} : #{ref_index[ref_str]}"
|
199
234
|
end
|
200
235
|
end
|
201
236
|
end
|
202
237
|
rc
|
203
238
|
end
|
204
239
|
|
240
|
+
# def self.link_name_and_ref_collections_by_row(nc, rc)
|
241
|
+
# end
|
242
|
+
|
205
243
|
# Creates a generic Collection with Objects of GenericObject
|
206
244
|
# Objects are assigned to parents (rank) according to the order provided in headers.
|
207
245
|
# Objects are considered the same if they have the same name and the same parents closure, e.g.
|
@@ -223,15 +261,8 @@ module Taxonifi::Lumper
|
|
223
261
|
row_size = csv.size
|
224
262
|
|
225
263
|
# See create_name_collection
|
226
|
-
row_index =
|
227
|
-
(
|
228
|
-
row_index[i] = []
|
229
|
-
end
|
230
|
-
|
231
|
-
name_index = {}
|
232
|
-
headers.each do |h|
|
233
|
-
name_index[h] = {}
|
234
|
-
end
|
264
|
+
row_index = Taxonifi::Utils::Array.build_array_of_empty_arrays(row_size)
|
265
|
+
name_index = Taxonifi::Utils::Hash.build_hash_of_hashes_with_keys(headers)
|
235
266
|
|
236
267
|
csv.each_with_index do |row, i|
|
237
268
|
headers.each do |rank|
|
@@ -240,7 +271,7 @@ module Taxonifi::Lumper
|
|
240
271
|
o = nil # a Name if necessary
|
241
272
|
name_id = nil # index the new or existing name
|
242
273
|
|
243
|
-
if name_index[rank][name] # name
|
274
|
+
if name_index[rank][name] # Matching name is found
|
244
275
|
|
245
276
|
exists = false
|
246
277
|
name_index[rank][name].each do |id|
|
@@ -267,8 +298,8 @@ module Taxonifi::Lumper
|
|
267
298
|
name_id = c.add_object(o).id
|
268
299
|
name_index[rank][name] ||= []
|
269
300
|
name_index[rank][name].push name_id
|
270
|
-
|
271
301
|
end
|
302
|
+
|
272
303
|
row_index[i].push name_id
|
273
304
|
end
|
274
305
|
end
|
@@ -282,12 +313,7 @@ module Taxonifi::Lumper
|
|
282
313
|
gc = Taxonifi::Model::GeogCollection.new
|
283
314
|
|
284
315
|
row_size = csv.size
|
285
|
-
|
286
|
-
# See create_name_collection
|
287
|
-
row_index = []
|
288
|
-
(0..(row_size-1)).each do |i|
|
289
|
-
row_index[i] = []
|
290
|
-
end
|
316
|
+
row_index = Taxonifi::Utils::Array.build_array_of_empty_arrays(row_size)
|
291
317
|
|
292
318
|
name_index = {}
|
293
319
|
geog_headers = Taxonifi::Assessor::RowAssessor.geog_headers(csv.headers)
|
@@ -8,7 +8,12 @@ module Taxonifi::Lumper::Lumps::ParentChildNameCollection
|
|
8
8
|
raise Taxonifi::Lumper::LumperError, "CSV does not have the required headers (#{Taxonifi::Lumper::LUMPS[:eol_basic].join(", ")})." if !Taxonifi::Lumper.available_lumps(csv.headers).include?(:eol_basic)
|
9
9
|
|
10
10
|
nc = Taxonifi::Model::NameCollection.new(:initial_id => 1)
|
11
|
-
|
11
|
+
|
12
|
+
# identifier => Taxonifi::Name
|
13
|
+
external_index = {}
|
14
|
+
|
15
|
+
# Array of Hashes {:synonyms => "Name|Name1|Name2", :external_index => external_index[parent_id], :valid_species_id => valid_species_id}, {} ...
|
16
|
+
synonym_list = []
|
12
17
|
|
13
18
|
csv.each_with_index do |row,i|
|
14
19
|
name = row['child']
|
@@ -17,6 +22,11 @@ module Taxonifi::Lumper::Lumps::ParentChildNameCollection
|
|
17
22
|
external_id = row['identifier'].to_i
|
18
23
|
valid_species_id = nil
|
19
24
|
|
25
|
+
# Fix me
|
26
|
+
index_rank = 'species_group' if rank == 'species' || rank == 'subspecies'
|
27
|
+
index_rank = 'genus_group' if rank == 'subgenus' || rank == 'genus'
|
28
|
+
index_rank ||= rank
|
29
|
+
|
20
30
|
case rank
|
21
31
|
when 'species', nil
|
22
32
|
valid_species_id = add_species_names_from_string(nc, name, external_index[parent_id])
|
@@ -24,10 +34,10 @@ module Taxonifi::Lumper::Lumps::ParentChildNameCollection
|
|
24
34
|
else # Just a single string, we don't have to break anything down.
|
25
35
|
n = nil
|
26
36
|
|
27
|
-
if nc.by_name_index[
|
37
|
+
if nc.by_name_index[index_rank][name]
|
28
38
|
exists = false
|
29
39
|
# TODO: this hasn't been hit yet
|
30
|
-
nc.by_name_index[
|
40
|
+
nc.by_name_index[index_rank][name].each do |id|
|
31
41
|
if nc.parent_id_vector(id).pop == nc.parent_id_vector(parent_id)
|
32
42
|
exists = true
|
33
43
|
break
|
@@ -45,39 +55,93 @@ module Taxonifi::Lumper::Lumps::ParentChildNameCollection
|
|
45
55
|
# TODO: No author, year have yet been observed for genus and higher names
|
46
56
|
n.rank = rank
|
47
57
|
n.name = name
|
48
|
-
n.external_id = external_id
|
49
58
|
n.row_number = i
|
50
|
-
|
59
|
+
n.related.merge!(:external_id => external_id)
|
60
|
+
|
51
61
|
if parent = external_index[parent_id]
|
52
62
|
n.parent = parent
|
53
63
|
end
|
54
64
|
|
55
|
-
nc.
|
56
|
-
|
65
|
+
if !nc.name_exists?(n)
|
66
|
+
nc.add_object(n)
|
67
|
+
external_index.merge!(external_id => n)
|
68
|
+
end
|
57
69
|
end
|
58
70
|
end
|
59
71
|
|
60
72
|
if !row['synonyms'].nil? && row['synonyms'].size > 0
|
61
|
-
|
62
|
-
|
63
|
-
add_species_names_from_string(nc, n, external_index[parent_id], valid_species_id)
|
64
|
-
end
|
73
|
+
# puts n.name if external_index[parent_id].nil?
|
74
|
+
synonym_list.push({:synonyms => row['synonyms'], :valid_species_id => valid_species_id, :external_index => external_index[parent_id]})
|
65
75
|
end
|
66
76
|
|
67
77
|
end # end row
|
78
|
+
|
79
|
+
# parse the synonyms last, because names might have been mixed
|
80
|
+
synonym_list.each do |s|
|
81
|
+
other_names = s[:synonyms].split("|")
|
82
|
+
other_names.each do |n|
|
83
|
+
# puts ":: #{n} :: #{s[:external_index]} :: #{s[:valid_species_id]}" if s[:external_index].nil?
|
84
|
+
add_species_names_from_string(nc, n, s[:external_index], s[:valid_species_id])
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
68
88
|
nc
|
69
89
|
end
|
70
90
|
|
71
|
-
# Add the
|
91
|
+
# Add the last name in a species epithet string if new, record a new combination otherwise.
# Assumes ALL parents have been previously added, including those used in Synonym combinations.
# For example, given a row with name, synonym fields like:
#   ['Neortholomus scolopax (Say, 1832)', 'Lygaeus scolopax Say, 1832']
# The names Neortholomus and Lygaeus must exist.
#
# nc         - the name collection being built
# string     - the species name string, parsed with Builder.build_species_name
# parent     - the already instantiated parent name of the last name in string (required)
# synonym_id - optional id of the name the new name is related to (set as related_name)
#
# Returns the id of the name that was added, or nil when the name already
# existed (in which case a combination is recorded when its genus and species can be found).
# Raises Taxonifi::Lumper::LumperError when parent is nil.
def self.add_species_names_from_string(nc, string, parent = nil, synonym_id = nil)
  names = Taxonifi::Splitter::Builder.build_species_name(string) # A Taxonifi::Model::SpeciesName instance
  terminal = names.names.last

  if parent.nil?
    raise Taxonifi::Lumper::LumperError, "Parent of [#{terminal.name}] within [#{names.display_name}] not yet instantiated. \n !! To resolve: \n\t 1) If this is not a species name your file may be missing a value in the 'Rank' column (nil values are assumed to be species, all other ranks must be populated). \n\t 2) Parent names must be read before children, check that this is the case."
  end

  # Swap out the parsed parent for the one handed in by the caller.
  terminal.parent = parent

  # New name: add it, link it to its related name if given, and we are done.
  unless nc.name_exists?(terminal)
    last_id = nc.add_object(terminal).id
    nc.object_by_id(last_id).related_name = nc.object_by_id(synonym_id) unless synonym_id.nil?
    return last_id
  end

  # The name is already present, so this string is a combination of existing names.
  # Work on clones so the parsed names are not modified while re-parenting.
  tmp_genus      = names.genus.clone
  tmp_species    = names.species.clone
  tmp_subspecies = names.subspecies.nil? ? nil : names.subspecies.clone

  case parent.rank
  when 'genus', 'subgenus'
    tmp_genus.parent = parent.parent
  when 'species'
    tmp_genus.parent = parent.parent.parent
    tmp_species = parent
    # The subspecies is optional everywhere else in this method; guard it here too.
    tmp_subspecies.parent = tmp_species unless tmp_subspecies.nil?
  end

  # name_exists? is used here as an id lookup -- presumably it returns the id
  # of the matching name or a falsey value; verify against NameCollection.
  real_genus   = nc.object_by_id(nc.name_exists?(tmp_genus))
  real_species = nc.object_by_id(nc.name_exists?(tmp_species))

  # !! Existing demo data Lygaeoidea have synonyms in which the genus name is not instantiated. This might be a problem with DwC file
  # validation in general, something to look at, for now, throw up our hands and move on.
  return nil if real_genus.nil? || real_species.nil?

  real_subgenus   = nil # revisit
  real_subspecies = tmp_subspecies.nil? ? nil : nc.object_by_id(nc.name_exists?(tmp_subspecies))

  nc.combinations.push [real_genus, real_subgenus, real_species, real_subspecies]
  nil
end
|
83
147
|
|
data/lib/models/author_year.rb
CHANGED
@@ -3,7 +3,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "../models/base.rb"))
|
|
3
3
|
module Taxonifi
|
4
4
|
module Model
|
5
5
|
# A class to aggregate People and Year combinations.
|
6
|
-
|
6
|
+
class AuthorYear < Taxonifi::Model::Base
|
7
7
|
# Array of Taxonifi::Model::People
|
8
8
|
attr_accessor :people
|
9
9
|
# String
|
@@ -32,7 +32,6 @@ module Taxonifi
|
|
32
32
|
end
|
33
33
|
index.join("-")
|
34
34
|
end
|
35
|
-
|
36
35
|
end
|
37
36
|
end
|
38
37
|
end
|
data/lib/models/base.rb
CHANGED
@@ -1,73 +1,78 @@
|
|
1
1
|
module Taxonifi
|
2
2
|
class ModelError < StandardError; end
|
3
3
|
module Model
|
4
|
-
|
4
|
+
|
5
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'shared_class_methods'))
|
6
|
+
|
5
7
|
# A base class for all Taxonifi::Models that represent
|
6
8
|
# "individuals" (as opposed to collections of individuals).
|
7
9
|
class Base

  include Taxonifi::Model::SharedClassMethods

  # The id of this object. Written through the validating #id= below.
  attr_reader :id

  # Optionally store the row this came from
  attr_accessor :row_number

  # A general purpose hash, populated as needed with related metadata
  attr_accessor :related

  # TODO: Rethink this. See @@ATTRIBUTES in subclasses.
  ATTRIBUTES = [:row_number]

  # options is accepted for interface compatibility but is not read here.
  def initialize(options = {})
    @related = {}
  end

  # Assign on new() all attributes for the ATTRIBUTES
  # constant in a given subclass.
  # !! Check validity prior to building.
  #
  # attributes - the attribute names to assign
  # opts       - Hash of attribute name => value; nil values are skipped
  def build(attributes, opts)
    attributes.each do |attribute|
      value = opts[attribute]
      send("#{attribute}=", value) unless value.nil?
    end
  end

  # Set the id. Only nil or an Integer is accepted.
  # (Integer rather than Fixnum: the Fixnum constant no longer exists in current Rubies.)
  #
  # Raises Taxonifi::ModelError for any other value.
  def id=(id)
    raise Taxonifi::ModelError, "Base model objects must have Fixnum ids." unless id.nil? || id.is_a?(Integer)
    @id = id
  end

  # The ids only of ancestors.
  # Immediate ancestor id is in [].last
  def ancestor_ids
    parent_chain.map { |ancestor| ancestor.id }
  end

  # Ancestor objects for subclasses
  # that have a parent property.
  # Immediate ancestor is in [].last
  # TODO: check for parent attributes
  def ancestors
    parent_chain
  end

  private

  # Follow #parent links upward and return the ancestors, root first.
  # #parent is not defined in this class; subclasses are expected to provide it.
  #
  # Raises Taxonifi::ModelError once more than 100 ancestors have been
  # collected, which is treated as a cycle in the parent links.
  def parent_chain
    chain = []
    node  = parent
    until node.nil?
      chain.unshift node
      node = node.parent
      raise Taxonifi::ModelError, "Infite recursion in parent string detected for Base model object #{id}." if chain.size > 100
    end
    chain
  end

end
|
75
|
+
|
76
|
+
|
72
77
|
end
|
73
78
|
end
|