taxonifi 0.2.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +59 -0
- data/.travis.yml +11 -0
- data/Gemfile +5 -17
- data/Gemfile.lock +22 -40
- data/README.md +192 -0
- data/Rakefile +35 -26
- data/lib/export/format/base.rb +1 -1
- data/lib/export/format/species_file.rb +154 -152
- data/lib/lumper/clump.rb +1 -1
- data/lib/lumper/lumper.rb +22 -18
- data/lib/lumper/lumps/parent_child_name_collection.rb +1 -2
- data/lib/lumper/name_index.rb +21 -0
- data/lib/{models → model}/author_year.rb +2 -2
- data/lib/{models → model}/base.rb +35 -5
- data/lib/{models → model}/collection.rb +8 -1
- data/lib/{models → model}/name.rb +128 -36
- data/lib/{models → model}/name_collection.rb +134 -33
- data/lib/{models → model}/person.rb +1 -1
- data/lib/{models → model}/ref.rb +4 -2
- data/lib/model/ref_collection.rb +171 -0
- data/lib/{models → model}/species_name.rb +24 -3
- data/lib/splitter/builder.rb +1 -1
- data/lib/splitter/parser.rb +5 -0
- data/lib/splitter/tokens.rb +54 -9
- data/lib/taxonifi/version.rb +3 -0
- data/lib/taxonifi.rb +5 -9
- data/taxonifi.gemspec +29 -99
- data/test/helper.rb +1 -1
- data/test/test_exporter.rb +1 -1
- data/test/test_lumper_names.rb +9 -9
- data/test/test_lumper_refs.rb +4 -4
- data/test/test_parser.rb +97 -26
- data/test/test_splitter_tokens.rb +25 -4
- data/test/test_taxonifi_base.rb +1 -1
- data/test/test_taxonifi_geog.rb +1 -1
- data/test/test_taxonifi_name.rb +13 -14
- data/test/test_taxonifi_name_collection.rb +11 -5
- data/test/test_taxonifi_ref.rb +1 -1
- data/test/test_taxonifi_ref_collection.rb +40 -3
- data/test/test_taxonifi_species_name.rb +51 -1
- data/travis/before_install.sh +2 -0
- metadata +96 -66
- data/README.rdoc +0 -154
- data/VERSION +0 -1
- data/lib/models/ref_collection.rb +0 -107
- /data/lib/{models → model}/generic_object.rb +0 -0
- /data/lib/{models → model}/geog.rb +0 -0
- /data/lib/{models → model}/geog_collection.rb +0 -0
- /data/lib/{models → model}/shared_class_methods.rb +0 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
module Taxonifi
|
2
|
+
class RefCollectionError < StandardError; end
|
3
|
+
|
4
|
+
module Model
|
5
|
+
|
6
|
+
# A collection of references.
|
7
|
+
class RefCollection < Taxonifi::Model::Collection
|
8
|
+
|
9
|
+
# An optional index when there is one reference per row.
|
10
|
+
# A Hash. {:row_number => Ref
|
11
|
+
attr_accessor :row_index
|
12
|
+
|
13
|
+
# A Hash. Keys are Ref#id, values are an Array of Person#ids.
|
14
|
+
# Built on request.
|
15
|
+
attr_accessor :author_index
|
16
|
+
|
17
|
+
def initialize(options = {})
|
18
|
+
super
|
19
|
+
@row_index = []
|
20
|
+
@author_index = {}
|
21
|
+
@fingerprint_index = {}
|
22
|
+
true
|
23
|
+
end
|
24
|
+
|
25
|
+
# The instance collection class.
|
26
|
+
def object_class
|
27
|
+
Taxonifi::Model::Ref
|
28
|
+
end
|
29
|
+
|
30
|
+
# The object at a given row.
|
31
|
+
# TODO: inherit from Collection?
|
32
|
+
def object_from_row(row_number)
|
33
|
+
return nil if row_number.nil?
|
34
|
+
@row_index[row_number]
|
35
|
+
end
|
36
|
+
|
37
|
+
# Incrementally (re-)assigns the id of every associated author (Person)
|
38
|
+
# This is only useful if you assume every author is unique.
|
39
|
+
def enumerate_authors(initial_id = 0)
|
40
|
+
i = initial_id
|
41
|
+
collection.each do |r|
|
42
|
+
r.authors.each do |a|
|
43
|
+
a.id = i
|
44
|
+
i += 1
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Finds unique authors, and combines them, then
|
50
|
+
# rebuilds author lists using references to the new unique set.
|
51
|
+
def uniquify_authors(initial_id = 0)
|
52
|
+
|
53
|
+
matching_index = {
|
54
|
+
# ref_id => { 'ref_string_fingerprint' => [author_position in Ref.authors]}
|
55
|
+
}
|
56
|
+
|
57
|
+
author_fingerprints = {}
|
58
|
+
|
59
|
+
# First pass, build matching array
|
60
|
+
collection.each do |r|
|
61
|
+
# Check for, and modify where necessary, Authors that are clearly not unique because
|
62
|
+
# they are replicated names in an author string, e.g. "Sweet and Sweet".
|
63
|
+
matching_index[r.id] = {}
|
64
|
+
r.authors.each_with_index do |a,i|
|
65
|
+
id = a.compact_string
|
66
|
+
if matching_index[r.id][id]
|
67
|
+
matching_index[r.id][id].push(i)
|
68
|
+
else
|
69
|
+
matching_index[r.id][id] = [i]
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Next pass, modify names of necessarily unique authors so
|
75
|
+
# their fingerprint is unique. Note we do not differentiate
|
76
|
+
# b/w sequential sets.
|
77
|
+
# E.g. if we have 5 names like so:
|
78
|
+
# Quate [1] and Quate [2]
|
79
|
+
# Quate [3], Smith [4] and Quate [5]
|
80
|
+
# Then [1,3], [2,5] become the same Person in this process. We can not
|
81
|
+
# of course differentiate order, or if a 3rd "Quate" is present here given
|
82
|
+
# only this information. Later on we might use Year of publication, or something
|
83
|
+
# similar to further "guess".
|
84
|
+
collection.each do |r|
|
85
|
+
matching_index[r.id].keys.each do |i|
|
86
|
+
if matching_index[r.id][i].size > 1
|
87
|
+
matching_index[r.id][i].each_with_index do |j,k|
|
88
|
+
# puts "uniquifying:" + "\_#{k}\_#{r.authors[j].last_name}"
|
89
|
+
r.authors[j].last_name = "\_#{k}\_#{r.authors[j].last_name}"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
# Generate new authors based on identity
|
96
|
+
authors = []
|
97
|
+
collection.each do |r|
|
98
|
+
r.authors.each do |a|
|
99
|
+
found = false
|
100
|
+
authors.each do |x|
|
101
|
+
if a.identical?(x)
|
102
|
+
found = true
|
103
|
+
next
|
104
|
+
end
|
105
|
+
end
|
106
|
+
if not found
|
107
|
+
authors.push a.clone
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# Sequentially number the new authors, and index them.
|
113
|
+
auth_index = {}
|
114
|
+
authors.each_with_index do |a, i|
|
115
|
+
a.id = i + initial_id
|
116
|
+
auth_index.merge!(a.compact_string => a)
|
117
|
+
end
|
118
|
+
|
119
|
+
# Replace old authors with newly built/sequentially id'ed authors
|
120
|
+
collection.each do |r|
|
121
|
+
new_authors = []
|
122
|
+
r.authors.inject(new_authors){|ary, a| ary.push(auth_index[a.compact_string])}
|
123
|
+
r.authors = new_authors
|
124
|
+
end
|
125
|
+
|
126
|
+
# Remove the modifications that made authors unique
|
127
|
+
# Crude to loop over these unnecessarily, but clean
|
128
|
+
authors.each do |a|
|
129
|
+
a.last_name.gsub!(/\_\d+\_/, '')
|
130
|
+
end
|
131
|
+
|
132
|
+
true
|
133
|
+
end
|
134
|
+
|
135
|
+
# Build the author index.
|
136
|
+
# {Ref#id => [a1#id, ... an#id]}
|
137
|
+
def build_author_index
|
138
|
+
collection.each do |r|
|
139
|
+
@author_index.merge!(r.id => r.authors.collect{|a| a.id ? a.id : -1})
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
# Return an Array of the unique author strings in this collection.
|
144
|
+
def unique_author_strings
|
145
|
+
auths = {}
|
146
|
+
collection.each do |r|
|
147
|
+
r.authors.each do |a|
|
148
|
+
auths.merge!(a.display_name => nil)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
auths.keys.sort
|
152
|
+
end
|
153
|
+
|
154
|
+
# Returns Array of Taxonifi::Model::Person
|
155
|
+
# !! Runs uniquify first. Careful, you might not want to do this
|
156
|
+
# !! unless you understand the consequences.
|
157
|
+
def unique_authors
|
158
|
+
uniquify_authors
|
159
|
+
all_authors
|
160
|
+
end
|
161
|
+
|
162
|
+
|
163
|
+
# Returns Array of Taxonifi::Model::Person
|
164
|
+
def all_authors
|
165
|
+
@collection.collect{|r| r.authors}.flatten.compact.uniq
|
166
|
+
end
|
167
|
+
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
end
|
@@ -8,7 +8,7 @@ module Taxonifi
|
|
8
8
|
# Taxonifi::Model::Names have no ids!
|
9
9
|
|
10
10
|
class SpeciesName < Taxonifi::Model::Base
|
11
|
-
ATTRIBUTES = [:genus, :subgenus, :species, :subspecies, :parent]
|
11
|
+
ATTRIBUTES = [:genus, :subgenus, :species, :subspecies, :variety, :parent]
|
12
12
|
ATTRIBUTES.each do |a|
|
13
13
|
attr_accessor a
|
14
14
|
end
|
@@ -20,6 +20,20 @@ module Taxonifi
|
|
20
20
|
true
|
21
21
|
end
|
22
22
|
|
23
|
+
# Create a new SpeciesName from a string format
|
24
|
+
# TODO: Is this replicated somewhere else?
|
25
|
+
# Examples:
|
26
|
+
# Taxonifi::Model::SpeciesName.new_from_string('Aus bus Smith, 1920')
|
27
|
+
# Taxonifi::Model::SpeciesName.new_from_string('Aus (Cus) bus dus (Smith, 1920)')
|
28
|
+
def self.new_from_string(name)
|
29
|
+
raise Taxonifi::SpeciesNameError, "No name passed to SpeciesName.new_from_string" if name.nil? || name.length == 0
|
30
|
+
# Appears to be a validly formatted species epithet at this point.
|
31
|
+
lexer = Taxonifi::Splitter::Lexer.new(name, :species_name)
|
32
|
+
builder = Taxonifi::Model::SpeciesName.new
|
33
|
+
Taxonifi::Splitter::Parser.new(lexer, builder).parse_species_name
|
34
|
+
builder
|
35
|
+
end
|
36
|
+
|
23
37
|
# Set the genus name.
|
24
38
|
def genus=(genus)
|
25
39
|
@genus = genus
|
@@ -46,6 +60,13 @@ module Taxonifi
|
|
46
60
|
@subspecies.parent = @species
|
47
61
|
end
|
48
62
|
|
63
|
+
# Set the variety name.
|
64
|
+
def variety=(variety)
|
65
|
+
raise Taxonifi::SpeciesNameError, "Varieties name must have a species name before variety can be assigned" if @species.nil?
|
66
|
+
@variety = variety
|
67
|
+
@variety.parent = (@subspecies ? @subspecies : @species)
|
68
|
+
end
|
69
|
+
|
49
70
|
# Set the parent name.
|
50
71
|
def parent=(parent)
|
51
72
|
if parent.class != Taxonifi::Model::Name
|
@@ -72,12 +93,12 @@ module Taxonifi
|
|
72
93
|
end
|
73
94
|
|
74
95
|
# Returns true if this combination contains a nominotypic subspecies name
|
75
|
-
def nominotypical_species
|
96
|
+
def nominotypical_species?
|
76
97
|
names.species && names.subspecies && (names.species.name == names.subspecies.name)
|
77
98
|
end
|
78
99
|
|
79
100
|
# Returns true if this combinations contains a nominotypic subgenus
|
80
|
-
def nominotypical_genus
|
101
|
+
def nominotypical_genus?
|
81
102
|
names.genus && names.subgenus && (names.genus.name == names.subgenus.name)
|
82
103
|
end
|
83
104
|
|
data/lib/splitter/builder.rb
CHANGED
@@ -3,7 +3,7 @@ module Taxonifi::Splitter::Builder
|
|
3
3
|
|
4
4
|
# Load all builders (= models)
|
5
5
|
# TODO: perhaps use a different scope that doesn't require loading all at once
|
6
|
-
Dir.glob( File.expand_path(File.join(File.dirname(__FILE__), "../
|
6
|
+
Dir.glob( File.expand_path(File.join(File.dirname(__FILE__), "../model/*.rb") )) do |file|
|
7
7
|
require file
|
8
8
|
end
|
9
9
|
|
data/lib/splitter/parser.rb
CHANGED
@@ -39,6 +39,11 @@ class Taxonifi::Splitter::Parser
|
|
39
39
|
@builder.send("#{r}=", Taxonifi::Model::Name.new(:name => t.send(r), rank: r) ) if t.send(r)
|
40
40
|
end
|
41
41
|
|
42
|
+
if @lexer.peek(Taxonifi::Splitter::Tokens::Variety)
|
43
|
+
t = @lexer.pop(Taxonifi::Splitter::Tokens::Variety)
|
44
|
+
@builder.variety = Taxonifi::Model::Name.new(:name => t.variety, rank: 'variety')
|
45
|
+
end
|
46
|
+
|
42
47
|
if @lexer.peek(Taxonifi::Splitter::Tokens::AuthorYear)
|
43
48
|
t = @lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)
|
44
49
|
@builder.names.last.author = t.authors
|
data/lib/splitter/tokens.rb
CHANGED
@@ -63,8 +63,8 @@ module Taxonifi::Splitter::Tokens
|
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
66
|
-
# Complex breakdown of author strings.
|
67
|
-
# a wide variety of formats.
|
66
|
+
# Complex breakdown of author strings.
|
67
|
+
# Handles a wide variety of formats.
|
68
68
|
# See test_splitter_tokens.rb for scope. As with
|
69
69
|
# AuthorYear this will match just about anything when used alone.
|
70
70
|
# Add exceptions at will, just test using TestSplittTokens#test_authors.
|
@@ -103,6 +103,33 @@ module Taxonifi::Splitter::Tokens
|
|
103
103
|
str = nil
|
104
104
|
end
|
105
105
|
|
106
|
+
# Look for an exception case. Last name, commas, no and. The idea is to decompose and
|
107
|
+
# have nothing left, if possible then the match is good.
|
108
|
+
if str && !naked_and && (str.split(",").size > 1) && (str =~ /[A-Z]\./)
|
109
|
+
test_str = str.clone
|
110
|
+
ok = true
|
111
|
+
pseudo_individuals = test_str.split(",").collect{|i| i.strip}
|
112
|
+
pseudo_individuals.each do |pi|
|
113
|
+
# All names must be identically formatted in this special case.
|
114
|
+
if pi =~ /(([A-Z][a-z]+)\s*(([A-Z]\.\s*)+))/
|
115
|
+
if not($1 == pi)
|
116
|
+
ok = false
|
117
|
+
end
|
118
|
+
else
|
119
|
+
ok = false
|
120
|
+
end
|
121
|
+
test_str.gsub!(/#{Regexp.quote(pi)}/, "")
|
122
|
+
end
|
123
|
+
|
124
|
+
if ok
|
125
|
+
test_str.gsub!(/\s*/, "")
|
126
|
+
if test_str.split(//).uniq == [","]
|
127
|
+
individuals = pseudo_individuals
|
128
|
+
str = nil
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
106
133
|
prefix = ['van den ', 'Van ', "O'", "Mc", 'Campos ', 'Costa ']
|
107
134
|
pre_reg = prefix.collect{|p| "(#{Regexp.escape(p)})?"}.join
|
108
135
|
|
@@ -135,7 +162,7 @@ module Taxonifi::Splitter::Tokens
|
|
135
162
|
while parsing
|
136
163
|
individual = ''
|
137
164
|
check_for_more_individuals = false
|
138
|
-
[m2, m1].each do |regex|
|
165
|
+
[ m2, m1].each do |regex|
|
139
166
|
if str =~ regex
|
140
167
|
individual = $1
|
141
168
|
str.slice!(individual)
|
@@ -165,7 +192,7 @@ module Taxonifi::Splitter::Tokens
|
|
165
192
|
|
166
193
|
individuals.push(last_individual) if !last_individual.nil?
|
167
194
|
individuals.flatten!
|
168
|
-
|
195
|
+
|
169
196
|
# At this point we have isolated individuals. Strategy is to slice out initials and remainder is last name.
|
170
197
|
# Initials regex matches any A-B. A. or " A ", "A-B" pattern (including repeats)
|
171
198
|
# TODO: Make a Token
|
@@ -257,11 +284,13 @@ module Taxonifi::Splitter::Tokens
|
|
257
284
|
# Foo stuff von Helsing, 1920
|
258
285
|
class Quadrinomial < Token
|
259
286
|
attr_reader :genus, :subgenus, :species, :subspecies
|
260
|
-
|
287
|
+
|
288
|
+
# Makes use of negative look ahead for a period ( (?!\.) ) at the end of a word boundary (\b).
|
289
|
+
@regexp = Regexp.new(/\A\s*(([A-Z][^\s]+\w)\s*(\([A-Z][a-z]+\))?\s?([a-z][^\s]+(?!\.))?\s?([a-z][^\s]*(?!\.)\b)?)\s*/)
|
261
290
|
|
262
291
|
def initialize(str)
|
263
292
|
str.strip
|
264
|
-
str =~ /\A\s*([A-Z][^\s]
|
293
|
+
str =~ /\A\s*([A-Z][^\s]+\w)\s*(\([A-Z][a-z]+\))?\s?([a-z][^\s]+(?!\.))?\s?([a-z][^\s]*(?!\.)\b)?\s*/i
|
265
294
|
@genus = $1
|
266
295
|
@subgenus = $2
|
267
296
|
@species = $3
|
@@ -272,6 +301,19 @@ module Taxonifi::Splitter::Tokens
|
|
272
301
|
end
|
273
302
|
end
|
274
303
|
end
|
304
|
+
|
305
|
+
# A token to match variety
|
306
|
+
# Matches:
|
307
|
+
# var. blorf
|
308
|
+
# v. blorf
|
309
|
+
class Variety < Token
|
310
|
+
attr_reader :variety
|
311
|
+
@regexp = Regexp.new(/\A\s*((var\.\s*|v\.\s*)(\w+))/i)
|
312
|
+
def initialize (str)
|
313
|
+
str =~ Regexp.new(/\A\s*(var\.\s*|v\.\s*)(\w+)/i)
|
314
|
+
@variety = $2
|
315
|
+
end
|
316
|
+
end
|
275
317
|
|
276
318
|
# !! You must register token lists as symbols in
|
277
319
|
# !! Taxonifi::Splitter
|
@@ -282,15 +324,16 @@ module Taxonifi::Splitter::Tokens
|
|
282
324
|
# Create an untested list at your own risk, any proposed
|
283
325
|
# ordering will be accepted as long as tests pass.
|
284
326
|
|
285
|
-
# All tokens.
|
327
|
+
# All tokens. Order matters!
|
286
328
|
def self.global_token_list
|
287
329
|
[
|
330
|
+
Taxonifi::Splitter::Tokens::AuthorYear,
|
288
331
|
Taxonifi::Splitter::Tokens::Quadrinomial,
|
332
|
+
Taxonifi::Splitter::Tokens::Variety,
|
289
333
|
Taxonifi::Splitter::Tokens::LeftParen,
|
290
334
|
Taxonifi::Splitter::Tokens::Year,
|
291
335
|
Taxonifi::Splitter::Tokens::Comma,
|
292
336
|
Taxonifi::Splitter::Tokens::RightParen,
|
293
|
-
Taxonifi::Splitter::Tokens::AuthorYear,
|
294
337
|
Taxonifi::Splitter::Tokens::Authors,
|
295
338
|
Taxonifi::Splitter::Tokens::VolumeNumber,
|
296
339
|
Taxonifi::Splitter::Tokens::Pages,
|
@@ -312,10 +355,12 @@ module Taxonifi::Splitter::Tokens
|
|
312
355
|
end
|
313
356
|
|
314
357
|
# Tokens used in breaking down species names.
|
358
|
+
# Order matters.
|
315
359
|
def self.species_name
|
316
360
|
[
|
317
|
-
Taxonifi::Splitter::Tokens::Quadrinomial,
|
318
361
|
Taxonifi::Splitter::Tokens::AuthorYear,
|
362
|
+
Taxonifi::Splitter::Tokens::Quadrinomial,
|
363
|
+
Taxonifi::Splitter::Tokens::Variety
|
319
364
|
]
|
320
365
|
end
|
321
366
|
|
data/lib/taxonifi.rb
CHANGED
@@ -29,20 +29,16 @@ module Taxonifi
|
|
29
29
|
variety
|
30
30
|
}
|
31
31
|
|
32
|
-
|
33
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'lumper/lumper'))
|
34
32
|
require File.expand_path(File.join(File.dirname(__FILE__), 'splitter/splitter'))
|
35
33
|
require File.expand_path(File.join(File.dirname(__FILE__), 'assessor/assessor'))
|
36
34
|
require File.expand_path(File.join(File.dirname(__FILE__), 'export/export'))
|
37
35
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
require file
|
36
|
+
# TODO use **/*.rb syntax
|
37
|
+
%w{model utils lumper}.each do |dir|
|
38
|
+
Dir.glob( File.expand_path(File.join(File.dirname(__FILE__), "#{dir}/*.rb") )) do |file|
|
39
|
+
require file
|
40
|
+
end
|
44
41
|
end
|
45
42
|
|
46
43
|
|
47
|
-
|
48
44
|
end
|
data/taxonifi.gemspec
CHANGED
@@ -1,116 +1,46 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'taxonifi/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "taxonifi"
|
8
|
-
s.version =
|
8
|
+
s.version = Taxonifi::VERSION
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["mjy"]
|
12
12
|
s.date = "2013-03-27"
|
13
|
-
|
14
|
-
s.
|
13
|
+
|
14
|
+
s.summary = "A general purpose framework for scripted handling of taxonomic names or other hierarchical metadata."
|
15
|
+
s.description = 'Taxonifi contains simple models and utilities of use for parsing lists of taxonomic name (life) related metadata or other hierarchically defined data.'
|
16
|
+
s.email = 'diapriid@gmail.com'
|
15
17
|
s.extra_rdoc_files = [
|
16
18
|
"LICENSE.txt",
|
17
|
-
"README.
|
18
|
-
]
|
19
|
-
s.files = [
|
20
|
-
".document",
|
21
|
-
"Gemfile",
|
22
|
-
"Gemfile.lock",
|
23
|
-
"LICENSE.txt",
|
24
|
-
"README.rdoc",
|
25
|
-
"Rakefile",
|
26
|
-
"VERSION",
|
27
|
-
"lib/assessor/assessor.rb",
|
28
|
-
"lib/assessor/base.rb",
|
29
|
-
"lib/assessor/row_assessor.rb",
|
30
|
-
"lib/export/export.rb",
|
31
|
-
"lib/export/format/base.rb",
|
32
|
-
"lib/export/format/obo_nomenclature.rb",
|
33
|
-
"lib/export/format/prolog.rb",
|
34
|
-
"lib/export/format/species_file.rb",
|
35
|
-
"lib/lumper/clump.rb",
|
36
|
-
"lib/lumper/lumper.rb",
|
37
|
-
"lib/lumper/lumps/parent_child_name_collection.rb",
|
38
|
-
"lib/models/author_year.rb",
|
39
|
-
"lib/models/base.rb",
|
40
|
-
"lib/models/collection.rb",
|
41
|
-
"lib/models/generic_object.rb",
|
42
|
-
"lib/models/geog.rb",
|
43
|
-
"lib/models/geog_collection.rb",
|
44
|
-
"lib/models/name.rb",
|
45
|
-
"lib/models/name_collection.rb",
|
46
|
-
"lib/models/person.rb",
|
47
|
-
"lib/models/ref.rb",
|
48
|
-
"lib/models/ref_collection.rb",
|
49
|
-
"lib/models/shared_class_methods.rb",
|
50
|
-
"lib/models/species_name.rb",
|
51
|
-
"lib/splitter/builder.rb",
|
52
|
-
"lib/splitter/lexer.rb",
|
53
|
-
"lib/splitter/parser.rb",
|
54
|
-
"lib/splitter/splitter.rb",
|
55
|
-
"lib/splitter/tokens.rb",
|
56
|
-
"lib/taxonifi.rb",
|
57
|
-
"lib/utils/array.rb",
|
58
|
-
"lib/utils/hash.rb",
|
59
|
-
"taxonifi.gemspec",
|
60
|
-
"test/file_fixtures/Fossil.csv",
|
61
|
-
"test/file_fixtures/Lygaeoidea.csv",
|
62
|
-
"test/file_fixtures/names.csv",
|
63
|
-
"test/helper.rb",
|
64
|
-
"test/test_export_prolog.rb",
|
65
|
-
"test/test_exporter.rb",
|
66
|
-
"test/test_lumper_clump.rb",
|
67
|
-
"test/test_lumper_geogs.rb",
|
68
|
-
"test/test_lumper_hierarchical_collection.rb",
|
69
|
-
"test/test_lumper_names.rb",
|
70
|
-
"test/test_lumper_parent_child_name_collection.rb",
|
71
|
-
"test/test_lumper_refs.rb",
|
72
|
-
"test/test_obo_nomenclature.rb",
|
73
|
-
"test/test_parser.rb",
|
74
|
-
"test/test_splitter.rb",
|
75
|
-
"test/test_splitter_tokens.rb",
|
76
|
-
"test/test_taxonifi.rb",
|
77
|
-
"test/test_taxonifi_accessor.rb",
|
78
|
-
"test/test_taxonifi_base.rb",
|
79
|
-
"test/test_taxonifi_geog.rb",
|
80
|
-
"test/test_taxonifi_name.rb",
|
81
|
-
"test/test_taxonifi_name_collection.rb",
|
82
|
-
"test/test_taxonifi_ref.rb",
|
83
|
-
"test/test_taxonifi_ref_collection.rb",
|
84
|
-
"test/test_taxonifi_species_name.rb"
|
19
|
+
"README.md"
|
85
20
|
]
|
21
|
+
|
22
|
+
s.files = `git ls-files -z`.split("\x0")
|
23
|
+
s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
24
|
+
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
86
25
|
s.homepage = "http://github.com/SpeciesFile/taxonifi"
|
87
26
|
s.licenses = ["MIT"]
|
88
27
|
s.require_paths = ["lib"]
|
89
|
-
s.rubygems_version = "
|
90
|
-
|
28
|
+
s.rubygems_version = "2.4.5"
|
29
|
+
|
30
|
+
s.add_dependency "rake", '~> 10.4'
|
31
|
+
|
32
|
+
s.add_development_dependency "bundler", "~> 1.9"
|
33
|
+
s.add_development_dependency 'awesome_print', '~> 1.6'
|
34
|
+
s.add_development_dependency 'did_you_mean', '~> 0.9'
|
35
|
+
s.add_development_dependency "byebug", "~> 4.0"
|
36
|
+
# s.add_development_dependency "rdoc", "~> 4.2"
|
37
|
+
|
38
|
+
s.add_development_dependency "builder", "~> 3.2"
|
39
|
+
|
40
|
+
|
41
|
+
# Travis
|
42
|
+
|
91
43
|
|
92
|
-
if s.respond_to? :specification_version then
|
93
|
-
s.specification_version = 3
|
94
44
|
|
95
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
96
|
-
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
97
|
-
s.add_development_dependency(%q<bundler>, ["> 1.0.0"])
|
98
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
|
99
|
-
s.add_development_dependency(%q<activerecord>, ["= 3.2.8"])
|
100
|
-
s.add_development_dependency(%q<debugger>, [">= 0"])
|
101
|
-
else
|
102
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
103
|
-
s.add_dependency(%q<bundler>, ["> 1.0.0"])
|
104
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
105
|
-
s.add_dependency(%q<activerecord>, ["= 3.2.8"])
|
106
|
-
s.add_dependency(%q<debugger>, [">= 0"])
|
107
|
-
end
|
108
|
-
else
|
109
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
110
|
-
s.add_dependency(%q<bundler>, ["> 1.0.0"])
|
111
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
112
|
-
s.add_dependency(%q<activerecord>, ["= 3.2.8"])
|
113
|
-
s.add_dependency(%q<debugger>, [">= 0"])
|
114
|
-
end
|
115
45
|
end
|
116
46
|
|
data/test/helper.rb
CHANGED
data/test/test_exporter.rb
CHANGED
@@ -45,7 +45,7 @@ class Test_TaxonifiExports < Test::Unit::TestCase
|
|
45
45
|
nc.ref_collection = rc
|
46
46
|
|
47
47
|
assert_equal "Crickets (Grylloptera: Grylloidea) in Dominican amber.", nc.ref_collection.object_from_row(0).title
|
48
|
-
assert_equal "Crickets (Grylloptera: Grylloidea) in Dominican amber.", nc.ref_collection.object_from_row(nc.collection[43].
|
48
|
+
assert_equal "Crickets (Grylloptera: Grylloidea) in Dominican amber.", nc.ref_collection.object_from_row(nc.collection[43].properties[:link_to_ref_from_row]).title
|
49
49
|
|
50
50
|
e = Taxonifi::Export::SpeciesFile.new(:nc => nc, :authorized_user_id => 11 )
|
51
51
|
e.export
|
data/test/test_lumper_names.rb
CHANGED
@@ -98,18 +98,18 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
98
98
|
|
99
99
|
csv = CSV.parse(string, {headers: true})
|
100
100
|
nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
|
101
|
-
assert_equal 1, nc.collection[3].
|
102
|
-
assert_equal 'Smith', nc.collection[3].
|
101
|
+
assert_equal 1, nc.collection[3].authors.size
|
102
|
+
assert_equal 'Smith', nc.collection[3].authors.first.last_name
|
103
103
|
assert_equal 1854, nc.collection[3].year
|
104
104
|
|
105
105
|
# Name only applies to the "last" name in the order.
|
106
106
|
assert_equal nil, nc.collection[0].author
|
107
107
|
assert_equal nil, nc.collection[1].author
|
108
|
-
assert_equal 1, nc.collection[2].
|
108
|
+
assert_equal 1, nc.collection[2].authors.size
|
109
109
|
|
110
110
|
assert_equal nil, nc.collection[0].parens
|
111
|
-
assert_equal
|
112
|
-
assert_equal
|
111
|
+
assert_equal false, nc.collection[2].parens
|
112
|
+
assert_equal true, nc.collection[3].parens
|
113
113
|
end
|
114
114
|
|
115
115
|
def test_that_create_a_name_collection_handles_related_columns
|
@@ -124,9 +124,9 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
124
124
|
|
125
125
|
csv = CSV.parse(string, {headers: true})
|
126
126
|
nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
|
127
|
-
assert_equal nil, nc.collection[2].
|
128
|
-
assert_equal "1", nc.collection[2].
|
129
|
-
assert_equal 'one', nc.collection[2].
|
127
|
+
assert_equal nil, nc.collection[2].properties['foo']
|
128
|
+
assert_equal "1", nc.collection[2].properties['bar'] # !!! everything converted to String
|
129
|
+
assert_equal 'one', nc.collection[2].properties['Stuff']
|
130
130
|
end
|
131
131
|
|
132
132
|
def test_that_create_a_name_collection_handles_varieties
|
@@ -154,7 +154,7 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
154
154
|
assert_equal nc.collection[2], nc.collection[4].parent
|
155
155
|
assert_equal 'variety', nc.collection[4].rank
|
156
156
|
assert_equal 'varblorf', nc.collection[5].name
|
157
|
-
assert_equal 'Smith', nc.collection[6].
|
157
|
+
assert_equal 'Smith', nc.collection[6].authors.first.last_name
|
158
158
|
|
159
159
|
# assert_equal 1, nc.collection[3].author.size
|
160
160
|
|
data/test/test_lumper_refs.rb
CHANGED
@@ -96,10 +96,10 @@ class Test_TaxonifiLumperRefs < Test::Unit::TestCase
|
|
96
96
|
csv = CSV.parse(csv_string, {headers: true})
|
97
97
|
rc = Taxonifi::Lumper.create_ref_collection(:csv => csv)
|
98
98
|
|
99
|
-
assert_equal "foo value", rc.collection.first.
|
100
|
-
assert_equal nil, rc.collection.last.
|
101
|
-
assert_equal '1', rc.collection.first.
|
102
|
-
assert_equal 'bar value', rc.collection.last.
|
99
|
+
assert_equal "foo value", rc.collection.first.properties['foo']
|
100
|
+
assert_equal nil, rc.collection.last.properties['foo']
|
101
|
+
assert_equal '1', rc.collection.first.properties['bar']
|
102
|
+
assert_equal 'bar value', rc.collection.last.properties['bar']
|
103
103
|
end
|
104
104
|
|
105
105
|
end
|