taxonifi 0.2.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +59 -0
- data/.travis.yml +11 -0
- data/Gemfile +5 -17
- data/Gemfile.lock +22 -40
- data/README.md +192 -0
- data/Rakefile +35 -26
- data/lib/export/format/base.rb +1 -1
- data/lib/export/format/species_file.rb +154 -152
- data/lib/lumper/clump.rb +1 -1
- data/lib/lumper/lumper.rb +22 -18
- data/lib/lumper/lumps/parent_child_name_collection.rb +1 -2
- data/lib/lumper/name_index.rb +21 -0
- data/lib/{models → model}/author_year.rb +2 -2
- data/lib/{models → model}/base.rb +35 -5
- data/lib/{models → model}/collection.rb +8 -1
- data/lib/{models → model}/name.rb +128 -36
- data/lib/{models → model}/name_collection.rb +134 -33
- data/lib/{models → model}/person.rb +1 -1
- data/lib/{models → model}/ref.rb +4 -2
- data/lib/model/ref_collection.rb +171 -0
- data/lib/{models → model}/species_name.rb +24 -3
- data/lib/splitter/builder.rb +1 -1
- data/lib/splitter/parser.rb +5 -0
- data/lib/splitter/tokens.rb +54 -9
- data/lib/taxonifi/version.rb +3 -0
- data/lib/taxonifi.rb +5 -9
- data/taxonifi.gemspec +29 -99
- data/test/helper.rb +1 -1
- data/test/test_exporter.rb +1 -1
- data/test/test_lumper_names.rb +9 -9
- data/test/test_lumper_refs.rb +4 -4
- data/test/test_parser.rb +97 -26
- data/test/test_splitter_tokens.rb +25 -4
- data/test/test_taxonifi_base.rb +1 -1
- data/test/test_taxonifi_geog.rb +1 -1
- data/test/test_taxonifi_name.rb +13 -14
- data/test/test_taxonifi_name_collection.rb +11 -5
- data/test/test_taxonifi_ref.rb +1 -1
- data/test/test_taxonifi_ref_collection.rb +40 -3
- data/test/test_taxonifi_species_name.rb +51 -1
- data/travis/before_install.sh +2 -0
- metadata +96 -66
- data/README.rdoc +0 -154
- data/VERSION +0 -1
- data/lib/models/ref_collection.rb +0 -107
- /data/lib/{models → model}/generic_object.rb +0 -0
- /data/lib/{models → model}/geog.rb +0 -0
- /data/lib/{models → model}/geog_collection.rb +0 -0
- /data/lib/{models → model}/shared_class_methods.rb +0 -0
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: taxonifi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.2
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- mjy
|
@@ -12,101 +11,106 @@ cert_chain: []
|
|
12
11
|
date: 2013-03-27 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
14
|
+
name: rake
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version: '
|
22
|
-
type: :
|
19
|
+
version: '10.4'
|
20
|
+
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
|
-
version: '
|
26
|
+
version: '10.4'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: bundler
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.9'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.9'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: awesome_print
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
36
46
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.
|
47
|
+
version: '1.6'
|
38
48
|
type: :development
|
39
49
|
prerelease: false
|
40
50
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
51
|
requirements:
|
43
|
-
- -
|
52
|
+
- - "~>"
|
44
53
|
- !ruby/object:Gem::Version
|
45
|
-
version: 1.
|
54
|
+
version: '1.6'
|
46
55
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
56
|
+
name: did_you_mean
|
48
57
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
58
|
requirements:
|
51
|
-
- - ~>
|
59
|
+
- - "~>"
|
52
60
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
61
|
+
version: '0.9'
|
54
62
|
type: :development
|
55
63
|
prerelease: false
|
56
64
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
65
|
requirements:
|
59
|
-
- - ~>
|
66
|
+
- - "~>"
|
60
67
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
68
|
+
version: '0.9'
|
62
69
|
- !ruby/object:Gem::Dependency
|
63
|
-
name:
|
70
|
+
name: byebug
|
64
71
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
72
|
requirements:
|
67
|
-
- -
|
73
|
+
- - "~>"
|
68
74
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
75
|
+
version: '4.0'
|
70
76
|
type: :development
|
71
77
|
prerelease: false
|
72
78
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
79
|
requirements:
|
75
|
-
- -
|
80
|
+
- - "~>"
|
76
81
|
- !ruby/object:Gem::Version
|
77
|
-
version:
|
82
|
+
version: '4.0'
|
78
83
|
- !ruby/object:Gem::Dependency
|
79
|
-
name:
|
84
|
+
name: builder
|
80
85
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
86
|
requirements:
|
83
|
-
- -
|
87
|
+
- - "~>"
|
84
88
|
- !ruby/object:Gem::Version
|
85
|
-
version: '
|
89
|
+
version: '3.2'
|
86
90
|
type: :development
|
87
91
|
prerelease: false
|
88
92
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
93
|
requirements:
|
91
|
-
- -
|
94
|
+
- - "~>"
|
92
95
|
- !ruby/object:Gem::Version
|
93
|
-
version: '
|
96
|
+
version: '3.2'
|
94
97
|
description: Taxonifi contains simple models and utilities for use in parsing lists
|
95
|
-
of taxonomic name (life) related metadata
|
98
|
+
of taxonomic name (life) related metadata or other hierarchically defined data.
|
96
99
|
email: diapriid@gmail.com
|
97
100
|
executables: []
|
98
101
|
extensions: []
|
99
102
|
extra_rdoc_files:
|
100
103
|
- LICENSE.txt
|
101
|
-
- README.
|
104
|
+
- README.md
|
102
105
|
files:
|
103
|
-
- .document
|
106
|
+
- ".document"
|
107
|
+
- ".gitignore"
|
108
|
+
- ".travis.yml"
|
104
109
|
- Gemfile
|
105
110
|
- Gemfile.lock
|
106
111
|
- LICENSE.txt
|
107
|
-
- README.
|
112
|
+
- README.md
|
108
113
|
- Rakefile
|
109
|
-
- VERSION
|
110
114
|
- lib/assessor/assessor.rb
|
111
115
|
- lib/assessor/base.rb
|
112
116
|
- lib/assessor/row_assessor.rb
|
@@ -118,25 +122,27 @@ files:
|
|
118
122
|
- lib/lumper/clump.rb
|
119
123
|
- lib/lumper/lumper.rb
|
120
124
|
- lib/lumper/lumps/parent_child_name_collection.rb
|
121
|
-
- lib/
|
122
|
-
- lib/
|
123
|
-
- lib/
|
124
|
-
- lib/
|
125
|
-
- lib/
|
126
|
-
- lib/
|
127
|
-
- lib/
|
128
|
-
- lib/
|
129
|
-
- lib/
|
130
|
-
- lib/
|
131
|
-
- lib/
|
132
|
-
- lib/
|
133
|
-
- lib/
|
125
|
+
- lib/lumper/name_index.rb
|
126
|
+
- lib/model/author_year.rb
|
127
|
+
- lib/model/base.rb
|
128
|
+
- lib/model/collection.rb
|
129
|
+
- lib/model/generic_object.rb
|
130
|
+
- lib/model/geog.rb
|
131
|
+
- lib/model/geog_collection.rb
|
132
|
+
- lib/model/name.rb
|
133
|
+
- lib/model/name_collection.rb
|
134
|
+
- lib/model/person.rb
|
135
|
+
- lib/model/ref.rb
|
136
|
+
- lib/model/ref_collection.rb
|
137
|
+
- lib/model/shared_class_methods.rb
|
138
|
+
- lib/model/species_name.rb
|
134
139
|
- lib/splitter/builder.rb
|
135
140
|
- lib/splitter/lexer.rb
|
136
141
|
- lib/splitter/parser.rb
|
137
142
|
- lib/splitter/splitter.rb
|
138
143
|
- lib/splitter/tokens.rb
|
139
144
|
- lib/taxonifi.rb
|
145
|
+
- lib/taxonifi/version.rb
|
140
146
|
- lib/utils/array.rb
|
141
147
|
- lib/utils/hash.rb
|
142
148
|
- taxonifi.gemspec
|
@@ -165,32 +171,56 @@ files:
|
|
165
171
|
- test/test_taxonifi_ref.rb
|
166
172
|
- test/test_taxonifi_ref_collection.rb
|
167
173
|
- test/test_taxonifi_species_name.rb
|
174
|
+
- travis/before_install.sh
|
168
175
|
homepage: http://github.com/SpeciesFile/taxonifi
|
169
176
|
licenses:
|
170
177
|
- MIT
|
178
|
+
metadata: {}
|
171
179
|
post_install_message:
|
172
180
|
rdoc_options: []
|
173
181
|
require_paths:
|
174
182
|
- lib
|
175
183
|
required_ruby_version: !ruby/object:Gem::Requirement
|
176
|
-
none: false
|
177
184
|
requirements:
|
178
|
-
- -
|
185
|
+
- - ">="
|
179
186
|
- !ruby/object:Gem::Version
|
180
187
|
version: '0'
|
181
|
-
segments:
|
182
|
-
- 0
|
183
|
-
hash: -2473283969605789743
|
184
188
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
185
|
-
none: false
|
186
189
|
requirements:
|
187
|
-
- -
|
190
|
+
- - ">="
|
188
191
|
- !ruby/object:Gem::Version
|
189
192
|
version: '0'
|
190
193
|
requirements: []
|
191
194
|
rubyforge_project:
|
192
|
-
rubygems_version:
|
195
|
+
rubygems_version: 2.4.5
|
193
196
|
signing_key:
|
194
|
-
specification_version:
|
195
|
-
summary: A general purpose framework for scripted handling of taxonomic names
|
196
|
-
|
197
|
+
specification_version: 4
|
198
|
+
summary: A general purpose framework for scripted handling of taxonomic names or other
|
199
|
+
hierarchical metadata.
|
200
|
+
test_files:
|
201
|
+
- test/file_fixtures/Fossil.csv
|
202
|
+
- test/file_fixtures/Lygaeoidea.csv
|
203
|
+
- test/file_fixtures/names.csv
|
204
|
+
- test/helper.rb
|
205
|
+
- test/test_export_prolog.rb
|
206
|
+
- test/test_exporter.rb
|
207
|
+
- test/test_lumper_clump.rb
|
208
|
+
- test/test_lumper_geogs.rb
|
209
|
+
- test/test_lumper_hierarchical_collection.rb
|
210
|
+
- test/test_lumper_names.rb
|
211
|
+
- test/test_lumper_parent_child_name_collection.rb
|
212
|
+
- test/test_lumper_refs.rb
|
213
|
+
- test/test_obo_nomenclature.rb
|
214
|
+
- test/test_parser.rb
|
215
|
+
- test/test_splitter.rb
|
216
|
+
- test/test_splitter_tokens.rb
|
217
|
+
- test/test_taxonifi.rb
|
218
|
+
- test/test_taxonifi_accessor.rb
|
219
|
+
- test/test_taxonifi_base.rb
|
220
|
+
- test/test_taxonifi_geog.rb
|
221
|
+
- test/test_taxonifi_name.rb
|
222
|
+
- test/test_taxonifi_name_collection.rb
|
223
|
+
- test/test_taxonifi_ref.rb
|
224
|
+
- test/test_taxonifi_ref_collection.rb
|
225
|
+
- test/test_taxonifi_species_name.rb
|
226
|
+
has_rdoc:
|
data/README.rdoc
DELETED
@@ -1,154 +0,0 @@
|
|
1
|
-
= taxonifi
|
2
|
-
There will always be "legacy" taxonomic data that needs shuffling around. The taxonifi gem is a suite of general purpose tools that act as a middle layer for data-conversion purposes (e.g. migrating legacy taxonomic databases). Its first application was to convert DwC-style data downloaded from EoL into a Species File. The code is well documented in unit tests; poke around to see if it might be useful. In particular, if you've considered building a collection of regular expressions particular to biodiversity data, look at the Tokens code and related tests.
|
3
|
-
|
4
|
-
Overall, the goal is to provide well documented (and unit-tested) code that is broadly useful, and vanilla enough to encourage others to fork and hack on their own.
|
5
|
-
|
6
|
-
== Source
|
7
|
-
Source is available at https://github.com/SpeciesFile/taxonifi . The rdoc API is also viewable at http://taxonifi.speciesfile.org , (though those docs may lag behind commits to github).
|
8
|
-
|
9
|
-
== What's next?
|
10
|
-
|
11
|
-
Before you jump on board you should also check out similar code from the Global Names team at https://github.com/GlobalNamesArchitecture. Future integration and merging of shared functionality is planned. Code will be released in an "early-and-often" approach.
|
12
|
-
|
13
|
-
Taxonifi is presently coded for convenience, not speed (though it's not necessarily slow). It assumes that conversion processes are typically one-offs that can afford to run over a longer period of time (read minutes rather than seconds). Reading, and fully parsing into objects, around 25k rows of nomenclature (class to species, inc. author year, = ~45k names) into memory as Taxonifi objects benchmarks at around 2 minutes. Faster indexing is planned as needed, likely using Redis (see GNA link above).
|
14
|
-
|
15
|
-
= Getting started
|
16
|
-
taxonifi is coded for Ruby 1.9.3; it has not been tested on earlier versions (though it will certainly not work with 1.8.7).
|
17
|
-
Using Ruby Version Manager (RVM, https://rvm.io/ ) is highly recommended. You can check your version of Ruby by running "ruby -v" in your terminal.
|
18
|
-
|
19
|
-
To install:
|
20
|
-
|
21
|
-
gem install taxonifi
|
22
|
-
|
23
|
-
In your script
|
24
|
-
|
25
|
-
require 'taxonifi'
|
26
|
-
|
27
|
-
|
28
|
-
= Use
|
29
|
-
== Quick start
|
30
|
-
|
31
|
-
Write some code:
|
32
|
-
|
33
|
-
require 'taxonifi'
|
34
|
-
|
35
|
-
headers = ["a", "B", "c"]
|
36
|
-
csv_string = CSV.generate() do |csv|
|
37
|
-
csv << @headers
|
38
|
-
csv << %w{a b c}
|
39
|
-
end
|
40
|
-
|
41
|
-
csv = CSV.parse(csv_string, {headers: true, :header_converters :downcase})
|
42
|
-
|
43
|
-
# Taxonifi can create generic hierarchical collections based on column headers
|
44
|
-
c = Taxonifi::Lumper.create_hierarchical_collection(csv, %w{a b c}) # => a Taxonifi::Model::Collection
|
45
|
-
c.collection.first # => Taxonifi::Model::GenericObject
|
46
|
-
c.collection.first.name # => "a"
|
47
|
-
c.collection.last.name # => "c"
|
48
|
-
c.collection.last.parent.name # => "b"
|
49
|
-
c.collection.first.row_number # => 0
|
50
|
-
c.collection.first.rank # => "a"
|
51
|
-
|
52
|
-
# Header order is important:
|
53
|
-
c = Taxonifi::Lumper.create_hierarchical_collection(csv, %w{c a b}) # => a Taxonifi::Model::Collection
|
54
|
-
c.collection.first.name # => "c"
|
55
|
-
c.collection.last.rank # => "c"
|
56
|
-
c.collection.last.name # => "b"
|
57
|
-
c.collection.last.parent.name # => "a"
|
58
|
-
|
59
|
-
# Collections of GenericObjects (and some other Taxonifi::Collection based objects like TaxonifiNameCollection) only include
|
60
|
-
# unique names, i.e. if a name has a shared parent lineage only the name itself is created, not its parents.
|
61
|
-
# For example, for:
|
62
|
-
# a b c
|
63
|
-
# a d nil
|
64
|
-
# b nil d
|
65
|
-
# The collection consists of objects with names a,b,c,d,b,d respectively.
|
66
|
-
# This makes it very useful for handling not only nomenclatural but other nested data as well.
|
67
|
-
|
68
|
-
There are collections of specific types (e.g. taxonomic names, geographic names):
|
69
|
-
|
70
|
-
string = CSV.generate() do |csv|
|
71
|
-
csv << %w{family genus species author_year}
|
72
|
-
csv << ["Fooidae", "Foo", "bar", "Smith, 1854"]
|
73
|
-
csv << ["Fooidae", "Foo", "foo", "(Smith, 1854)"]
|
74
|
-
end
|
75
|
-
|
76
|
-
csv = CSV.parse(string, {headers: true})
|
77
|
-
|
78
|
-
nc = Taxonifi::Lumper.create_name_collection(:csv => csv) # => Taxonifi::Model::NameCollection
|
79
|
-
|
80
|
-
nc.collection.first # => Taxonifi::Model::Name
|
81
|
-
nc.collection.first.name # => "Fooidae"
|
82
|
-
nc.collection.first.rank # => "family"
|
83
|
-
nc.collection.first.year # => nil
|
84
|
-
nc.collection.first.author # => []
|
85
|
-
nc.collection.last.rank # => "species"
|
86
|
-
nc.collection.last.name # => "foo"
|
87
|
-
nc.collection.last.author.first.last_name # => "Smith"
|
88
|
-
nc.collection.last.year # => "1854"
|
89
|
-
|
90
|
-
Parent/child style nomenclature is also parseable.
|
91
|
-
|
92
|
-
There are *lots* more examples of code use in the test suite.
|
93
|
-
|
94
|
-
== Export/conversion
|
95
|
-
|
96
|
-
The following is an example that translates a DwC style input format as exported by EOL into tables importable to SpeciesFile. The input file has id, parent, child, vernacular, synonym columns. Data are exported by default to the user's home folder in a taxonifi directory. The export creates 6 tables that can be imported into Species File directly.
|
97
|
-
|
98
|
-
require 'taxonifi'
|
99
|
-
file = File.expand_path(File.join(File.dirname(__FILE__), 'file_fixtures/Lygaeoidea-csv.tsv'))
|
100
|
-
|
101
|
-
csv = CSV.read(file, {
|
102
|
-
headers: true,
|
103
|
-
col_sep: "\t",
|
104
|
-
header_converters: :downcase
|
105
|
-
} )
|
106
|
-
|
107
|
-
nc = Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection(csv)
|
108
|
-
e = Taxonifi::Export::SpeciesFile.new(:nc => nc, :authorized_user_id => 1)
|
109
|
-
e.export
|
110
|
-
|
111
|
-
You should be able to relatively quickly use the export framework to code new output formats.
|
112
|
-
|
113
|
-
== Reading files
|
114
|
-
|
115
|
-
taxonifi feeds on Ruby's CSV. Read your files with headers true, and downcased, e.g.:
|
116
|
-
|
117
|
-
csv = CSV.read('input/my_data.tab', {
|
118
|
-
headers: true,
|
119
|
-
header_converters: :downcase,
|
120
|
-
col_sep: "\t" } )
|
121
|
-
|
122
|
-
== Code organization
|
123
|
-
|
124
|
-
test # unit tests, quite a few of them
|
125
|
-
lib # the main libraries
|
126
|
-
lib/assessor # libraries to assess the properties of incoming data
|
127
|
-
lib/export # export wrappers
|
128
|
-
lib/export/format # one module for each export type
|
129
|
-
lumper # code that builds Taxonifi objects
|
130
|
-
models # Taxonifi objects
|
131
|
-
splitter # a parser/lexer/token suite for breaking down data
|
132
|
-
|
133
|
-
= Contributing to taxonifi
|
134
|
-
|
135
|
-
(this is generic)
|
136
|
-
|
137
|
-
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
138
|
-
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
139
|
-
* Fork the project.
|
140
|
-
* Start a feature/bugfix branch.
|
141
|
-
* Commit and push until you are happy with your contribution.
|
142
|
-
* Write unit test for your code. Changes are good, just as long as tests run clean.
|
143
|
-
* All pull requests should test clean.
|
144
|
-
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
145
|
-
|
146
|
-
= About
|
147
|
-
|
148
|
-
taxonifi is coded by Matt Yoder in consultation with the Species File Group at University of Illinois.
|
149
|
-
|
150
|
-
= Copyright
|
151
|
-
|
152
|
-
Copyright (c) 2012 Illinois Natural History Survey. See LICENSE.txt for
|
153
|
-
further details.
|
154
|
-
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.2.0
|
@@ -1,107 +0,0 @@
|
|
1
|
-
module Taxonifi
|
2
|
-
class RefCollectionError < StandardError; end
|
3
|
-
|
4
|
-
module Model
|
5
|
-
|
6
|
-
# A collection of references.
|
7
|
-
class RefCollection < Taxonifi::Model::Collection
|
8
|
-
|
9
|
-
# An options index when there is one reference per row.
|
10
|
-
attr_accessor :row_index
|
11
|
-
|
12
|
-
# Points a Ref#id to an array of Person#ids.
|
13
|
-
# Built on request.
|
14
|
-
attr_accessor :author_index
|
15
|
-
|
16
|
-
def initialize(options = {})
|
17
|
-
super
|
18
|
-
@row_index = []
|
19
|
-
@author_index = {}
|
20
|
-
true
|
21
|
-
end
|
22
|
-
|
23
|
-
# The instance collection class.
|
24
|
-
def object_class
|
25
|
-
Taxonifi::Model::Ref
|
26
|
-
end
|
27
|
-
|
28
|
-
# The object at a given row.
|
29
|
-
# TODO: inherit from Collection?
|
30
|
-
def object_from_row(row_number)
|
31
|
-
return nil if row_number.nil?
|
32
|
-
@row_index[row_number]
|
33
|
-
end
|
34
|
-
|
35
|
-
# Incrementally (re-)assigns the id of every associated author (Person)
|
36
|
-
# This is only useful if you assume every author is unique.
|
37
|
-
def enumerate_authors(initial_id = 0)
|
38
|
-
i = initial_id
|
39
|
-
collection.each do |r|
|
40
|
-
r.authors.each do |a|
|
41
|
-
a.id = i
|
42
|
-
i += 1
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
# Finds unique authors, and combines them, then
|
48
|
-
# rebuilds author lists using references to the new unique set.
|
49
|
-
def uniquify_authors(initial_id = 0)
|
50
|
-
auth_index = {}
|
51
|
-
unique_authors.each_with_index do |a, i|
|
52
|
-
a.id = i + initial_id
|
53
|
-
auth_index.merge!(a.compact_string => a)
|
54
|
-
end
|
55
|
-
|
56
|
-
collection.each do |r|
|
57
|
-
new_authors = []
|
58
|
-
r.authors.inject(new_authors){|ary, a| ary.push(auth_index[a.compact_string])}
|
59
|
-
r.authors = new_authors
|
60
|
-
end
|
61
|
-
true
|
62
|
-
end
|
63
|
-
|
64
|
-
# Build the author index.
|
65
|
-
# {Ref#id => [a1#id, ... an#id]}
|
66
|
-
def build_author_index
|
67
|
-
collection.each do |r|
|
68
|
-
@author_index.merge!(r.id => r.authors.collect{|a| a.id ? a.id : -1})
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
# Return an array of the unique author strings in this collection.
|
73
|
-
def unique_author_strings
|
74
|
-
auths = {}
|
75
|
-
collection.each do |r|
|
76
|
-
r.authors.each do |a|
|
77
|
-
auths.merge!(a.display_name => nil)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
auths.keys.sort
|
81
|
-
end
|
82
|
-
|
83
|
-
# Returns Array of Taxonifi::Model::Person
|
84
|
-
# Will need better indexing on big lists?
|
85
|
-
def unique_authors
|
86
|
-
auths = []
|
87
|
-
collection.each do |r|
|
88
|
-
r.authors.each do |a|
|
89
|
-
found = false
|
90
|
-
auths.each do |x|
|
91
|
-
if a.identical?(x)
|
92
|
-
found = true
|
93
|
-
next
|
94
|
-
end
|
95
|
-
end
|
96
|
-
if not found
|
97
|
-
auths.push a.clone
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
auths
|
102
|
-
end
|
103
|
-
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|