taxonifi 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +155 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/lib/assessor/assessor.rb +31 -0
- data/lib/assessor/base.rb +17 -0
- data/lib/assessor/row_assessor.rb +131 -0
- data/lib/export/export.rb +9 -0
- data/lib/export/format/base.rb +43 -0
- data/lib/export/format/species_file.rb +341 -0
- data/lib/lumper/lumper.rb +334 -0
- data/lib/lumper/lumps/parent_child_name_collection.rb +84 -0
- data/lib/models/author_year.rb +39 -0
- data/lib/models/base.rb +73 -0
- data/lib/models/collection.rb +92 -0
- data/lib/models/generic_object.rb +15 -0
- data/lib/models/geog.rb +59 -0
- data/lib/models/geog_collection.rb +28 -0
- data/lib/models/name.rb +206 -0
- data/lib/models/name_collection.rb +149 -0
- data/lib/models/person.rb +49 -0
- data/lib/models/ref.rb +85 -0
- data/lib/models/ref_collection.rb +106 -0
- data/lib/models/species_name.rb +85 -0
- data/lib/splitter/builder.rb +26 -0
- data/lib/splitter/lexer.rb +70 -0
- data/lib/splitter/parser.rb +54 -0
- data/lib/splitter/splitter.rb +45 -0
- data/lib/splitter/tokens.rb +322 -0
- data/lib/taxonifi.rb +36 -0
- data/test/file_fixtures/Lygaeoidea.csv +801 -0
- data/test/helper.rb +38 -0
- data/test/test_exporter.rb +32 -0
- data/test/test_lumper_geogs.rb +59 -0
- data/test/test_lumper_hierarchical_collection.rb +88 -0
- data/test/test_lumper_names.rb +119 -0
- data/test/test_lumper_parent_child_name_collection.rb +41 -0
- data/test/test_lumper_refs.rb +91 -0
- data/test/test_parser.rb +34 -0
- data/test/test_splitter.rb +27 -0
- data/test/test_splitter_tokens.rb +403 -0
- data/test/test_taxonifi.rb +11 -0
- data/test/test_taxonifi_accessor.rb +61 -0
- data/test/test_taxonifi_geog.rb +51 -0
- data/test/test_taxonifi_name.rb +186 -0
- data/test/test_taxonifi_name_collection.rb +158 -0
- data/test/test_taxonifi_ref.rb +90 -0
- data/test/test_taxonifi_ref_collection.rb +69 -0
- data/test/test_taxonifi_species_name.rb +95 -0
- metadata +167 -0
data/test/helper.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
require 'debugger'
|
4
|
+
|
5
|
+
# Activate the gems of the :default and :development groups before the test
# libraries are loaded.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Print Bundler's message plus a hint, then exit with Bundler's status code.
  $stderr.puts bundler_error.message, "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
#require 'shoulda'

# Put the sibling lib/ directory and this directory on the load path.
# This directory is unshifted last, so it is searched first.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)

# Reopened without adding anything.
class Test::Unit::TestCase
end
|
21
|
+
|
22
|
+
# TODO: rename to reflect format
|
23
|
+
# Builds a small parent/child CSV fixture of names.
#
# Sets three instance variables on the caller:
#   @headers    - the column names
#   @csv_string - the generated CSV text (header row plus seven data rows)
#   @csv        - @csv_string parsed with the first row treated as headers
#
# Returns the parsed table (the same object as @csv).
def generic_csv_with_names
  @headers = %w[identifier parent child rank synonyms]

  rows = [
    [0, nil, "Root", "class", nil],
    [1, "0", "Aidae", "Family", nil],
    [2, "0", "Bidae", "Family", nil],
    [3, "1", "Foo", "Genus", nil],
    [4, "3", "Foo bar", "species", nil], # case testing
    [5, "4", "Foo bar bar", "species", nil],
    # initial subspecies rank data had rank blank, assuming they will be called species
    [6, "3", "Foo bar stuff (Guy, 1921)", "species", "Foo bar blorf (Guy, 1921)"]
  ]

  @csv_string = CSV.generate do |csv|
    csv << @headers
    rows.each { |row| csv << row }
  end

  # Options go in as keywords: a positional options hash raises ArgumentError
  # on Ruby 3.0+, where CSV.parse accepts keyword options only.
  @csv = CSV.parse(@csv_string, headers: true)
end
|
38
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/export/export'))
|
3
|
+
|
4
|
+
# Tests for the export classes: Taxonifi::Export::Base and
# Taxonifi::Export::SpeciesFile.
class Test_TaxonifiExports < Test::Unit::TestCase

  def test_that_new_generic_export_can_be_instantiated
    assert Taxonifi::Export::Base.new
  end

  # The name lacks the test_ prefix, so the test runner does not pick it up.
  def dont_test_that_species_file_export_does_stuff
    csv = generic_csv_with_names
    nc = Taxonifi::Lumper::Lumps::EolNameCollection.name_collection(csv)
    e = Taxonifi::Export::SpeciesFile.new(:nc => nc, :authorized_user_id => 15)
    assert e.export
  end

  # Runs the fixture file through the parent/child lump and the SpeciesFile
  # export, and checks that the export returns something truthy.
  def test_big_file
    file = File.expand_path(File.join(File.dirname(__FILE__), 'file_fixtures/Lygaeoidea.csv'))

    # Options go in as keywords: a positional options hash raises
    # ArgumentError on Ruby 3.0+, where CSV.read accepts keyword options only.
    csv = CSV.read(file, headers: true, col_sep: ",", header_converters: :downcase)

    nc = Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection(csv)
    e = Taxonifi::Export::SpeciesFile.new(:nc => nc, :authorized_user_id => 15)
    assert e.export
  end

end
|
32
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
|
3
|
+
# Builder construction
|
4
|
+
|
5
|
+
# Tests for Taxonifi::Lumper.create_geog_collection and the lumps available
# for country/state/county headers.
class Test_TaxonifiLumperGeogs < Test::Unit::TestCase

  # Builds @headers, @csv_string and @csv for a five-row
  # country/state/county table.
  def setup
    @headers = ["country", "state", "county"]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Canada", "", nil]
      csv << ["Canada", "Saskatchewan", nil]
      csv << ["USA", "Texas", nil]
      csv << ["USA", "Texas", "Brazos"]
      csv << ["Utopia", nil, "Wonderland"]
    end

    # The row_index looks like this:
    # 0
    # 0 1
    # 2 3
    # 2 3 4
    # 5 6
    #
    # The name_index looks like
    # {:country => {"Canada" => 0, "USA" => 2, "Utopia" => 5} ... etc.

    # Options go in as keywords: a positional options hash raises
    # ArgumentError on Ruby 3.0+, where CSV.parse accepts keyword options only.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  # Stores the geog collection built from @csv in @gc.
  def _create_a_collection
    @gc = Taxonifi::Lumper.create_geog_collection(@csv)
  end

  def test_available_lumps
    assert_equal [:basic_geog], Taxonifi::Lumper.available_lumps(@csv.headers)
  end

  def test_that_create_geog_collection_creates_a_geog_collection
    gc = Taxonifi::Lumper.create_geog_collection(@csv)
    assert_equal Taxonifi::Model::GeogCollection, gc.class
  end

  def test_that_create_geog_collection_instantiates_geogs
    _create_a_collection
    assert_equal 7, @gc.collection.size
    assert_equal "Canada", @gc.collection.first.name
    assert_equal "Wonderland", @gc.collection.last.name
  end

  def test_that_create_geog_collection_assigns_parenthood
    _create_a_collection
    assert_equal 0, @gc.collection[1].parent.id
    assert_equal 5, @gc.collection[6].parent.id
    assert_equal 3, @gc.collection[4].parent.id
  end

end
|
59
|
+
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/lumper/lumper'))
|
3
|
+
|
4
|
+
# Builder construction
|
5
|
+
|
6
|
+
# Tests for Taxonifi::Lumper.create_hierarchical_collection, which is given a
# parsed CSV table and an ordered list of header names.
class Test_TaxonifiLumperHierarchicalCollection < Test::Unit::TestCase

  # Builds @headers, @csv_string and @csv for a single-row a/b/c table.
  def setup
    @headers = ["a", "b", "c"]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << %w{a b c}
    end

    # Options go in as keywords: a positional options hash raises
    # ArgumentError on Ruby 3.0+, where CSV.parse accepts keyword options only.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  def test_that_create_hierarchical_collection_creates_collection
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{a b c})
    assert_equal Taxonifi::Model::Collection, c.class
  end

  def test_that_a_hierarchical_collection_instantiates_generic_objects
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{a b c})
    assert_equal Taxonifi::Model::GenericObject, c.collection.first.class
  end

  def test_that_collection_store_names
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{a b c})
    assert_equal "a", c.collection.first.name
    assert_equal "b", c.collection[1].name
    assert_equal "c", c.collection[2].name
  end

  def test_that_header_order_is_applied
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{c a b})
    assert_equal "c", c.collection.first.name
    assert_equal "a", c.collection[1].name
    assert_equal "b", c.collection[2].name
  end

  def test_that_parent_objects_are_assigned
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{a b c})
    assert_nil c.collection.first.parent
    assert_equal "a", c.collection[1].parent.name
    assert_equal "b", c.collection[2].parent.name
  end

  def test_that_parents_are_assigned_across_blank_columns
    csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["a", nil, "c"]
    end
    csv = CSV.parse(csv_string, headers: true)
    c = Taxonifi::Lumper.create_hierarchical_collection(csv, %w{a b c})
    assert_nil c.collection.first.parent
    assert_equal "a", c.collection[1].parent.name
  end

  def test_that_names_at_rank_are_synonymous_when_parents_are_identical
    csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["a", "b", "c"]
      csv << ["a", "b", "d"]
      csv << ["e", "b", "f"]
    end
    csv = CSV.parse(csv_string, headers: true)
    c = Taxonifi::Lumper.create_hierarchical_collection(csv, %w{a b c})
    # "b" appears twice: once under parent "a" and once under parent "e".
    assert_equal %w{a b c d e b f}, c.collection.map { |o| o.name }
    assert_equal 7, c.collection.size
  end

  # def test_that_create_geog_collection_instantiates_geogs
  #   _create_a_collection
  #   assert_equal 7, @gc.collection.size
  #   assert_equal "Canada", @gc.collection.first.name
  #   assert_equal "Wonderland", @gc.collection.last.name
  # end

  # def test_that_create_geog_collection_assigns_parenthood
  #   _create_a_collection
  #   assert_equal 0, @gc.collection[1].parent.id
  #   assert_equal 5, @gc.collection[6].parent.id
  #   assert_equal 3, @gc.collection[4].parent.id
  # end

end
|
88
|
+
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/lumper/lumper'))
|
3
|
+
|
4
|
+
# Builder construction
|
5
|
+
|
6
|
+
# Tests for Taxonifi::Lumper.available_lumps and
# Taxonifi::Lumper.create_name_collection.
class Test_TaxonifiLumperNames < Test::Unit::TestCase

  # Builds @headers, @csv_string and @csv for a single
  # family/genus/species/author/year row.
  def setup
    @headers = ["family", "genus", "species", "author", "year"]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Fooidae", "Foo", "bar", "Smith", "1854"]
    end

    # Options go in as keywords: a positional options hash raises
    # ArgumentError on Ruby 3.0+, where CSV.parse accepts keyword options only.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  def test_that_setup_setups
    assert_equal @headers, @csv.headers
  end

  def test_available_lumps_raise_without_arrays
    assert_raises Taxonifi::Lumper::LumperError do
      Taxonifi::Lumper.available_lumps("foo")
    end
  end

  def test_available_lumps
    assert Taxonifi::Lumper.available_lumps(Taxonifi::Lumper::QUAD).include?(:quadrinomial)
    assert Taxonifi::Lumper.available_lumps(Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD).include?(:quad_author_year)
    assert !Taxonifi::Lumper.available_lumps(Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD).include?(:names)
  end

  def test_create_name_collection_creates_a_name_collection
    assert_equal Taxonifi::Model::NameCollection, Taxonifi::Lumper.create_name_collection(@csv).class
  end

  def test_that_create_name_collection_raises_when_fed_non_csv
    assert_raises Taxonifi::Lumper::LumperError do
      Taxonifi::Lumper.create_name_collection("FOO")
    end
  end

  def test_that_create_name_collection_populates_a_name_collection
    nc = Taxonifi::Lumper.create_name_collection(@csv)
    assert_equal 3, nc.collection.size
    assert_equal ["Fooidae", "Foo", "bar"], nc.collection.map { |n| n.name }
  end

  def test_that_create_name_collection_assigns_row_number
    nc = Taxonifi::Lumper.create_name_collection(@csv)
    assert_equal 0, nc.collection.first.row_number
    assert_equal 0, nc.collection.last.row_number
  end

  def test_that_create_name_collection_parentifies
    nc = Taxonifi::Lumper.create_name_collection(@csv)
    assert_equal nc.collection[0], nc.collection[1].parent
    assert_equal nc.collection[1], nc.collection[2].parent
  end

  def test_that_create_a_name_collection_handles_homonomy
    string = CSV.generate do |csv|
      csv << @headers
      csv << ["Fooidae", "Foo", "bar", "Smith", "1854"]
      csv << ["Blorf", "Foo", "bar", "Smith", "1854"]
      csv << ["Fooidae", "Bar", "bar", "Smith", "1854"]
    end

    # The index should break down like this
    # 0 2 5
    # 1 3 6
    # 0 4 7

    csv = CSV.parse(string, headers: true)
    nc = Taxonifi::Lumper.create_name_collection(csv)

    assert_equal nc.collection[2], nc.collection[5].parent
    assert_equal nc.collection[0], nc.collection[2].parent
    assert_equal nc.collection[1], nc.collection[3].parent
    assert_equal nc.collection[3], nc.collection[6].parent
    assert_equal nc.collection[0], nc.collection[4].parent
    assert_equal nc.collection[4], nc.collection[7].parent
  end

  def test_that_create_a_name_collection_handles_author_year
    string = CSV.generate do |csv|
      csv << %w{family genus species author_year}
      csv << ["Fooidae", "Foo", "bar", "Smith, 1854"]
      csv << ["Fooidae", "Foo", "foo", "(Smith, 1854)"]
    end

    # 0 Fooidae
    # 1 Foo
    # 2 bar
    # 3 foo

    csv = CSV.parse(string, headers: true)
    nc = Taxonifi::Lumper.create_name_collection(csv)
    assert_equal 1, nc.collection[3].author.size
    assert_equal 'Smith', nc.collection[3].author.first.last_name
    assert_equal 1854, nc.collection[3].year

    # Name only applies to the "last" name in the order.
    assert_nil nc.collection[0].author
    assert_nil nc.collection[1].author
    assert_equal 1, nc.collection[2].author.size

    # NOTE(review): parens is expected to be true for the unparenthesised
    # "Smith, 1854" and false for "(Smith, 1854)" -- confirm against the model.
    assert_nil nc.collection[0].parens
    assert_equal true, nc.collection[2].parens
    assert_equal false, nc.collection[3].parens
  end

  #--- reference collections

end
|
119
|
+
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
|
3
|
+
# Tests for Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection,
# fed an identifier/parent/child/rank/synonyms table.
class Test_TaxonifiLumperParentChildNameCollection < Test::Unit::TestCase

  # Builds @headers, @csv_string and @csv for a six-row parent/child table.
  def setup
    @headers = %w[identifier parent child rank synonyms]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << [0, nil, "Root", "class", nil]
      csv << [1, "0", "Aidae", "Family", nil]
      csv << [2, "1", "Foo", "Genus", nil]
      csv << [3, "2", "Foo bar", "species", nil] # case testing
      # initial subspecies rank data had rank blank, assuming they will be called species
      csv << [4, "2", "Foo bar stuff (Guy, 1921)", "species", "Foo bar blorf (Guy, 1921)"]
      csv << [5, "0", "Bidae", "Family", nil]
    end

    # Options go in as keywords: a positional options hash raises
    # ArgumentError on Ruby 3.0+, where CSV.parse accepts keyword options only.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  # Stores the name collection built from @csv in @nc.
  def _create_a_collection
    @nc = Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection(@csv)
  end

  def test_that_name_collection_returns_a_name_collection
    _create_a_collection
    assert_equal Taxonifi::Model::NameCollection, @nc.class
  end

  def test_that_higher_taxon_names_are_created
    _create_a_collection
    assert_equal "Aidae", @nc.names_at_rank('family').first.name
    assert_equal "family", @nc.names_at_rank('family').first.rank
    assert_equal "Foo", @nc.names_at_rank('genus').first.name
    assert @nc.names_at_rank("species").map { |n| n.name }.include?("bar")
    assert_equal 1, @nc.names_at_rank("genus").size
    assert @nc.names_at_rank("subspecies").map { |n| n.name }.include?("stuff")
    assert @nc.names_at_rank("subspecies").map { |n| n.name }.include?("blorf")
  end

end
|
41
|
+
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/lumper/lumper'))
|
3
|
+
|
4
|
+
# Builder construction
|
5
|
+
|
6
|
+
# Tests for the citation lumps reported by Taxonifi::Lumper and for
# Taxonifi::Lumper.create_ref_collection.
class Test_TaxonifiLumperRefs < Test::Unit::TestCase

  # Builds @headers, @csv_string and @csv for a single-reference table.
  def setup
    @headers = ["authors", "year", "title", "publication", "pg_start", "pg_end", "pages", "cited_page", "volume", "number", "volume_number"]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)"]
    end

    # Options go in as keywords: a positional options hash raises
    # ArgumentError on Ruby 3.0+, where CSV.parse accepts keyword options only.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  def test_available_lumps
    assert_equal [:citation_basic, :citation_small], Taxonifi::Lumper.available_lumps(@csv.headers)
  end

  def test_intersecting_lumps
    headers = ["authors"]
    csv_string = CSV.generate do |csv|
      csv << headers
      csv << ["Smith J. and Barnes S."]
    end

    csv = CSV.parse(csv_string, headers: true)

    assert_equal [:citation_basic, :citation_small], Taxonifi::Lumper.intersecting_lumps(csv.headers)
    assert_equal [], Taxonifi::Lumper.available_lumps(csv.headers)
  end

  def test_create_ref_collection
    assert_equal Taxonifi::Model::RefCollection, Taxonifi::Lumper.create_ref_collection(@csv).class
  end

  def test_creates_refs
    assert_equal 1, Taxonifi::Lumper.create_ref_collection(@csv).collection.size
  end

  def test_assigns_attributes_to_instantiated_refs
    rc = Taxonifi::Lumper.create_ref_collection(@csv)
    ref = rc.collection.first
    assert_equal ["J"], ref.authors.first.initials
    assert_equal "Smith", ref.authors.first.last_name
    assert_equal "2012", ref.year
    assert_equal "Bar and foo", ref.title
    assert_equal "Journal of Foo", ref.publication
    assert_equal "2", ref.volume
    assert_equal "4", ref.number
    assert_equal "2", ref.pg_start
    assert_equal "3", ref.pg_end
  end

  # Two identical rows are expected to produce a single reference.
  def test_indexes_unique_refs
    csv = ref_csv(
      ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)"],
      ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)"]
    )
    rc = Taxonifi::Lumper.create_ref_collection(csv)
    assert_equal 1, rc.collection.size
  end

  # The third row differs only in an author name ("Bartes"), and is expected
  # to produce a second reference.
  def test_indexes_unique_refs2
    csv = ref_csv(
      ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)"],
      ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)"],
      ["Smith J. and Bartes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)"]
    )
    rc = Taxonifi::Lumper.create_ref_collection(csv)
    assert_equal 2, rc.collection.size
  end

  def test_that_refs_can_be_returned_by_row
    csv = ref_csv(
      ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)"],
      ["Smith J.", "2012", "Foo and bar", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)"]
    )
    rc = Taxonifi::Lumper.create_ref_collection(csv)
    assert_equal "Foo and bar", rc.object_from_row(1).title
  end

  private

  # Generates CSV text from @headers followed by +rows+ and returns it parsed
  # with the first row treated as headers.
  def ref_csv(*rows)
    text = CSV.generate do |csv|
      csv << @headers
      rows.each { |row| csv << row }
    end
    CSV.parse(text, headers: true)
  end

end
|
91
|
+
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/splitter/parser'))
|
3
|
+
|
4
|
+
# Tests for Taxonifi::Splitter::Parser#parse_species_name, using a
# Taxonifi::Model::SpeciesName as the builder.
class Test_TaxonifiSplitterParser < Test::Unit::TestCase

  # TODO: this could also go to builder related tests
  def test_that_parse_species_name_parses
    # Genus, subgenus, species and subspecies, with a parenthesised author/year.
    full = parsed_species_name("Foo (Bar) stuff things (Smith, 1912)")
    assert_equal "Foo", full.genus.name
    assert_equal "Bar", full.subgenus.name
    assert_equal full.genus, full.subgenus.parent
    assert_equal "stuff", full.species.name
    assert_equal full.subgenus, full.species.parent
    assert_equal "things", full.subspecies.name
    assert_equal full.species, full.subspecies.parent
    assert_equal "Smith", full.names.last.author
    assert_equal 1912, full.names.last.year
    # NOTE(review): parens is expected false for the parenthesised author/year
    # here and true for the bare one below -- confirm against the model.
    assert_equal false, full.names.last.parens

    # No subgenus, and an author/year without parentheses.
    plain = parsed_species_name("Foo stuff things Smith, 1912")
    assert_equal "Foo", plain.genus.name
    assert_equal "stuff", plain.species.name
    assert_equal "things", plain.subspecies.name
    assert_equal "Smith", plain.names.last.author
    assert_equal 1912, plain.names.last.year
    assert_equal true, plain.names.last.parens
  end

  private

  # Lexes +text+ with the :species_name token list, parses it into a new
  # SpeciesName builder, and returns that builder.
  def parsed_species_name(text)
    lexer = Taxonifi::Splitter::Lexer.new(text, :species_name)
    species_name = Taxonifi::Model::SpeciesName.new
    Taxonifi::Splitter::Parser.new(lexer, species_name).parse_species_name
    species_name
  end

end
|
34
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/splitter/splitter'))
|
3
|
+
|
4
|
+
# Placeholder test case with a single assertion that always passes.
class SplitterTest < Test::Unit::TestCase
  def test_truth
    assert(true)
  end
end
|
9
|
+
|
10
|
+
# Tests for constructing a Taxonifi::Splitter::Lexer.
class Test_TaxonifiSplitterLexer < Test::Unit::TestCase

  def test_that_vanilla_new_succeed
    assert Taxonifi::Splitter::Lexer.new("foo")
  end

  # :bar is expected to be rejected with a SplitterError.
  def test_that_lexer_can_only_be_passed_valid_token_lists
    assert_raises(Taxonifi::Splitter::SplitterError) { Taxonifi::Splitter::Lexer.new("foo", :bar) }
  end

  # NOTE(review): no token list is passed here, so this makes the same call as
  # test_that_vanilla_new_succeed.
  def test_that_lexer_can_be_created_with_token_list_subsets
    assert Taxonifi::Splitter::Lexer.new("foo")
  end

end
|
27
|
+
|