taxonifi 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +155 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/lib/assessor/assessor.rb +31 -0
- data/lib/assessor/base.rb +17 -0
- data/lib/assessor/row_assessor.rb +131 -0
- data/lib/export/export.rb +9 -0
- data/lib/export/format/base.rb +43 -0
- data/lib/export/format/species_file.rb +341 -0
- data/lib/lumper/lumper.rb +334 -0
- data/lib/lumper/lumps/parent_child_name_collection.rb +84 -0
- data/lib/models/author_year.rb +39 -0
- data/lib/models/base.rb +73 -0
- data/lib/models/collection.rb +92 -0
- data/lib/models/generic_object.rb +15 -0
- data/lib/models/geog.rb +59 -0
- data/lib/models/geog_collection.rb +28 -0
- data/lib/models/name.rb +206 -0
- data/lib/models/name_collection.rb +149 -0
- data/lib/models/person.rb +49 -0
- data/lib/models/ref.rb +85 -0
- data/lib/models/ref_collection.rb +106 -0
- data/lib/models/species_name.rb +85 -0
- data/lib/splitter/builder.rb +26 -0
- data/lib/splitter/lexer.rb +70 -0
- data/lib/splitter/parser.rb +54 -0
- data/lib/splitter/splitter.rb +45 -0
- data/lib/splitter/tokens.rb +322 -0
- data/lib/taxonifi.rb +36 -0
- data/test/file_fixtures/Lygaeoidea.csv +801 -0
- data/test/helper.rb +38 -0
- data/test/test_exporter.rb +32 -0
- data/test/test_lumper_geogs.rb +59 -0
- data/test/test_lumper_hierarchical_collection.rb +88 -0
- data/test/test_lumper_names.rb +119 -0
- data/test/test_lumper_parent_child_name_collection.rb +41 -0
- data/test/test_lumper_refs.rb +91 -0
- data/test/test_parser.rb +34 -0
- data/test/test_splitter.rb +27 -0
- data/test/test_splitter_tokens.rb +403 -0
- data/test/test_taxonifi.rb +11 -0
- data/test/test_taxonifi_accessor.rb +61 -0
- data/test/test_taxonifi_geog.rb +51 -0
- data/test/test_taxonifi_name.rb +186 -0
- data/test/test_taxonifi_name_collection.rb +158 -0
- data/test/test_taxonifi_ref.rb +90 -0
- data/test/test_taxonifi_ref_collection.rb +69 -0
- data/test/test_taxonifi_species_name.rb +95 -0
- metadata +167 -0
data/test/helper.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
require 'debugger'
|
4
|
+
|
5
|
+
# Activate the default and development gem groups before the tests load.
# When Bundler cannot do so, print its message plus a hint on how to fix it,
# then exit with the status code carried by the Bundler error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts(bundler_error.message, "Run `bundle install` to install missing gems")
  exit(bundler_error.status_code)
end
|
12
|
+
|
13
|
+
require 'test/unit'
|
14
|
+
#require 'shoulda'
|
15
|
+
|
16
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
17
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
18
|
+
|
19
|
+
class Test::Unit::TestCase
|
20
|
+
end
|
21
|
+
|
22
|
+
# TODO: rename to reflect format
|
23
|
+
# Builds a small identifier/parent/child/rank/synonyms name table as a CSV
# fixture.
#
# Side effects: assigns @headers (the column names), @csv_string (the
# generated CSV text) and @csv (the parsed table).
#
# Returns the parsed table (@csv), parsed with the first row as headers.
def generic_csv_with_names
  @headers = %w[identifier parent child rank synonyms]
  @csv_string = CSV.generate do |csv|
    csv << @headers
    csv << [0, nil, "Root", "class", nil]
    csv << [1, "0", "Aidae", "Family", nil]
    csv << [2, "0", "Bidae", "Family", nil]
    csv << [3, "1", "Foo", "Genus", nil]
    csv << [4, "3", "Foo bar", "species", nil] # case testing
    csv << [5, "4", "Foo bar bar", "species", nil]
    # initial subspecies rank data had rank blank, assuming they will be called species
    csv << [6, "3", "Foo bar stuff (Guy, 1921)", "species", "Foo bar blorf (Guy, 1921)"]
  end

  # Options are given as keywords: a positional options hash is rejected by
  # CSV.parse on Ruby 3 and later.
  @csv = CSV.parse(@csv_string, headers: true)
end
|
38
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/export/export'))
|
3
|
+
|
4
|
+
# Tests for the Taxonifi export layer: the generic base exporter and the
# SpeciesFile exporter.
class Test_TaxonifiExports < Test::Unit::TestCase

  def test_that_new_generic_export_can_be_instantiated
    assert Taxonifi::Export::Base.new
  end

  # Not named test_*, so presumably it is not picked up by the test runner.
  def dont_test_that_species_file_export_does_stuff
    csv = generic_csv_with_names
    nc = Taxonifi::Lumper::Lumps::EolNameCollection.name_collection(csv)
    e = Taxonifi::Export::SpeciesFile.new(:nc => nc, :authorized_user_id => 15)
    assert e.export
  end

  # Runs the SpeciesFile export against the Lygaeoidea.csv fixture.
  def test_big_file
    file = File.expand_path(File.join(File.dirname(__FILE__), 'file_fixtures/Lygaeoidea.csv'))

    # Keyword options: CSV.read rejects a positional options hash on Ruby 3+.
    csv = CSV.read(file, headers: true, col_sep: ",", header_converters: :downcase)

    nc = Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection(csv)
    e = Taxonifi::Export::SpeciesFile.new(:nc => nc, :authorized_user_id => 15)
    assert e.export
  end

end
|
32
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
|
3
|
+
# Builder construction
|
4
|
+
|
5
|
+
# Tests that Taxonifi::Lumper builds a geog collection (country/state/county)
# from CSV rows and links each geog to its parent.
class Test_TaxonifiLumperGeogs < Test::Unit::TestCase

  # Builds the country/state/county fixture and parses it into @csv.
  def setup
    @headers = ["country", "state", "county"]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Canada", "", nil]
      csv << ["Canada", "Saskatchewan", nil]
      csv << ["USA", "Texas", nil]
      csv << ["USA", "Texas", "Brazos"]
      csv << ["Utopia", nil, "Wonderland"]
    end

    # The row_index looks like this:
    # 0
    # 0 1
    # 2 3
    # 2 3 4
    # 5 6
    #
    # The name_index looks like
    # {:country => {"Canada" => 0, "USA" => 2, "Utopia" => 5} ... etc.

    # Keyword options: CSV.parse rejects a positional options hash on Ruby 3+.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  # Helper (not a test): stores the lumped geog collection in @gc.
  def _create_a_collection
    @gc = Taxonifi::Lumper.create_geog_collection(@csv)
  end

  def test_available_lumps
    assert_equal [:basic_geog], Taxonifi::Lumper.available_lumps(@csv.headers)
  end

  def test_that_create_geog_collection_creates_a_geog_collection
    gc = Taxonifi::Lumper.create_geog_collection(@csv)
    assert_equal Taxonifi::Model::GeogCollection, gc.class
  end

  def test_that_create_geog_collection_instantiates_geogs
    _create_a_collection
    assert_equal 7, @gc.collection.size
    assert_equal "Canada", @gc.collection.first.name
    assert_equal "Wonderland", @gc.collection.last.name
  end

  def test_that_create_geog_collection_assigns_parenthood
    _create_a_collection
    assert_equal 0, @gc.collection[1].parent.id
    assert_equal 5, @gc.collection[6].parent.id
    assert_equal 3, @gc.collection[4].parent.id
  end

end
|
59
|
+
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/lumper/lumper'))
|
3
|
+
|
4
|
+
# Builder construction
|
5
|
+
|
6
|
+
# Tests Taxonifi::Lumper.create_hierarchical_collection: object creation,
# header ordering, parent assignment and handling of repeated names.
class Test_TaxonifiLumperHierarchicalCollection < Test::Unit::TestCase

  # Builds a one-row fixture whose three columns are named a, b and c.
  def setup
    @headers = ["a", "b", "c"]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << %w{a b c}
    end

    # Keyword options: CSV.parse rejects a positional options hash on Ruby 3+.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  def test_that_create_hierarchical_collection_creates_collection
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{a b c})
    assert_equal Taxonifi::Model::Collection, c.class
  end

  def test_that_a_hierarchical_collection_instantiates_generic_objects
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{a b c})
    assert_equal Taxonifi::Model::GenericObject, c.collection.first.class
  end

  def test_that_collection_store_names
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{a b c})
    assert_equal "a", c.collection.first.name
    assert_equal "b", c.collection[1].name
    assert_equal "c", c.collection[2].name
  end

  def test_that_header_order_is_applied
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{c a b})
    assert_equal "c", c.collection.first.name
    assert_equal "a", c.collection[1].name
    assert_equal "b", c.collection[2].name
  end

  def test_that_parent_objects_are_assigned
    c = Taxonifi::Lumper.create_hierarchical_collection(@csv, %w{a b c})
    assert_nil c.collection.first.parent
    assert_equal "a", c.collection[1].parent.name
    assert_equal "b", c.collection[2].parent.name
  end

  # With column b blank, the second object in the collection is expected to
  # hang directly off "a".
  def test_that_parents_are_assigned_across_blank_columns
    csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["a", nil, "c"]
    end
    csv = CSV.parse(csv_string, headers: true)
    c = Taxonifi::Lumper.create_hierarchical_collection(csv, %w{a b c})
    assert_nil c.collection.first.parent
    assert_equal "a", c.collection[1].parent.name
  end

  # "b" is expected once under "a" and again under "e".
  def test_that_names_at_rank_are_synonymous_when_parents_are_identical
    csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["a", "b", "c"]
      csv << ["a", "b", "d"]
      csv << ["e", "b", "f"]
    end
    csv = CSV.parse(csv_string, headers: true)
    c = Taxonifi::Lumper.create_hierarchical_collection(csv, %w{a b c})
    assert_equal %w{a b c d e b f}, c.collection.collect{|o| o.name}
    assert_equal 7, c.collection.size
  end

  # Disabled tests; they rely on @gc and _create_a_collection, which this
  # class does not define.
  #
  # def test_that_create_geog_collection_instantiates_geogs
  #   _create_a_collection
  #   assert_equal 7, @gc.collection.size
  #   assert_equal "Canada", @gc.collection.first.name
  #   assert_equal "Wonderland", @gc.collection.last.name
  # end

  # def test_that_create_geog_collection_assigns_parenthood
  #   _create_a_collection
  #   assert_equal 0, @gc.collection[1].parent.id
  #   assert_equal 5, @gc.collection[6].parent.id
  #   assert_equal 3, @gc.collection[4].parent.id
  # end

end
|
88
|
+
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/lumper/lumper'))
|
3
|
+
|
4
|
+
# Builder construction
|
5
|
+
|
6
|
+
# Tests Taxonifi::Lumper name handling: available lumps, building a name
# collection from family/genus/species rows, parent links, repeated names and
# author/year parsing.
class Test_TaxonifiLumperNames < Test::Unit::TestCase

  # Builds a one-row family/genus/species/author/year fixture in @csv.
  def setup
    @headers = ["family", "genus", "species", "author", "year"]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Fooidae", "Foo", "bar", "Smith", "1854"]
    end

    # Keyword options: CSV.parse rejects a positional options hash on Ruby 3+.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  def test_that_setup_setups
    assert_equal @headers, @csv.headers
  end

  def test_available_lumps_raise_without_arrays
    assert_raises Taxonifi::Lumper::LumperError do
      Taxonifi::Lumper.available_lumps( "foo" )
    end
  end

  def test_available_lumps
    assert Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::QUAD ).include?(:quadrinomial)
    assert Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD ).include?(:quad_author_year)
    assert !Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD ).include?(:names)
  end

  def test_create_name_collection_creates_a_name_collection
    assert_equal Taxonifi::Model::NameCollection, Taxonifi::Lumper.create_name_collection(@csv).class
  end

  def test_that_create_name_collection_raises_when_fed_non_csv
    assert_raises Taxonifi::Lumper::LumperError do
      Taxonifi::Lumper.create_name_collection("FOO")
    end
  end

  def test_that_create_name_collection_populates_a_name_collection
    nc = Taxonifi::Lumper.create_name_collection(@csv)
    assert_equal 3, nc.collection.size
    assert_equal ["Fooidae", "Foo", "bar"], nc.collection.collect{|n| n.name}
  end

  def test_that_create_name_collection_assigns_row_number
    nc = Taxonifi::Lumper.create_name_collection(@csv)
    assert_equal 0, nc.collection.first.row_number
    assert_equal 0, nc.collection.last.row_number
  end

  def test_that_create_name_collection_parentifies
    nc = Taxonifi::Lumper.create_name_collection(@csv)
    assert_equal nc.collection[0], nc.collection[1].parent
    assert_equal nc.collection[1], nc.collection[2].parent
  end

  def test_that_create_a_name_collection_handles_homonomy
    string = CSV.generate do |csv|
      csv << @headers
      csv << ["Fooidae", "Foo", "bar", "Smith", "1854"]
      csv << ["Blorf", "Foo", "bar", "Smith", "1854"]
      csv << ["Fooidae", "Bar", "bar", "Smith", "1854"]
    end

    # The index should break down like this
    # 0 2 5
    # 1 3 6
    # 0 4 7

    csv = CSV.parse(string, headers: true)
    nc = Taxonifi::Lumper.create_name_collection(csv)

    assert_equal nc.collection[2], nc.collection[5].parent
    assert_equal nc.collection[0], nc.collection[2].parent
    assert_equal nc.collection[1], nc.collection[3].parent
    assert_equal nc.collection[3], nc.collection[6].parent
    assert_equal nc.collection[0], nc.collection[4].parent
    assert_equal nc.collection[4], nc.collection[7].parent
  end

  def test_that_create_a_name_collection_handles_author_year
    string = CSV.generate do |csv|
      csv << %w{family genus species author_year}
      csv << ["Fooidae", "Foo", "bar", "Smith, 1854"]
      csv << ["Fooidae", "Foo", "foo", "(Smith, 1854)"]
    end

    # 0 Fooidae
    # 1 Foo
    # 2 bar
    # 3 foo

    csv = CSV.parse(string, headers: true)
    nc = Taxonifi::Lumper.create_name_collection(csv)
    assert_equal 1, nc.collection[3].author.size
    assert_equal 'Smith', nc.collection[3].author.first.last_name
    assert_equal 1854, nc.collection[3].year

    # Name only applies to the "last" name in the order.
    assert_nil nc.collection[0].author
    assert_nil nc.collection[1].author
    assert_equal 1, nc.collection[2].author.size

    # NOTE(review): parens is expected true for the unparenthesized
    # "Smith, 1854" and false for "(Smith, 1854)" -- confirm this matches the
    # model's definition of parens.
    assert_nil nc.collection[0].parens
    assert_equal true, nc.collection[2].parens
    assert_equal false, nc.collection[3].parens
  end

  #--- reference collections

end
|
119
|
+
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
|
3
|
+
# Tests that ParentChildNameCollection turns identifier/parent/child/rank
# rows into a NameCollection with names at the expected ranks.
class Test_TaxonifiLumperParentChildNameCollection < Test::Unit::TestCase

  # Builds the parent/child fixture and parses it into @csv.
  def setup
    @headers = %w[identifier parent child rank synonyms]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << [0, nil, "Root", "class", nil]
      csv << [1, "0", "Aidae", "Family", nil]
      csv << [2, "1", "Foo", "Genus", nil]
      csv << [3, "2", "Foo bar", "species", nil] # case testing
      # initial subspecies rank data had rank blank, assuming they will be called species
      csv << [4, "2", "Foo bar stuff (Guy, 1921)", "species", "Foo bar blorf (Guy, 1921)"]
      csv << [5, "0", "Bidae", "Family", nil]
    end

    # Keyword options: CSV.parse rejects a positional options hash on Ruby 3+.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  # Helper (not a test): stores the lumped name collection in @nc.
  def _create_a_collection
    @nc = Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection(@csv)
  end

  def test_that_name_collection_returns_a_name_collection
    _create_a_collection
    assert_equal Taxonifi::Model::NameCollection, @nc.class
  end

  def test_that_higher_taxon_names_are_created
    _create_a_collection
    assert_equal "Aidae", @nc.names_at_rank('family').first.name
    assert_equal "family", @nc.names_at_rank('family').first.rank
    assert_equal "Foo", @nc.names_at_rank('genus').first.name
    assert @nc.names_at_rank("species").collect{|n| n.name}.include?("bar")
    assert_equal 1, @nc.names_at_rank("genus").size
    assert @nc.names_at_rank("subspecies").collect{|n| n.name}.include?("stuff")
    assert @nc.names_at_rank("subspecies").collect{|n| n.name}.include?("blorf")
  end

end
|
41
|
+
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/lumper/lumper'))
|
3
|
+
|
4
|
+
# Builder construction
|
5
|
+
|
6
|
+
# Tests Taxonifi::Lumper reference handling: available/intersecting lumps,
# building a RefCollection from citation rows, attribute assignment,
# de-duplication of identical rows and lookup of a ref by row.
class Test_TaxonifiLumperRefs < Test::Unit::TestCase

  # Builds a single-citation fixture and parses it into @csv.
  def setup
    @headers = ["authors", "year", "title", "publication", "pg_start", "pg_end", "pages", "cited_page" ,"volume", "number", "volume_number"]
    @csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
    end

    # Keyword options: CSV.parse rejects a positional options hash on Ruby 3+.
    @csv = CSV.parse(@csv_string, headers: true)
  end

  def test_available_lumps
    assert_equal [:citation_basic, :citation_small], Taxonifi::Lumper.available_lumps(@csv.headers)
  end

  # With only an "authors" column the citation lumps intersect the headers
  # but none is fully available.
  def test_intersecting_lumps
    headers = ["authors"]
    csv_string = CSV.generate do |csv|
      csv << headers
      csv << ["Smith J. and Barnes S."]
    end

    csv = CSV.parse(csv_string, headers: true)

    assert_equal [:citation_basic, :citation_small], Taxonifi::Lumper.intersecting_lumps(csv.headers)
    assert_equal [], Taxonifi::Lumper.available_lumps(csv.headers)
  end

  def test_create_ref_collection
    assert_equal Taxonifi::Model::RefCollection, Taxonifi::Lumper.create_ref_collection(@csv).class
  end

  def test_creates_refs
    assert_equal 1, Taxonifi::Lumper.create_ref_collection(@csv).collection.size
  end

  def test_assigns_attributes_to_instantiated_refs
    rc = Taxonifi::Lumper.create_ref_collection(@csv)
    assert_equal ["J"], rc.collection.first.authors.first.initials
    assert_equal "Smith", rc.collection.first.authors.first.last_name
    assert_equal "2012", rc.collection.first.year
    assert_equal "Bar and foo", rc.collection.first.title
    assert_equal "Journal of Foo", rc.collection.first.publication
    assert_equal "2", rc.collection.first.volume
    assert_equal "4", rc.collection.first.number
    assert_equal "2", rc.collection.first.pg_start
    assert_equal "3", rc.collection.first.pg_end
  end

  # Two identical rows are expected to yield a single ref.
  def test_indexes_unique_refs
    csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
      csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
    end
    csv = CSV.parse(csv_string, headers: true)
    rc = Taxonifi::Lumper.create_ref_collection(csv)
    assert_equal 1, rc.collection.size
  end

  # The third row differs only in an author name ("Bartes"), so two refs are
  # expected.
  def test_indexes_unique_refs2
    csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
      csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
      csv << ["Smith J. and Bartes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
    end
    csv = CSV.parse(csv_string, headers: true)
    rc = Taxonifi::Lumper.create_ref_collection(csv)
    assert_equal 2, rc.collection.size
  end

  def test_that_refs_can_be_returned_by_row
    csv_string = CSV.generate do |csv|
      csv << @headers
      csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
      csv << ["Smith J.", "2012", "Foo and bar", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
    end
    csv = CSV.parse(csv_string, headers: true)
    rc = Taxonifi::Lumper.create_ref_collection(csv)
    assert_equal "Foo and bar", rc.object_from_row(1).title
  end

end
|
91
|
+
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/splitter/parser'))
|
3
|
+
|
4
|
+
# Tests that the splitter parser fills a SpeciesName builder from a species
# name string, with and without a subgenus.
class Test_TaxonifiSplitterParser < Test::Unit::TestCase

  # TODO: this could also go to builder related tests
  def test_that_parse_species_name_parses
    with_subgenus = species_name_built_from("Foo (Bar) stuff things (Smith, 1912)")
    assert_equal "Foo", with_subgenus.genus.name
    assert_equal "Bar", with_subgenus.subgenus.name
    assert_equal with_subgenus.genus, with_subgenus.subgenus.parent
    assert_equal "stuff", with_subgenus.species.name
    assert_equal with_subgenus.subgenus, with_subgenus.species.parent
    assert_equal "things", with_subgenus.subspecies.name
    assert_equal with_subgenus.species, with_subgenus.subspecies.parent
    assert_equal "Smith", with_subgenus.names.last.author
    assert_equal 1912, with_subgenus.names.last.year
    # NOTE(review): parens is expected false for the parenthesized author/year
    # here and true for the bare one below -- confirm against the model.
    assert_equal false, with_subgenus.names.last.parens

    without_subgenus = species_name_built_from("Foo stuff things Smith, 1912")
    assert_equal "Foo", without_subgenus.genus.name
    assert_equal "stuff", without_subgenus.species.name
    assert_equal "things", without_subgenus.subspecies.name
    assert_equal "Smith", without_subgenus.names.last.author
    assert_equal 1912, without_subgenus.names.last.year
    assert_equal true, without_subgenus.names.last.parens
  end

  private

  # Lexes +text+ with the :species_name token list, runs parse_species_name
  # against a fresh SpeciesName builder and returns that builder.
  def species_name_built_from(text)
    name_builder = Taxonifi::Model::SpeciesName.new
    name_lexer = Taxonifi::Splitter::Lexer.new(text, :species_name)
    Taxonifi::Splitter::Parser.new(name_lexer, name_builder).parse_species_name
    name_builder
  end

end
|
34
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/splitter/splitter'))
|
3
|
+
|
4
|
+
# Placeholder test case containing a single always-passing assertion.
class SplitterTest < Test::Unit::TestCase
  # Asserts a literal true.
  def test_truth
    assert(true)
  end
end
|
9
|
+
|
10
|
+
# Tests construction of Taxonifi::Splitter::Lexer with and without a token
# list argument.
class Test_TaxonifiSplitterLexer < Test::Unit::TestCase

  def test_that_vanilla_new_succeed
    assert Taxonifi::Splitter::Lexer.new("foo")
  end

  # :bar is used here as a token list name the lexer should reject.
  def test_that_lexer_can_only_be_passed_valid_token_lists
    assert_raises Taxonifi::Splitter::SplitterError do
      Taxonifi::Splitter::Lexer.new("foo", :bar)
    end
  end

  # NOTE(review): despite its name, this test passes no token list and is
  # identical to test_that_vanilla_new_succeed -- confirm what was intended.
  def test_that_lexer_can_be_created_with_token_list_subsets
    assert Taxonifi::Splitter::Lexer.new("foo")
  end

end
|
27
|
+
|