biodiversity19 0.5.15

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,211 @@
1
+ # encoding: UTF-8
2
+ grammar ScientificNameDirty # Lenient extension of ScientificNameClean: accepts trailing junk, doubled parentheses and irregular years.
3
+ include ScientificNameClean
4
+ # Rules that end in "/ super" try their lenient alternatives first and then defer to the same-named rule of the included grammar.
5
+ rule root
6
+ super
7
+ end
8
+ # A scientific_name_4 followed by `garbage`. Every accessor delegates to `a`, so the trailing text never reaches the result.
9
+ rule scientific_name_5
10
+ a:scientific_name_4 garbage {
11
+ def value
12
+ a.value
13
+ end
14
+
15
+ def canonical
16
+ a.canonical
17
+ end
18
+
19
+ def pos
20
+ a.pos
21
+ end
22
+
23
+ def details
24
+ a.details
25
+ end
26
+ }
27
+ /
28
+ super
29
+ end
30
+ # Infraspecies followed directly by a year, or by the "corrig." marker and an authorship.
31
+ rule infraspecies
32
+ a:infraspecies_string space b:year {
33
+ def value
34
+ a.value + " " + b.value
35
+ end
36
+
37
+ def canonical
38
+ a.canonical
39
+ end
40
+
41
+ def pos
42
+ a.pos.merge(b.pos)
43
+ end
44
+
45
+ def details
46
+ {:infraspecies => a.details[:infraspecies].merge(b.details)}
47
+ end
48
+ }
49
+ / # Second form: the string_authorship_inconsistencies marker is matched but left out of value, pos and details.
50
+ a:infraspecies_string space string_authorship_inconsistencies space b:authorship {
51
+ def value
52
+ a.value + " " + b.value
53
+ end
54
+
55
+ def canonical
56
+ a.canonical
57
+ end
58
+
59
+ def pos
60
+ a.pos.merge(b.pos)
61
+ end
62
+
63
+ def details
64
+ {:infraspecies => a.details[:infraspecies].merge(b.details)}
65
+ end
66
+ }
67
+ /
68
+ super
69
+ end
70
+ # Species epithet followed directly by a year; the year's details are merged into the :species entry.
71
+ rule species
72
+ a:species_string space b:year {
73
+ def value
74
+ a.value + " " + b.value
75
+ end
76
+
77
+ def canonical
78
+ a.canonical
79
+ end
80
+
81
+ def pos
82
+ a.pos.merge(b.pos)
83
+ end
84
+
85
+ def details
86
+ {:species => a.details[:species].merge(b.details)}
87
+ end
88
+ }
89
+ /
90
+ super
91
+ end
92
+ # Accepts a doubled closing parenthesis (two ")" separated by `space`) before falling back to the inherited rule.
93
+ rule right_paren
94
+ ")" space ")"
95
+ /
96
+ super
97
+ end
98
+ # Accepts a doubled opening parenthesis (two "(" separated by `space`) before falling back to the inherited rule.
99
+ rule left_paren
100
+ "(" space "("
101
+ /
102
+ super
103
+ end
104
+ # Irregular year forms, tried in order: year + bracketed year, year + page number, year + ".", bracketed year, year range, then the inherited rule.
105
+ rule year
106
+ a:year_number space b:approximate_year {
107
+ def value
108
+ a.value + " " + b.value
109
+ end
110
+
111
+ def pos
112
+ a.pos.merge(b.pos)
113
+ end
114
+
115
+ def details
116
+ {:year => a.value, :approximate_year => b.value}
117
+ end
118
+ }
119
+ / # Year followed by a page_number: only the year's text and interval are reported, the page number is discarded.
120
+ a:year_number space page_number {
121
+ def value
122
+ a.text_value
123
+ end
124
+
125
+ def pos
126
+ {a.interval.begin => ['year', a.interval.end]}
127
+ end
128
+
129
+ def details
130
+ {:year => value}
131
+ end
132
+ }
133
+ /
134
+ year_number_with_punctuation
135
+ /
136
+ approximate_year
137
+ /
138
+ double_year
139
+ /
140
+ super
141
+ end
142
+ # A year in square brackets (one or more closing brackets accepted); value rewrites it with parentheses, pos covers only the year_number.
143
+ rule approximate_year
144
+ "[" space a:year_number space "]"+ {
145
+ def value
146
+ "(" + a.text_value + ")"
147
+ end
148
+
149
+ def pos
150
+ {a.interval.begin => ['year', a.interval.end]}
151
+ end
152
+
153
+ def details
154
+ {:approximate_year => value}
155
+ end
156
+ }
157
+ end
158
+
159
+ # A year_number, "-", more digits, then an optional letter and an optional "?"; the whole matched text is the value.
160
+ rule double_year
161
+ year_number "-" [0-9]+ [A-Za-z]? [\?]? {
162
+ def value
163
+ text_value
164
+ end
165
+
166
+ def pos
167
+ {interval.begin => ['year', interval.end]}
168
+ end
169
+
170
+ def details
171
+ {:year => value}
172
+ end
173
+ }
174
+ end
175
+ # A year_number followed by "."; value is the year_number's text, while pos uses the interval of the whole match rather than a's.
176
+ rule year_number_with_punctuation
177
+ a:year_number "." {
178
+ def value
179
+ a.text_value
180
+ end
181
+
182
+ def pos
183
+ {interval.begin => ['year', interval.end]}
184
+ end
185
+
186
+ def details
187
+ {:year => value}
188
+ end
189
+ }
190
+ end
191
+
192
+ # A colon, `space`, then one or more digits.
193
+ rule page_number
194
+ ":" space [\d]+
195
+ {
196
+ def value # empty body, so value returns nil
197
+ end
198
+ }
199
+ end
200
+ # Literal marker text tolerated between an infraspecies and its authorship; currently only "corrig.".
201
+ rule string_authorship_inconsistencies
202
+ ("corrig.")
203
+ end
204
+ # Trailing unparseable text. The negated classes exclude a single Cyrillic letter each — presumably a stand-in for "any character"; TODO confirm.
205
+ rule garbage
206
+ space (["',.]) space [^щ]*
207
+ /
208
+ space_hard [^ш]+
209
+ end
210
+
211
+ end
@@ -0,0 +1,57 @@
1
+ # encoding: UTF-8
2
+ dir = File.dirname(__FILE__)
3
+ require File.join(dir, *%w[parser scientific_name_clean])
4
+ require File.join(dir, *%w[parser scientific_name_dirty])
5
+ require File.join(dir, *%w[parser scientific_name_canonical])
6
+ require 'rubygems'
7
+ require 'json'
8
+
9
# Front end that tries the clean, dirty and canonical name parsers in turn and
# reports the outcome as a plain Hash (or as JSON via the result object).
class ScientificNameParser

  # Reporting methods mixed into whatever +parse+ stores in +parsed+: either
  # the node returned by one of the parsers, or the fallback Hash
  # <tt>{:verbatim => input}</tt> when every parser returned nil/false.
  #
  # Kept in a module and applied with +extend+ so the method bodies are
  # defined once instead of being re-defined on every call to +parse+.
  module ParsedResult
    # Builds the summary of the parse.
    #
    # Returns <tt>{:scientificName => {...}}</tt>. The inner Hash always has
    # :parsed; on success it also has :verbatim, :normalized, :canonical,
    # :hybrid, :details and :positions, and on failure the entries of the
    # fallback Hash (i.e. :verbatim).
    def all
      # The fallback result is a Hash; anything else came from a parser.
      parsed = !is_a?(Hash)
      res = {:parsed => parsed}
      if parsed
        # Best effort: any error raised by +hybrid+ (presumably because not
        # every node defines it -- TODO confirm) is reported as "not hybrid".
        hybrid = begin
          self.hybrid
        rescue StandardError
          false
        end
        res.merge!(
          :verbatim   => text_value,
          :normalized => value,
          :canonical  => canonical,
          :hybrid     => hybrid,
          :details    => details,
          :positions  => pos
        )
      else
        res.merge!(self)
      end
      {:scientificName => res}
    end

    # JSON for +pos+, or '' when it cannot be produced (e.g. the fallback
    # Hash has no +pos+ method).
    def pos_json
      pos.to_json
    rescue StandardError
      ''
    end

    # JSON for +all+, or '' when it cannot be produced.
    def all_json
      all.to_json
    rescue StandardError
      ''
    end
  end

  # Result of the most recent +parse+ call (nil before the first call). It
  # responds to +all+, +pos_json+ and +all_json+.
  attr_reader :parsed

  def initialize
    @verbatim = ''
    @clean = ScientificNameCleanParser.new
    @dirty = ScientificNameDirtyParser.new
    @canonical = ScientificNameCanonicalParser.new
    @parsed = nil
  end

  # Parses +a_string+ with the clean, dirty and canonical parsers, in that
  # order, keeping the first truthy result; when none succeeds the result is
  # <tt>{:verbatim => a_string}</tt>.
  #
  # Returns the Hash produced by <tt>parsed.all</tt>.
  def parse(a_string)
    @verbatim = a_string
    @parsed = @clean.parse(a_string) ||
              @dirty.parse(a_string) ||
              @canonical.parse(a_string) ||
              {:verbatim => a_string}
    @parsed.extend(ParsedResult)
    @parsed.all
  end
end
57
+
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'treetop'
3
+ # Directory containing this file; every path below is built from it.
4
+ dir = File.dirname(__FILE__)
5
+ # The "biodiversity" directory that sits beside this file.
6
+ BIODIVERSITY_ROOT = File.join(dir, 'biodiversity')
7
+ require File.join(dir, "/../conf/environment") # conf/environment one level above this file's directory
8
+ require File.join(BIODIVERSITY_ROOT, "parser")
9
+ require File.join(BIODIVERSITY_ROOT, "guid")
data/pkg/.gitignore ADDED
File without changes
File without changes
@@ -0,0 +1,12 @@
1
# Spec for LsidResolver.resolve.
#
# Paths are resolved relative to this file rather than the current working
# directory. The original used File.dirname("__FILE__") -- the dirname of a
# string literal, which is always "." -- so the requires only worked when the
# specs were started from the project root.
# NOTE(review): assumes this file sits two directories below the project
# root, as the "../../" prefixes imply -- confirm.
dir = File.dirname(__FILE__)
require 'rubygems'
require 'spec'
require File.expand_path('../../conf/environment', dir)
require File.expand_path('../../lib/biodiversity/guid', dir)

describe LsidResolver do
  # Only the return type is checked, not the document's content.
  it "should return RDF document from lsid" do
    lsid = "urn:lsid:ubio.org:classificationbank:2232671"
    LsidResolver.resolve(lsid).class.should == String
  end
end
@@ -0,0 +1,35 @@
1
# NOTE: this spec needs compiled treetop files.
#
# Checks the JSON produced for parsed and unparseable names. set_parser,
# read_test_file and json are not defined here -- presumably they come from
# spec_helper; confirm there.
#
# Paths are resolved relative to this file. The original used
# File.dirname("__FILE__") (the dirname of a string literal, always "."), so
# the requires only worked when the specs were started from the project root.
# NOTE(review): assumes this file sits two directories below the project
# root, as the "../../" prefixes imply -- confirm.
dir = File.dirname(__FILE__)
require File.expand_path('../../spec/parser/spec_helper', dir)
require File.expand_path('../../lib/biodiversity/parser', dir)

describe ScientificNameClean do
  before(:all) do
    set_parser(ScientificNameParser.new)
  end

  it 'should generate standardized json' do
    read_test_file do |y|
      # Entries carrying a :comment are skipped.
      next if y[:comment]
      JSON.load(json(y[:name])).should == JSON.load(y[:jsn])
    end
  end

  # Disabled helper that regenerates the test data file from current output.
  # it 'should generate new test_file' do
  #   new_test = open(File.expand_path('../../spec/parser/test_data_new.txt', dir),'w')
  #   read_test_file do |y|
  #     if y[:comment]
  #       new_test.write y[:comment]
  #     else
  #       name = y[:name]
  #       jsn = json(y[:name])# rescue puts(y[:name])
  #       new_test.write("#{name}|#{jsn}\n")
  #     end
  #   end
  # end

  it 'should generate reasonable output if parser failed' do
    sn = 'ddd sljlkj 3223452432'
    json(sn).should == '{"scientificName":{"parsed":false,"verbatim":"ddd sljlkj 3223452432"}}'
  end

end
@@ -0,0 +1,27 @@
1
# encoding: UTF-8
#
# Spec for the canonical parser: names whose leading part is valid and whose
# remainder cannot be parsed. set_parser, parse, value, details and pos are
# not defined here -- presumably they come from spec_helper; confirm there.
#
# The path is resolved relative to this file. The original used
# File.dirname("__FILE__") (the dirname of a string literal, always "."), so
# the require only worked when the specs were started from the project root.
# NOTE(review): assumes this file sits two directories below the project
# root, as the "../../" prefix implies -- confirm.
dir = File.dirname(__FILE__)
require File.expand_path('../../spec/parser/spec_helper', dir)

describe ScientificNameCanonical do
  before(:all) do
    set_parser(ScientificNameCanonicalParser.new)
  end


  it 'should parse names with valid name part and unparseable rest' do
    # Each row: input, expected value, expected details, expected positions.
    [
      ['Morea ssjjlajajaj324$33 234243242','Morea', [{:uninomial=>{:string=>"Morea"}}], {0=>["uninomial", 5]}],
      ['Morea (Morea) Burt 2342343242 23424322342 23424234', 'Morea (Morea)', [{:genus=>{:string=>"Morea"}, :infragenus=>{:string=>"Morea"}}], {0=>["genus", 5], 7=>["infragenus", 12]}],
      ['Morea (Morea) burtius 2342343242 23424322342 23424234', 'Morea (Morea) burtius', [{:genus=>{:string=>"Morea"}, :infragenus=>{:string=>"Morea"}, :species=>{:string=>"burtius"}}], {0=>["genus", 5], 7=>["infragenus", 12], 14=>["species", 21]}],
      ['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',[{:genus=>{:string=>"Moraea"}, :species=>{:string=>"spathulata"}}], {0=>["genus", 6], 7=>["species", 17]} ],
      ['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',[{:genus=>{:string=>"Verpericola"}, :species=>{:string=>"megasoma"}}], {0=>["genus", 11], 12=>["species", 20]}]
    ].each do |name, expected_value, expected_details, expected_pos|
      parse(name).should_not be_nil
      value(name).should == expected_value
      details(name).should == expected_details
      pos(name).should == expected_pos
      parse(name).hybrid.should be_false
    end
  end

end