biodiversity 0.5.16 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/README.rdoc +5 -5
- data/Rakefile +8 -3
- data/VERSION +1 -1
- data/bin/nnparse +7 -3
- data/bin/parserver +1 -0
- data/lib/biodiversity/parser.rb +50 -5
- data/lib/biodiversity/parser/scientific_name_clean.treetop +131 -40
- data/spec/parser/scientific_name_clean.spec.rb +53 -27
- data/spec/parser/test_data.txt +73 -21
- metadata +4 -8
- data/biodiversity.gemspec +0 -88
- data/lib/biodiversity/parser/scientific_name_canonical.rb +0 -481
- data/lib/biodiversity/parser/scientific_name_clean.rb +0 -6118
- data/lib/biodiversity/parser/scientific_name_dirty.rb +0 -1309
@@ -1,481 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
module ScientificNameCanonical
|
3
|
-
include Treetop::Runtime
|
4
|
-
|
5
|
-
# Returns @root when it holds a truthy value; otherwise falls back to the
# symbol :root.
def root
  return @root if @root

  :root
end
|
8
|
-
|
9
|
-
include ScientificNameDirty
|
10
|
-
|
11
|
-
# Mixin that _nt_root attaches to a node produced by the multinomial branch.
module Root0
  # Nodes carrying this mixin always report false here.
  def hybrid
    false
  end

  # Wraps whatever the next #details up the ancestor chain returns in a
  # one-element array.
  def details
    inherited = super
    [inherited]
  end

  # Fixed marker value.
  # NOTE(review): presumably identifies which parser pass produced the node;
  # confirm against the callers.
  def parser_run
    3
  end
end
|
24
|
-
|
25
|
-
# Mixin that _nt_root attaches to a node produced by the uninomial branch.
module Root1
  # Nodes carrying this mixin always report false here.
  def hybrid
    false
  end

  # Wraps whatever the next #details up the ancestor chain returns in a
  # one-element array.
  def details
    inherited = super
    [inherited]
  end

  # Fixed marker value.
  # NOTE(review): presumably identifies which parser pass produced the node;
  # confirm against the callers.
  def parser_run
    3
  end
end
|
38
|
-
|
39
|
-
# Parses the +root+ rule at the current position.
#
# Tries _nt_multinomial_with_garbage first and, when that yields nothing,
# _nt_uninomial_with_garbage. A matched node is extended with Root0 or Root1
# respectively. The outcome (node or nil) is stored in node_cache[:root]
# under the position where the attempt started, and a later call at that
# position is answered from the cache.
#
# Returns the matched node, or nil with @index put back where it started.
#
# NOTE(review): this file looks machine-generated from a grammar; if so, the
# nil guard below should also be applied at the grammar/generator level.
def _nt_root
  start_index = index
  if node_cache[:root].has_key?(index)
    cached = node_cache[:root][index]
    if cached
      # A bare +true+ in the cache is replaced by a one-character node.
      cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
      @index = cached.interval.end
    end
    return cached
  end

  i0 = index
  r1 = _nt_multinomial_with_garbage
  if r1
    # Extend only real nodes: nil.extend(Root0) would mix the module into
    # NilClass and leak #hybrid/#details/#parser_run onto every nil.
    r1.extend(Root0)
    r0 = r1
  else
    r2 = _nt_uninomial_with_garbage
    if r2
      r2.extend(Root1)
      r0 = r2
    else
      @index = i0
      r0 = nil
    end
  end

  node_cache[:root][start_index] = r0

  r0
end
|
70
|
-
|
71
|
-
# Positional readers for a six-element sequence node. Each reader returns the
# entry of +elements+ at a fixed slot.
module MultinomialWithGarbage0
  # reader name, slot in +elements+
  [[:a, 0], [:space1, 1], [:b, 2], [:space2, 3], [:c, 4], [:garbage, 5]].each do |reader, slot|
    define_method(reader) { elements[slot] }
  end
end
|
96
|
-
|
97
|
-
# Combines the three name parts +a+, +b+ and +c+ of a sequence node.
module MultinomialWithGarbage1
  # The parts' #value strings joined with single spaces.
  def value
    [b, c].inject(a.value) { |text, part| text + " " + part.value }
  end

  # The parts' #canonical strings joined with single spaces.
  def canonical
    [b, c].inject(a.canonical) { |text, part| text + " " + part.canonical }
  end

  # a.pos merged with b.pos and then c.pos.
  def pos
    [b, c].inject(a.pos) { |merged, part| merged.merge(part.pos) }
  end

  # a.details merged with b.details and then c.details.
  def details
    [b, c].inject(a.details) { |merged, part| merged.merge(part.details) }
  end
end
|
114
|
-
|
115
|
-
# Positional readers for a four-element sequence node. Each reader returns the
# entry of +elements+ at a fixed slot.
module MultinomialWithGarbage2
  # reader name, slot in +elements+
  [[:a, 0], [:space, 1], [:b, 2], [:garbage, 3]].each do |reader, slot|
    define_method(reader) { elements[slot] }
  end
end
|
132
|
-
|
133
|
-
# Combines the two name parts +a+ and +b+ of a sequence node.
module MultinomialWithGarbage3
  # a.value and b.value separated by one space.
  def value
    lead = a.value
    lead + " " + b.value
  end

  # a.canonical and b.canonical separated by one space.
  def canonical
    lead = a.canonical
    lead + " " + b.canonical
  end

  # a.pos merged with b.pos.
  def pos
    base = a.pos
    base.merge(b.pos)
  end

  # a.details merged with b.details.
  def details
    base = a.details
    base.merge(b.details)
  end
end
|
150
|
-
|
151
|
-
# Positional readers for a four-element sequence node. Each reader returns the
# entry of +elements+ at a fixed slot.
module MultinomialWithGarbage4
  # reader name, slot in +elements+
  [[:a, 0], [:space, 1], [:b, 2], [:garbage, 3]].each do |reader, slot|
    define_method(reader) { elements[slot] }
  end
end
|
168
|
-
|
169
|
-
# Combines the two name parts +a+ and +b+ of a sequence node.
module MultinomialWithGarbage5
  # a.value and b.value separated by one space.
  def value
    lead = a.value
    lead + " " + b.value
  end

  # a.canonical and b.canonical separated by one space.
  def canonical
    lead = a.canonical
    lead + " " + b.canonical
  end

  # a.pos merged with b.pos.
  def pos
    base = a.pos
    base.merge(b.pos)
  end

  # a.details merged with b.details.
  def details
    base = a.details
    base.merge(b.details)
  end
end
|
186
|
-
|
187
|
-
# Parses the +multinomial_with_garbage+ rule at the current position.
#
# Three sequences are attempted in order; the first one whose every sub-rule
# matches wins and its node receives the listed mixins:
#   1. genus space infragenus space species garbage -> MultinomialWithGarbage0/1
#   2. genus space infragenus garbage               -> MultinomialWithGarbage2/3
#   3. genus space species garbage                  -> MultinomialWithGarbage4/5
#
# The outcome (node or nil) is stored in node_cache[:multinomial_with_garbage]
# under the starting position, and a later call at that position is answered
# from the cache. Returns the node, or nil with @index rewound.
def _nt_multinomial_with_garbage
  start_index = index
  memo = node_cache[:multinomial_with_garbage]
  if memo.has_key?(index)
    hit = memo[index]
    if hit
      # A bare +true+ in the cache is replaced by a one-character node.
      hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
      @index = hit.interval.end
    end
    return hit
  end

  rewind_to = index

  # Runs the named sub-rules in order and stops at the first miss. Yields a
  # SyntaxNode spanning the whole sequence, or nil after rewinding @index to
  # where the sequence began.
  match_sequence = lambda do |rule_names|
    seq_start = index
    parts = []
    rule_names.each do |rule_name|
      parts << __send__(rule_name)
      break unless parts.last
    end
    if parts.last
      instantiate_node(SyntaxNode, input, seq_start...index, parts)
    else
      @index = seq_start
      nil
    end
  end

  if (node = match_sequence.call([:_nt_genus, :_nt_space, :_nt_infragenus, :_nt_space, :_nt_species, :_nt_garbage]))
    node.extend(MultinomialWithGarbage0)
    node.extend(MultinomialWithGarbage1)
  elsif (node = match_sequence.call([:_nt_genus, :_nt_space, :_nt_infragenus, :_nt_garbage]))
    node.extend(MultinomialWithGarbage2)
    node.extend(MultinomialWithGarbage3)
  elsif (node = match_sequence.call([:_nt_genus, :_nt_space, :_nt_species, :_nt_garbage]))
    node.extend(MultinomialWithGarbage4)
    node.extend(MultinomialWithGarbage5)
  else
    @index = rewind_to
    node = nil
  end

  node_cache[:multinomial_with_garbage][start_index] = node

  node
end
|
295
|
-
|
296
|
-
# Positional readers for a two-element sequence node.
module UninomialWithGarbage0
  # First entry of +elements+.
  def a
    elements[0]
  end

  # Second entry of +elements+.
  def b
    elements[1]
  end
end
|
305
|
-
|
306
|
-
# Exposes the first part (+a+) of a sequence node as the node's own result;
# nothing from the second part is used.
module UninomialWithGarbage1
  # Same as a.value.
  def value
    a.value
  end

  # Same as a.canonical.
  def canonical
    a.canonical
  end

  # Same as a.pos.
  def pos
    a.pos
  end

  # Only the :uninomial entry of a.details is carried over.
  def details
    uninomial = a.details[:uninomial]
    { :uninomial => uninomial }
  end
end
|
323
|
-
|
324
|
-
# Parses the +uninomial_with_garbage+ rule at the current position: the
# sub-rule _nt_uninomial_string followed by _nt_garbage.
#
# On success the sequence node is extended with UninomialWithGarbage0 and
# UninomialWithGarbage1. The outcome (node or nil) is stored in
# node_cache[:uninomial_with_garbage] under the starting position, and a later
# call at that position is answered from the cache. Returns the node, or nil
# with @index rewound.
def _nt_uninomial_with_garbage
  start_index = index
  memo = node_cache[:uninomial_with_garbage]
  if memo.has_key?(index)
    hit = memo[index]
    if hit
      # A bare +true+ in the cache is replaced by a one-character node.
      hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
      @index = hit.interval.end
    end
    return hit
  end

  seq_start = index
  parts = [_nt_uninomial_string]
  # The garbage rule is only consulted once the name itself has matched.
  parts << _nt_garbage if parts.first

  if parts.last
    node = instantiate_node(SyntaxNode, input, seq_start...index, parts)
    node.extend(UninomialWithGarbage0)
    node.extend(UninomialWithGarbage1)
  else
    @index = seq_start
    node = nil
  end

  node_cache[:uninomial_with_garbage][start_index] = node

  node
end
|
355
|
-
|
356
|
-
# Positional readers for the two space entries of a garbage sequence node.
module Garbage0
  # Entry 0 of +elements+.
  def space1
    elements[0]
  end

  # Entry 2 of +elements+ (entry 1 is skipped).
  def space2
    elements[2]
  end
end
|
366
|
-
|
367
|
-
# Positional reader for the leading entry of a garbage sequence node.
module Garbage1
  # Entry 0 of +elements+.
  def space_hard
    elements[0]
  end
end
|
373
|
-
|
374
|
-
# Parses the +garbage+ rule at the current position.
#
# Two sequences are attempted in order:
#   1. space, one of the characters " ' , . , space, then zero or more
#      characters matching [^щ]                         -> Garbage0
#   2. space_hard, then one or more characters matching [^ш] -> Garbage1
#
# NOTE(review): the negated single-letter classes presumably stand in for
# "any character"; confirm against the grammar this code came from.
#
# The outcome (node or nil) is stored in node_cache[:garbage] under the
# starting position, and a later call at that position is answered from the
# cache. Returns the node, or nil with @index rewound.
def _nt_garbage
  start_index = index
  memo = node_cache[:garbage]
  if memo.has_key?(index)
    hit = memo[index]
    if hit
      # A bare +true+ in the cache is replaced by a one-character node.
      hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
      @index = hit.interval.end
    end
    return hit
  end

  rewind_to = index

  # Consumes one character when +pattern+ matches at the current position;
  # yields true on a match and nil otherwise.
  take_char = lambda do |pattern|
    if has_terminal?(pattern, true, index)
      @index += 1
      true
    end
  end

  # Consumes consecutive characters matching +pattern+. Yields the position
  # where the run began and an array holding one +true+ per character taken.
  take_run = lambda do |pattern|
    run_start = index
    hits = []
    loop do
      break unless take_char.call(pattern)
      hits << true
    end
    [run_start, hits]
  end

  # Runs the steps in order and stops at the first miss. Yields a SyntaxNode
  # spanning the whole sequence, or nil after rewinding @index to where the
  # sequence began.
  match_sequence = lambda do |steps|
    seq_start = index
    parts = []
    steps.each do |step|
      parts << step.call
      break unless parts.last
    end
    if parts.last
      instantiate_node(SyntaxNode, input, seq_start...index, parts)
    else
      @index = seq_start
      nil
    end
  end

  punctuation_led = [
    lambda { _nt_space },
    lambda { take_char.call('\G["\',.]') },
    lambda { _nt_space },
    lambda do
      # Zero or more: an empty run still produces a node.
      run_start, hits = take_run.call('\G[^щ]')
      instantiate_node(SyntaxNode, input, run_start...index, hits)
    end
  ]

  space_led = [
    lambda { _nt_space_hard },
    lambda do
      # One or more: an empty run counts as a miss.
      run_start, hits = take_run.call('\G[^ш]')
      if hits.empty?
        @index = run_start
        nil
      else
        instantiate_node(SyntaxNode, input, run_start...index, hits)
      end
    end
  ]

  if (node = match_sequence.call(punctuation_led))
    node.extend(Garbage0)
  elsif (node = match_sequence.call(space_led))
    node.extend(Garbage1)
  else
    @index = rewind_to
    node = nil
  end

  node_cache[:garbage][start_index] = node

  node
end
|
475
|
-
|
476
|
-
end
|
477
|
-
|
478
|
-
# Concrete parser class: a Treetop::Runtime::CompiledParser subclass that
# mixes in the rule methods defined by ScientificNameCanonical.
class ScientificNameCanonicalParser < Treetop::Runtime::CompiledParser
  include ScientificNameCanonical
end
|
481
|
-
|