anystyle-parser 0.6.2 → 0.6.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/anystyle-parser.gemspec +1 -1
- data/lib/anystyle/parser/parser.rb +22 -5
- data/lib/anystyle/parser/support/anystyle.mod +6608 -8965
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/normalizer_spec.rb +15 -8
- data/spec/anystyle/parser/parser_spec.rb +3 -0
- data/spec/fixtures/train_dps.txt +2 -2
- data/spec/spec_helper.rb +1 -1
- metadata +8 -2
@@ -1,3 +1,5 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
1
3
|
module Anystyle
|
2
4
|
module Parser
|
3
5
|
|
@@ -14,13 +16,13 @@ module Anystyle
|
|
14
16
|
Normalizer.instance.normalize_names('A, B').should == 'A, B.'
|
15
17
|
end
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
it "tokenizes 'A, jr., Bbb'" do
|
20
|
+
Normalizer.instance.normalize_names('A, jr., B').should == 'A, jr., B.'
|
21
|
+
end
|
22
|
+
|
23
|
+
it "tokenizes 'A, B, jr.'" do
|
24
|
+
Normalizer.instance.normalize_names('A, B, jr.').should == 'A, jr., B.'
|
25
|
+
end
|
24
26
|
|
25
27
|
it "tokenizes 'A, B, C, D'" do
|
26
28
|
Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B. and C, D.'
|
@@ -34,8 +36,13 @@ module Anystyle
|
|
34
36
|
Normalizer.instance.normalize_names('Aa Bb, C.').should == 'Aa Bb, C.'
|
35
37
|
end
|
36
38
|
|
39
|
+
it "tokenizes 'Plath, L.C., Asgaard, G., ... Botros, N.'" do
|
40
|
+
Normalizer.instance.normalize_names('Plath, L.C., Asgaard, G., ... Botros, N.').should == 'Plath, L.C. and Asgaard, G. and Botros, N.'
|
41
|
+
Normalizer.instance.normalize_names('Plath, L.C., Asgaard, G., … Botros, N.').should == 'Plath, L.C. and Asgaard, G. and Botros, N.'
|
42
|
+
end
|
43
|
+
|
37
44
|
it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
|
38
|
-
Normalizer.instance.normalize_names('Aa Bb,
|
45
|
+
Normalizer.instance.normalize_names('Aa Bb, Cc Dd, and E F G').should == 'Bb, Aa and Dd, Cc and G, E. F.'
|
39
46
|
end
|
40
47
|
|
41
48
|
[
|
@@ -63,6 +63,9 @@ module Anystyle::Parser
|
|
63
63
|
subject.prepare('hello, world!', true)[0].map { |t| t[/\S+$/] }.should == %w{ unknown unknown }
|
64
64
|
end
|
65
65
|
|
66
|
+
it 'converts xml entitites' do
|
67
|
+
subject.prepare("<note>&gt;&gt; &amp; foo</note>", true)[0].map { |t| t[/\S+/] }.should == %w{ >> & foo }
|
68
|
+
end
|
66
69
|
end
|
67
70
|
end
|
68
71
|
|
data/spec/fixtures/train_dps.txt
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
<author>Williams, J.B., J.D. Shorthouse and R.E. Lee, Jr.</author> <date>2002.</date> <title>Extreme resistance to desiccation and microclimate-related differences in cold-hardiness of gall wasps (Hymenoptera; Cynipidae) overwintering on roses in southern Canada.</title> <journal>The Journal of Experimental Biology</journal> <volume>205:</volume> <pages>2115–2124.</pages>
|
2
|
-
<author>MACDONALD, S., & FENNIAK T.</author> <date>(2007).</date> <title>Understory plant communities of boreal mixedwood forests in western Canada: Natural patterns and response to variable-retention harvesting.</title> <journal>Forest Ecology and Management.</journal> <volume>242(1):</volume> <pages>34–48.</pages>
|
2
|
+
<author>MACDONALD, S., &amp; FENNIAK T.</author> <date>(2007).</date> <title>Understory plant communities of boreal mixedwood forests in western Canada: Natural patterns and response to variable-retention harvesting.</title> <journal>Forest Ecology and Management.</journal> <volume>242(1):</volume> <pages>34–48.</pages>
|
3
3
|
<author>Harris, P. and J.D. Shorthouse.</author> <date>1996.</date> <title>Effectiveness of gall inducers in weed biological control.</title> <journal>The Canadian Entomologist</journal> <volume>128:</volume> <pages>1021–1055.</pages>
|
4
4
|
<author>Williams, J.B., J.D. Shorthouse and R.E. Lee, Jr.</author> <date>2003.</date> <title>Deleterious effects of mild simulated overwintering temperatures on survival and potential fecundity of rose-galling Diplolepis wasps (Hymenoptera: Cynipidae).</title> <journal>Journal of Experimental Zoology</journal> <volume>298A:</volume> <pages>23–31.</pages>
|
5
5
|
<author>Shorthouse, J.D., H. Goulet and D.P. Shorthouse.</author> <date>2003.</date> <title>Notes on cynipid galls, ground beetles and ground-dwelling spiders collected at Fort Severn, Ontario.</title> <journal>Arctic</journal> <volume>56:</volume> <pages>159–167.</pages>
|
6
|
-
<author>Epling, C., Lewis H., & Ball F. M.</author> <date>(1960).</date> <title>The Breeding Group and Seed Storage: A Study in Population Dynamics.</title> <journal>Evolution.</journal> <volume>14,</volume> <pages>238-255.</pages>
|
6
|
+
<author>Epling, C., Lewis H., &amp; Ball F. M.</author> <date>(1960).</date> <title>The Breeding Group and Seed Storage: A Study in Population Dynamics.</title> <journal>Evolution.</journal> <volume>14,</volume> <pages>238-255.</pages>
|
7
7
|
<author>Bagatto, Giuseppe, Louise C. Paquette, and Joseph D. Shorthouse.</author> <date>1995.</date> <title>Influence of galls of Phanacis taraxaci on carbon partitioning within common dandelion, Taraxacum officinale.</title> <journal>Entomologia Experimentalis et Applicata</journal> <volume>79:</volume> <pages>111–117.</pages>
|
8
8
|
<author>Shorthouse, J.D.</author> <date>1993.</date> <title>Adaptations of gall wasps of the genus Diplolepis (Hymenoptera: Cynipidae) and the role of gall anatomy in cynipid systematics.</title> <journal>Memoirs of the Entomological Society of Canada</journal> <volume>165:</volume> <pages>139–163.</pages>
|
9
9
|
<author>Bagatto, G., and J.D. Shorthouse.</author> <date>2000.</date> <title>Evaluation of municipal solid waste (MSW) compost as a soil amendment for acidic, metalliferous mine tailings.</title> <journal>International Journal of Surface Mining, Reclamation and Environment</journal> <volume>14:</volume> <pages>205–214.</pages>
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anystyle-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2
|
4
|
+
version: 0.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bibtex-ruby
|
@@ -65,6 +65,9 @@ dependencies:
|
|
65
65
|
- - ~>
|
66
66
|
- !ruby/object:Gem::Version
|
67
67
|
version: '0.8'
|
68
|
+
- - '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: 0.8.6
|
68
71
|
type: :runtime
|
69
72
|
prerelease: false
|
70
73
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -72,6 +75,9 @@ dependencies:
|
|
72
75
|
- - ~>
|
73
76
|
- !ruby/object:Gem::Version
|
74
77
|
version: '0.8'
|
78
|
+
- - '>='
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: 0.8.6
|
75
81
|
description: A sophisticated parser for academic reference lists and bibliographies
|
76
82
|
based on machine learning algorithms using conditional random fields.
|
77
83
|
email:
|