anystyle-parser 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/anystyle-parser.gemspec +1 -1
- data/lib/anystyle/parser/parser.rb +22 -5
- data/lib/anystyle/parser/support/anystyle.mod +6608 -8965
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/normalizer_spec.rb +15 -8
- data/spec/anystyle/parser/parser_spec.rb +3 -0
- data/spec/fixtures/train_dps.txt +2 -2
- data/spec/spec_helper.rb +1 -1
- metadata +8 -2
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
|
|
1
3
|
module Anystyle
|
|
2
4
|
module Parser
|
|
3
5
|
|
|
@@ -14,13 +16,13 @@ module Anystyle
|
|
|
14
16
|
Normalizer.instance.normalize_names('A, B').should == 'A, B.'
|
|
15
17
|
end
|
|
16
18
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
19
|
+
it "tokenizes 'A, jr., Bbb'" do
|
|
20
|
+
Normalizer.instance.normalize_names('A, jr., B').should == 'A, jr., B.'
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it "tokenizes 'A, B, jr.'" do
|
|
24
|
+
Normalizer.instance.normalize_names('A, B, jr.').should == 'A, jr., B.'
|
|
25
|
+
end
|
|
24
26
|
|
|
25
27
|
it "tokenizes 'A, B, C, D'" do
|
|
26
28
|
Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B. and C, D.'
|
|
@@ -34,8 +36,13 @@ module Anystyle
|
|
|
34
36
|
Normalizer.instance.normalize_names('Aa Bb, C.').should == 'Aa Bb, C.'
|
|
35
37
|
end
|
|
36
38
|
|
|
39
|
+
it "tokenizes 'Plath, L.C., Asgaard, G., ... Botros, N.'" do
|
|
40
|
+
Normalizer.instance.normalize_names('Plath, L.C., Asgaard, G., ... Botros, N.').should == 'Plath, L.C. and Asgaard, G. and Botros, N.'
|
|
41
|
+
Normalizer.instance.normalize_names('Plath, L.C., Asgaard, G., … Botros, N.').should == 'Plath, L.C. and Asgaard, G. and Botros, N.'
|
|
42
|
+
end
|
|
43
|
+
|
|
37
44
|
it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
|
|
38
|
-
Normalizer.instance.normalize_names('Aa Bb,
|
|
45
|
+
Normalizer.instance.normalize_names('Aa Bb, Cc Dd, and E F G').should == 'Bb, Aa and Dd, Cc and G, E. F.'
|
|
39
46
|
end
|
|
40
47
|
|
|
41
48
|
[
|
|
@@ -63,6 +63,9 @@ module Anystyle::Parser
|
|
|
63
63
|
subject.prepare('hello, world!', true)[0].map { |t| t[/\S+$/] }.should == %w{ unknown unknown }
|
|
64
64
|
end
|
|
65
65
|
|
|
66
|
+
it 'converts xml entitites' do
|
|
67
|
+
subject.prepare("<note>&gt;&gt; &amp; foo</note>", true)[0].map { |t| t[/\S+/] }.should == %w{ >> & foo }
|
|
68
|
+
end
|
|
66
69
|
end
|
|
67
70
|
end
|
|
68
71
|
|
data/spec/fixtures/train_dps.txt
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
<author>Williams, J.B., J.D. Shorthouse and R.E. Lee, Jr.</author> <date>2002.</date> <title>Extreme resistance to desiccation and microclimate-related differences in cold-hardiness of gall wasps (Hymenoptera; Cynipidae) overwintering on roses in southern Canada.</title> <journal>The Journal of Experimental Biology</journal> <volume>205:</volume> <pages>2115–2124.</pages>
|
|
2
|
-
<author>MACDONALD, S., & FENNIAK T.</author> <date>(2007).</date> <title>Understory plant communities of boreal mixedwood forests in western Canada: Natural patterns and response to variable-retention harvesting.</title> <journal>Forest Ecology and Management.</journal> <volume>242(1):</volume> <pages>34–48.</pages>
|
|
2
|
+
<author>MACDONALD, S., &amp; FENNIAK T.</author> <date>(2007).</date> <title>Understory plant communities of boreal mixedwood forests in western Canada: Natural patterns and response to variable-retention harvesting.</title> <journal>Forest Ecology and Management.</journal> <volume>242(1):</volume> <pages>34–48.</pages>
|
|
3
3
|
<author>Harris, P. and J.D. Shorthouse.</author> <date>1996.</date> <title>Effectiveness of gall inducers in weed biological control.</title> <journal>The Canadian Entomologist</journal> <volume>128:</volume> <pages>1021–1055.</pages>
|
|
4
4
|
<author>Williams, J.B., J.D. Shorthouse and R.E. Lee, Jr.</author> <date>2003.</date> <title>Deleterious effects of mild simulated overwintering temperatures on survival and potential fecundity of rose-galling Diplolepis wasps (Hymenoptera: Cynipidae).</title> <journal>Journal of Experimental Zoology</journal> <volume>298A:</volume> <pages>23–31.</pages>
|
|
5
5
|
<author>Shorthouse, J.D., H. Goulet and D.P. Shorthouse.</author> <date>2003.</date> <title>Notes on cynipid galls, ground beetles and ground-dwelling spiders collected at Fort Severn, Ontario.</title> <journal>Arctic</journal> <volume>56:</volume> <pages>159–167.</pages>
|
|
6
|
-
<author>Epling, C., Lewis H., & Ball F. M.</author> <date>(1960).</date> <title>The Breeding Group and Seed Storage: A Study in Population Dynamics.</title> <journal>Evolution.</journal> <volume>14,</volume> <pages>238-255.</pages>
|
|
6
|
+
<author>Epling, C., Lewis H., &amp; Ball F. M.</author> <date>(1960).</date> <title>The Breeding Group and Seed Storage: A Study in Population Dynamics.</title> <journal>Evolution.</journal> <volume>14,</volume> <pages>238-255.</pages>
|
|
7
7
|
<author>Bagatto, Giuseppe, Louise C. Paquette, and Joseph D. Shorthouse.</author> <date>1995.</date> <title>Influence of galls of Phanacis taraxaci on carbon partitioning within common dandelion, Taraxacum officinale.</title> <journal>Entomologia Experimentalis et Applicata</journal> <volume>79:</volume> <pages>111–117.</pages>
|
|
8
8
|
<author>Shorthouse, J.D.</author> <date>1993.</date> <title>Adaptations of gall wasps of the genus Diplolepis (Hymenoptera: Cynipidae) and the role of gall anatomy in cynipid systematics.</title> <journal>Memoirs of the Entomological Society of Canada</journal> <volume>165:</volume> <pages>139–163.</pages>
|
|
9
9
|
<author>Bagatto, G., and J.D. Shorthouse.</author> <date>2000.</date> <title>Evaluation of municipal solid waste (MSW) compost as a soil amendment for acidic, metalliferous mine tailings.</title> <journal>International Journal of Surface Mining, Reclamation and Environment</journal> <volume>14:</volume> <pages>205–214.</pages>
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: anystyle-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.2
|
|
4
|
+
version: 0.6.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sylvester Keil
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-
|
|
11
|
+
date: 2014-05-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bibtex-ruby
|
|
@@ -65,6 +65,9 @@ dependencies:
|
|
|
65
65
|
- - ~>
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
67
|
version: '0.8'
|
|
68
|
+
- - '>='
|
|
69
|
+
- !ruby/object:Gem::Version
|
|
70
|
+
version: 0.8.6
|
|
68
71
|
type: :runtime
|
|
69
72
|
prerelease: false
|
|
70
73
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -72,6 +75,9 @@ dependencies:
|
|
|
72
75
|
- - ~>
|
|
73
76
|
- !ruby/object:Gem::Version
|
|
74
77
|
version: '0.8'
|
|
78
|
+
- - '>='
|
|
79
|
+
- !ruby/object:Gem::Version
|
|
80
|
+
version: 0.8.6
|
|
75
81
|
description: A sophisticated parser for academic reference lists and bibliographies
|
|
76
82
|
based on machine learning algorithms using conditional random fields.
|
|
77
83
|
email:
|