anystyle-parser 0.3.0 → 0.4.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.3.0'.freeze
3
+ VERSION = '0.4.0'.freeze
4
4
  end
5
5
  end
@@ -1,24 +1,30 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module Anystyle::Parser
4
- describe "Features" do
5
-
6
- describe "numbers" do
7
- let(:f) { Parser.feature[:numbers] }
8
-
9
- %w{ (1992) 1992 2011 1776 }.each do |year|
10
- it "returns :year for #{year.inspect}" do
11
- f.match(year).should == :year
12
- end
13
- end
14
-
15
- %w{ (1) (12) (123) }.each do |year|
16
- it "returns :year for #{year.inspect}" do
17
- f.match(year).should == :numeric
18
- end
19
- end
20
-
21
- end
22
-
23
- end
24
- end
4
+ describe "Features" do
5
+
6
+ describe "numbers" do
7
+ let(:f) { Parser.feature[:numbers] }
8
+
9
+ %w{ (1992) 1992 2011 1776 }.each do |year|
10
+ it "returns :year for #{year.inspect}" do
11
+ f.match(year).should == :year
12
+ end
13
+ end
14
+
15
+ %w{ (1) (12) (123) }.each do |year|
16
+ it "returns :year for #{year.inspect}" do
17
+ f.match(year).should == :numeric
18
+ end
19
+ end
20
+
21
+ ['pp', 'pp.', '23-4', '6124--19', '48 - 9', '19–27'].each do |page|
22
+ it "returns :page for #{page.inspect}" do
23
+ f.match(page).should == :page
24
+ end
25
+ end
26
+
27
+ end
28
+
29
+ end
30
+ end
@@ -1,63 +1,84 @@
1
1
  module Anystyle
2
- module Parser
3
-
4
- describe "Normalizer" do
5
-
6
- describe "#tokenize_names" do
7
-
8
- it "tokenizes 'A B'" do
9
- Normalizer.instance.normalize_names('A B').should == 'B, A'
10
- end
11
-
12
- it "tokenizes 'A, B'" do
13
- Normalizer.instance.normalize_names('A, B').should == 'A, B'
14
- end
15
-
16
- # it "tokenizes 'A, jr., B'" do
17
- # Normalizer.instance.normalize_names('A, jr., B').should == 'A, jr., B'
18
- # end
19
- #
20
- # it "tokenizes 'A, B, jr.'" do
21
- # Normalizer.instance.normalize_names('A, B, jr.').should == 'A, B, jr.'
22
- # end
23
-
24
- it "tokenizes 'A, B, C, D'" do
25
- Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B and C, D'
26
- end
27
-
28
- it "tokenizes 'A, B, C'" do
29
- Normalizer.instance.normalize_names('A, B, C').should == 'A, B and C'
30
- end
31
-
32
- it "tokenizes 'Aa Bb, C.'" do
33
- Normalizer.instance.normalize_names('Aa Bb, C.').should == 'Aa Bb, C.'
34
- end
35
-
36
- it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
37
- Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C and G, E F'
38
- end
39
-
40
- [
41
- ['Poe, Edgar A.', 'Poe, Edgar A.'],
42
- ['Edgar A. Poe', 'Poe, Edgar A.'],
43
- ['Edgar A. Poe, Herman Melville', 'Poe, Edgar A. and Melville, Herman'],
44
- ['Poe, Edgar A., Melville, Herman', 'Poe, Edgar A. and Melville, Herman'],
45
- ['Aeschlimann Magnin, E.', 'Aeschlimann Magnin, E.']
46
- ].each do |name, normalized|
47
- it "tokenizes #{name.inspect}" do
48
- Normalizer.instance.normalize_names(name).should == normalized
49
- end
50
- end
51
-
52
- end
53
-
54
- describe 'date extraction' do
55
- it 'should extract the month and year from a string like (July 2009)' do
56
- Normalizer.instance.normalize_date(:date => '(July 2009).').should == { :year => 2009, :month => 7 }
57
- end
58
- end
59
-
60
- end
61
-
62
- end
63
- end
2
+ module Parser
3
+
4
+ describe "Normalizer" do
5
+ let(:n) { Normalizer.instance }
6
+
7
+ describe "#tokenize_names" do
8
+
9
+ it "tokenizes 'A B'" do
10
+ Normalizer.instance.normalize_names('A B').should == 'B, A'
11
+ end
12
+
13
+ it "tokenizes 'A, B'" do
14
+ Normalizer.instance.normalize_names('A, B').should == 'A, B'
15
+ end
16
+
17
+ # it "tokenizes 'A, jr., B'" do
18
+ # Normalizer.instance.normalize_names('A, jr., B').should == 'A, jr., B'
19
+ # end
20
+ #
21
+ # it "tokenizes 'A, B, jr.'" do
22
+ # Normalizer.instance.normalize_names('A, B, jr.').should == 'A, B, jr.'
23
+ # end
24
+
25
+ it "tokenizes 'A, B, C, D'" do
26
+ Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B and C, D'
27
+ end
28
+
29
+ it "tokenizes 'A, B, C'" do
30
+ Normalizer.instance.normalize_names('A, B, C').should == 'A, B and C'
31
+ end
32
+
33
+ it "tokenizes 'Aa Bb, C.'" do
34
+ Normalizer.instance.normalize_names('Aa Bb, C.').should == 'Aa Bb, C.'
35
+ end
36
+
37
+ it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
38
+ Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C and G, E F'
39
+ end
40
+
41
+ [
42
+ ['Poe, Edgar A.', 'Poe, Edgar A.'],
43
+ ['Edgar A. Poe', 'Poe, Edgar A.'],
44
+ ['Edgar A. Poe, Herman Melville', 'Poe, Edgar A. and Melville, Herman'],
45
+ ['Poe, Edgar A., Melville, Herman', 'Poe, Edgar A. and Melville, Herman'],
46
+ ['Aeschlimann Magnin, E.', 'Aeschlimann Magnin, E.']
47
+ ].each do |name, normalized|
48
+ it "tokenizes #{name.inspect}" do
49
+ Normalizer.instance.normalize_names(name).should == normalized
50
+ end
51
+ end
52
+
53
+ end
54
+
55
+ describe 'editors extraction' do
56
+ it 'recognizes editors in the author field' do
57
+ n.normalize_author(:author => 'D. Knuth (ed.)').should == { :editor => 'Knuth, D.' }
58
+ end
59
+ end
60
+
61
+ describe 'date extraction' do
62
+ it 'extracts month and year from a string like "(July 2009)"' do
63
+ h = Normalizer.instance.normalize_date(:date => '(July 2009)')
64
+ h[:year].should == 2009
65
+ h[:month].should == 7
66
+ h.should_not have_key(:date)
67
+ end
68
+
69
+ it 'extracts month and year from a string like "(1997 Sept.)"' do
70
+ h = Normalizer.instance.normalize_date(:date => '(1997 Sept.)')
71
+ h[:year].should == 1997
72
+ h[:month].should == 9
73
+ h.should_not have_key(:date)
74
+
75
+ h = Normalizer.instance.normalize_date(:date => '(1997 Okt.)')
76
+ h[:year].should == 1997
77
+ h[:month].should == 10
78
+ end
79
+ end
80
+
81
+ end
82
+
83
+ end
84
+ end
@@ -1,3 +1,5 @@
1
+ # -*- encoding: utf-8 -*-
2
+
1
3
  module Anystyle::Parser
2
4
  describe Parser do
3
5
 
@@ -82,18 +84,16 @@ module Anystyle::Parser
82
84
  subject.label('').should == []
83
85
  end
84
86
 
85
- it 'returns an empty array for an empty line' do
87
+ it 'returns an empty array for empty lines' do
86
88
  subject.label("\n").should == []
87
- subject.label("\n ").should == [[],[]]
88
- subject.label(" \n ").should == [[],[]]
89
- subject.label(" \n").should == [[]]
89
+ subject.label("\n ").should == []
90
+ subject.label(" \n ").should == []
91
+ subject.label(" \n").should == []
90
92
  end
91
93
 
92
94
  it 'does not fail for unrecognizable input' do
93
95
  lambda { subject.label("@misc{70213094902020,\n") }.should_not raise_error
94
96
  lambda { subject.label("doi = {DOI:10.1503/jpn.100140}\n}\n") }.should_not raise_error
95
-
96
- pending
97
97
  lambda { subject.label("\n doi ") }.should_not raise_error
98
98
  end
99
99
  end
@@ -117,8 +117,51 @@ module Anystyle::Parser
117
117
  it 'returns the label/token arrays for format "raw"' do
118
118
  subject.parse(citation, :raw)[0][0].should == [:author, 'Perec,']
119
119
  end
120
+
121
+ it 'returns the token in original order for format "raw"' do
122
+ subject.parse(citation, :raw)[0].map(&:last).join(' ').should == citation
123
+
124
+ difference = 'Derrida, J. (1967). L’écriture et la différence (1 éd.). Paris: Éditions du Seuil.'
125
+ subject.parse(difference, :raw)[0].map(&:last).join(' ').should == difference
126
+ end
120
127
  end
121
128
 
129
+ describe "#train" do
130
+ let(:dps) { File.open(fixture_path('train_dps.txt'), 'r:UTF-8').read.split(/\n/) }
131
+
132
+ describe "a pristine model" do
133
+ before(:each) { subject.train '', true }
134
+
135
+ it 'recognizes trained references' do
136
+ subject.learn dps[0]
137
+ subject.parse(strip_tags(dps[0]), :tags)[0].should == dps[0]
138
+ end
139
+
140
+ it 'recognizes trained references when learnt in one go' do
141
+ subject.learn dps
142
+
143
+ dps.each do |d|
144
+ subject.parse(strip_tags(d), :tags)[0].should == d
145
+ end
146
+ end
147
+
148
+ it 'recognizes trained references when learnt separately' do
149
+ pending
150
+
151
+ dps.each do |d|
152
+ subject.learn d
153
+ end
154
+
155
+ dps.each do |d|
156
+ subject.parse(strip_tags(d), :tags)[0].should == d
157
+ end
158
+ end
159
+ end
160
+
161
+ describe "the default model" do
162
+
163
+ end
164
+ end
122
165
 
123
166
  end
124
167
  end
@@ -0,0 +1,12 @@
1
+ <author>Williams, J.B., J.D. Shorthouse and R.E. Lee, Jr.</author> <date>2002.</date> <title>Extreme resistance to desiccation and microclimate-related differences in cold-hardiness of gall wasps (Hymenoptera; Cynipidae) overwintering on roses in southern Canada.</title> <journal>The Journal of Experimental Biology</journal> <volume>205:</volume> <pages>2115–2124.</pages>
2
+ <author>MACDONALD, S., & FENNIAK T.</author> <date>(2007).</date> <title>Understory plant communities of boreal mixedwood forests in western Canada: Natural patterns and response to variable-retention harvesting.</title> <journal>Forest Ecology and Management.</journal> <volume>242(1):</volume> <pages>34–48.</pages>
3
+ <author>Harris, P. and J.D. Shorthouse.</author> <date>1996.</date> <title>Effectiveness of gall inducers in weed biological control.</title> <journal>The Canadian Entomologist</journal> <volume>128:</volume> <pages>1021–1055.</pages>
4
+ <author>Williams, J.B., J.D. Shorthouse and R.E. Lee, Jr.</author> <date>2003.</date> <title>Deleterious effects of mild simulated overwintering temperatures on survival and potential fecundity of rose-galling Diplolepis wasps (Hymenoptera: Cynipidae).</title> <journal>Journal of Experimental Zoology</journal> <volume>298A:</volume> <pages>23–31.</pages>
5
+ <author>Shorthouse, J.D., H. Goulet and D.P. Shorthouse.</author> <date>2003.</date> <title>Notes on cynipid galls, ground beetles and ground-dwelling spiders collected at Fort Severn, Ontario.</title> <journal>Arctic</journal> <volume>56:</volume> <pages>159–167.</pages>
6
+ <author>Epling, C., Lewis H., & Ball F. M.</author> <date>(1960).</date> <title>The Breeding Group and Seed Storage: A Study in Population Dynamics.</title> <journal>Evolution.</journal> <volume>14,</volume> <pages>238-255.</pages>
7
+ <author>Bagatto, Giuseppe, Louise C. Paquette, and Joseph D. Shorthouse.</author> <date>1995.</date> <title>Influence of galls of Phanacis taraxaci on carbon partitioning within common dandelion, Taraxacum officinale.</title> <journal>Entomologia Experimentalis et Applicata</journal> <volume>79:</volume> <pages>111–117.</pages>
8
+ <author>Shorthouse, J.D.</author> <date>1993.</date> <title>Adaptations of gall wasps of the genus Diplolepis (Hymenoptera: Cynipidae) and the role of gall anatomy in cynipid systematics.</title> <journal>Memoirs of the Entomological Society of Canada</journal> <volume>165:</volume> <pages>139–163.</pages>
9
+ <author>Bagatto, G., and J.D. Shorthouse.</author> <date>2000.</date> <title>Evaluation of municipal solid waste (MSW) compost as a soil amendment for acidic, metalliferous mine tailings.</title> <journal>International Journal of Surface Mining, Reclamation and Environment</journal> <volume>14:</volume> <pages>205–214.</pages>
10
+ <author>Shorthouse, J.D., D. Wool, and A. Raman</author> <date>2005.</date> <title>Gall-inducing insects - nature's most sophisticated herbivores.</title> <journal>Basic and Applied Ecology</journal> <volume>6:</volume> <pages>407–411.</pages>
11
+ <author>Sopow, S.L., J.D. Shorthouse, W. Strong, and D.T. Quiring.</author> <date>2003.</date> <title>Evidence for long-distance, chemical gall induction by an insect.</title> <journal>Ecology Letters</journal> <volume>6:</volume> <pages>102–105.</pages>
12
+ <author>Shorthouse, J.D. and J.J. Leggo.</author> <date>2002.</date> <title>Immature stages of the galler Diplolepis triforma (Hymenoptera: Cynipidae) with comments on the role of its prepupa.</title> <journal>The Canadian Entomologist</journal> <volume>134</volume> <pages>433–446.</pages>
data/spec/spec_helper.rb CHANGED
@@ -22,10 +22,22 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
22
22
  require 'rspec'
23
23
  require 'anystyle/parser'
24
24
 
25
- # Requires supporting files with custom matchers and macros, etc,
26
- # in ./support/ and its subdirectories.
27
- Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
25
+ module Fixtures
26
+ PATH = File.expand_path('../fixtures', __FILE__)
27
+
28
+ Dir[File.join(PATH, '*.rb')].each do |fixture|
29
+ require fixture
30
+ end
31
+
32
+ def fixture_path(path)
33
+ File.join(PATH, path)
34
+ end
35
+ end
28
36
 
29
37
  RSpec.configure do |config|
38
+ config.include Fixtures
30
39
 
40
+ def strip_tags(string)
41
+ string.gsub(/<[^>]+>/, '')
42
+ end
31
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bibtex-ruby
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0.0'
33
+ version: '0.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0.0'
40
+ version: '0.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: namae
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -87,6 +87,7 @@ files:
87
87
  - spec/anystyle/parser/normalizer_spec.rb
88
88
  - spec/anystyle/parser/parser_spec.rb
89
89
  - spec/benchmark.rb
90
+ - spec/fixtures/train_dps.txt
90
91
  - spec/profile.rb
91
92
  - spec/spec_helper.rb
92
93
  homepage: http://github.com/inukshuk/anystyle-parser
@@ -115,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
115
116
  version: '0'
116
117
  requirements: []
117
118
  rubyforge_project:
118
- rubygems_version: 2.2.1
119
+ rubygems_version: 2.2.2
119
120
  signing_key:
120
121
  specification_version: 4
121
122
  summary: Smart and fast academic bibliography parser.
@@ -127,6 +128,7 @@ test_files:
127
128
  - spec/anystyle/parser/normalizer_spec.rb
128
129
  - spec/anystyle/parser/parser_spec.rb
129
130
  - spec/benchmark.rb
131
+ - spec/fixtures/train_dps.txt
130
132
  - spec/profile.rb
131
133
  - spec/spec_helper.rb
132
134
  has_rdoc: