anystyle-parser 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -29,7 +29,7 @@ recommended to install Kyoto Cabinet and the `kyotocabinet-ruby` gem.
29
29
  The database file will be created the first time you access the dictionary;
30
30
  note that you will need write permissions in the directory where the file
31
31
  is to be created. You can change the Dictionary's default path in the
32
- Dictrionary's options:
32
+ Dictionary's options:
33
33
 
34
34
  Anystyle::Parser::Dictionary.instance.options[:path]
35
35
 
@@ -72,7 +72,17 @@ The following irb session illustrates some parser goodness:
72
72
  => "Jorge"
73
73
  > b[0].author.to_s
74
74
  => "Liu, Dong C. and Nocedal, Jorge"
75
-
75
+ > puts Anystyle.parse('Auster, Paul. The Art of Hunger. Expanded. New York: Penguin, 1997.', :bibtex).to_s
76
+ @book{2162008820,
77
+ author = {Auster, Paul},
78
+ title = {The Art of Hunger},
79
+ location = {New York},
80
+ publisher = {Penguin},
81
+ edition = {Expanded},
82
+ year = {1997}
83
+ }
84
+ => nil
85
+
76
86
  ### Unhappy with the results?
77
87
 
78
88
  Citation references come in many forms, so, inevitably, you will find data
@@ -117,6 +127,8 @@ data again:
117
127
  > Anystyle.parse 'John Lafferty, Andrew McCallum, and Fernando Pereira. 2001. Conditional random fields: probabilistic models for segmenting and labeling sequence data. In Proceedings of the International Conference on Machine Learning, pages 282-289. Morgan Kaufmann, San Francisco, CA.'
118
128
  => [{:author=>"John Lafferty and Andrew McCallum and Fernando Pereira", :title=>"Conditional random fields: probabilistic models for segmenting and labeling sequence data", :booktitle=>"Proceedings of the International Conference on Machine Learning", :pages=>"282--289", :publisher=>"Morgan Kaufmann", :location=>"San Francisco, CA", :year=>2001, :type=>:inproceedings}]
119
129
 
130
+ If you want to make Anystyle-Parser smarter, please consider sending us your
131
+ tagged references (see below).
120
132
 
121
133
  Contributing
122
134
  ------------
@@ -132,6 +144,10 @@ If you've found a bug or have a question, please open an issue on the
132
144
  Or, for extra credit, clone the Anystyle-Parser repository, write a failing
133
145
  example, fix the bug and submit a pull request.
134
146
 
147
+ If you want to contribute tagged references, please either add them to
148
+ `resources/train.txt` or create a new file in the `resources` directory
149
+ and open a pull request on GitHub.
150
+
135
151
 
136
152
  License
137
153
  -------
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ lib = File.expand_path('../lib/', __FILE__)
2
+ $:.unshift lib unless $:.include?(lib)
3
+
4
+ require 'rake/clean'
5
+
6
+ require 'anystyle/parser/version'
7
+
8
+ task :build => [:clean] do
9
+ system 'gem build anystyle-parser.gemspec'
10
+ end
11
+
12
+ task :release => [:build] do
13
+ system "git tag #{Anystyle::Parser::VERSION}"
14
+ system "gem push anystyle-parser-#{Anystyle::Parser::VERSION}.gem"
15
+ end
16
+
17
+ CLEAN.include('*.gem')
18
+ CLEAN.include('*.rbc')
@@ -136,7 +136,7 @@ module Anystyle
136
136
  s, n, ns, cc = StringScanner.new(names), '', [], 0
137
137
  until s.eos?
138
138
  case
139
- when s.scan(/,?\s*and\b|&/)
139
+ when s.scan(/,?\s*(and\b|&)/)
140
140
  ns << n
141
141
  n, cc = '', 0
142
142
  when s.scan(/\s+/)
@@ -144,14 +144,16 @@ module Anystyle
144
144
  when s.scan(/,?\s*(jr|sr|ph\.?d|m\.?d|esq)\.?/i)
145
145
  n << s.matched
146
146
  when s.scan(/,/)
147
- if cc > 0 || n =~ /\w\w+\s+\w\w+/
147
+ if cc > 0 || (n =~ /\S{2,}\s+\S{2,}/ && s.rest !~ /^\s*\w+(\.|,|$)/)
148
148
  ns << n
149
149
  n, cc = '', 0
150
150
  else
151
151
  n << s.matched
152
152
  cc += 1
153
153
  end
154
- when s.scan(/\w+/), s.scan(/./)
154
+ when s.scan(/\w+/)
155
+ n << s.matched
156
+ when s.scan(/./)
155
157
  n << s.matched
156
158
  end
157
159
  end
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.0.1'.freeze
3
+ VERSION = '0.0.2'.freeze
4
4
  end
5
5
  end
@@ -29,6 +29,26 @@ module Anystyle
29
29
  Normalizer.instance.tokenize_names('A, B, C').should == ['A, B', ' C']
30
30
  end
31
31
 
32
+ it "tokenizes 'Aa Bb, C.'" do
33
+ Normalizer.instance.tokenize_names('Aa Bb, C.').should == ['Aa Bb, C.']
34
+ end
35
+
36
+ it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
37
+ Normalizer.instance.tokenize_names('Aa Bb, C D, and E F G').should == ['Aa Bb', ' C D', ' E F G']
38
+ end
39
+
40
+ [
41
+ ['Poe, Edgar A.', ['Poe, Edgar A.']],
42
+ ['Edgar A. Poe', ['Edgar A. Poe']],
43
+ ['Edgar A. Poe, Herman Melville', ['Edgar A. Poe', ' Herman Melville']],
44
+ ['Poe, Edgar A., Melville, Herman', ['Poe, Edgar A.', ' Melville, Herman']],
45
+ ['Aeschlimann Magnin, E.', ['Aeschlimann Magnin, E.']]
46
+ ].each do |name, tokens|
47
+ it "tokenizes #{name.inspect}" do
48
+ Normalizer.instance.tokenize_names(name).should == tokens
49
+ end
50
+ end
51
+
32
52
  end
33
53
  end
34
54
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-05 00:00:00.000000000Z
12
+ date: 2011-09-06 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bibtex-ruby
16
- requirement: &2152150220 !ruby/object:Gem::Requirement
16
+ requirement: &2153531180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2152150220
24
+ version_requirements: *2153531180
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: wapiti
27
- requirement: &2152110260 !ruby/object:Gem::Requirement
27
+ requirement: &2153529340 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2152110260
35
+ version_requirements: *2153529340
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rake
38
- requirement: &2152107240 !ruby/object:Gem::Requirement
38
+ requirement: &2153528780 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.9'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2152107240
46
+ version_requirements: *2153528780
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: racc
49
- requirement: &2152096280 !ruby/object:Gem::Requirement
49
+ requirement: &2153528020 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '1.4'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2152096280
57
+ version_requirements: *2153528020
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cucumber
60
- requirement: &2152093880 !ruby/object:Gem::Requirement
60
+ requirement: &2153526200 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2152093880
68
+ version_requirements: *2153526200
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
- requirement: &2152091560 !ruby/object:Gem::Requirement
71
+ requirement: &2153524800 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '2.6'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2152091560
79
+ version_requirements: *2153524800
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: ZenTest
82
- requirement: &2152088380 !ruby/object:Gem::Requirement
82
+ requirement: &2153523460 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '4.6'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2152088380
90
+ version_requirements: *2153523460
91
91
  description: A sophisticated parser for academic references based on conditional random
92
92
  fields.
93
93
  email:
@@ -105,6 +105,7 @@ files:
105
105
  - HISTORY.md
106
106
  - LICENSE
107
107
  - README.md
108
+ - Rakefile
108
109
  - anystyle-parser.gemspec
109
110
  - features/step_definitions/parser_steps.rb
110
111
  - features/support/env.rb