anystyle-parser 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -29,7 +29,7 @@ recommended to install Kyoto Cabinet and the `kyotocabinet-ruby` gem.
29
29
  The database file will be created the first time you access the dictionary;
30
30
  note that you will need write permissions in the directory where the file
31
31
  is to be created. You can change the Dictionary's default path in the
32
- Dictrionary's options:
32
+ Dictionary's options:
33
33
 
34
34
  Anystyle::Parser::Dictionary.instance.options[:path]
35
35
 
@@ -72,7 +72,17 @@ The following irb session illustrates some parser goodness:
72
72
  => "Jorge"
73
73
  > b[0].author.to_s
74
74
  => "Liu, Dong C. and Nocedal, Jorge"
75
-
75
+ > puts Anystyle.parse('Auster, Paul. The Art of Hunger. Expanded. New York: Penguin, 1997.', :bibtex).to_s
76
+ @book{2162008820,
77
+ author = {Auster, Paul},
78
+ title = {The Art of Hunger},
79
+ location = {New York},
80
+ publisher = {Penguin},
81
+ edition = {Expanded},
82
+ year = {1997}
83
+ }
84
+ => nil
85
+
76
86
  ### Unhappy with the results?
77
87
 
78
88
  Citation references come in many forms, so, inevitably, you will find data
@@ -117,6 +127,8 @@ data again:
117
127
  > Anystyle.parse 'John Lafferty, Andrew McCallum, and Fernando Pereira. 2001. Conditional random fields: probabilistic models for segmenting and labeling sequence data. In Proceedings of the International Conference on Machine Learning, pages 282-289. Morgan Kaufmann, San Francisco, CA.'
118
128
  => [{:author=>"John Lafferty and Andrew McCallum and Fernando Pereira", :title=>"Conditional random fields: probabilistic models for segmenting and labeling sequence data", :booktitle=>"Proceedings of the International Conference on Machine Learning", :pages=>"282--289", :publisher=>"Morgan Kaufmann", :location=>"San Francisco, CA", :year=>2001, :type=>:inproceedings}]
119
129
 
130
+ If you want to make Anystyle-Parser smarter, please consider sending us your
131
+ tagged references (see below).
120
132
 
121
133
  Contributing
122
134
  ------------
@@ -132,6 +144,10 @@ If you've found a bug or have a question, please open an issue on the
132
144
  Or, for extra credit, clone the Anystyle-Parser repository, write a failing
133
145
  example, fix the bug and submit a pull request.
134
146
 
147
+ If you want to contribute tagged references, please either add them to
148
+ `resources/train.txt` or create a new file in the `resources` directory
149
+ and open a pull request on GitHub.
150
+
135
151
 
136
152
  License
137
153
  -------
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ lib = File.expand_path('../lib/', __FILE__)
2
+ $:.unshift lib unless $:.include?(lib)
3
+
4
+ require 'rake/clean'
5
+
6
+ require 'anystyle/parser/version'
7
+
8
+ task :build => [:clean] do
9
+ system 'gem build anystyle-parser.gemspec'
10
+ end
11
+
12
+ task :release => [:build] do
13
+ system "git tag #{Anystyle::Parser::VERSION}"
14
+ system "gem push anystyle-parser-#{Anystyle::Parser::VERSION}.gem"
15
+ end
16
+
17
+ CLEAN.include('*.gem')
18
+ CLEAN.include('*.rbc')
@@ -136,7 +136,7 @@ module Anystyle
136
136
  s, n, ns, cc = StringScanner.new(names), '', [], 0
137
137
  until s.eos?
138
138
  case
139
- when s.scan(/,?\s*and\b|&/)
139
+ when s.scan(/,?\s*(and\b|&)/)
140
140
  ns << n
141
141
  n, cc = '', 0
142
142
  when s.scan(/\s+/)
@@ -144,14 +144,16 @@ module Anystyle
144
144
  when s.scan(/,?\s*(jr|sr|ph\.?d|m\.?d|esq)\.?/i)
145
145
  n << s.matched
146
146
  when s.scan(/,/)
147
- if cc > 0 || n =~ /\w\w+\s+\w\w+/
147
+ if cc > 0 || (n =~ /\S{2,}\s+\S{2,}/ && s.rest !~ /^\s*\w+(\.|,|$)/)
148
148
  ns << n
149
149
  n, cc = '', 0
150
150
  else
151
151
  n << s.matched
152
152
  cc += 1
153
153
  end
154
- when s.scan(/\w+/), s.scan(/./)
154
+ when s.scan(/\w+/)
155
+ n << s.matched
156
+ when s.scan(/./)
155
157
  n << s.matched
156
158
  end
157
159
  end
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.0.1'.freeze
3
+ VERSION = '0.0.2'.freeze
4
4
  end
5
5
  end
@@ -29,6 +29,26 @@ module Anystyle
29
29
  Normalizer.instance.tokenize_names('A, B, C').should == ['A, B', ' C']
30
30
  end
31
31
 
32
+ it "tokenizes 'Aa Bb, C.'" do
33
+ Normalizer.instance.tokenize_names('Aa Bb, C.').should == ['Aa Bb, C.']
34
+ end
35
+
36
+ it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
37
+ Normalizer.instance.tokenize_names('Aa Bb, C D, and E F G').should == ['Aa Bb', ' C D', ' E F G']
38
+ end
39
+
40
+ [
41
+ ['Poe, Edgar A.', ['Poe, Edgar A.']],
42
+ ['Edgar A. Poe', ['Edgar A. Poe']],
43
+ ['Edgar A. Poe, Herman Melville', ['Edgar A. Poe', ' Herman Melville']],
44
+ ['Poe, Edgar A., Melville, Herman', ['Poe, Edgar A.', ' Melville, Herman']],
45
+ ['Aeschlimann Magnin, E.', ['Aeschlimann Magnin, E.']]
46
+ ].each do |name, tokens|
47
+ it "tokenizes #{name.inspect}" do
48
+ Normalizer.instance.tokenize_names(name).should == tokens
49
+ end
50
+ end
51
+
32
52
  end
33
53
  end
34
54
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-05 00:00:00.000000000Z
12
+ date: 2011-09-06 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bibtex-ruby
16
- requirement: &2152150220 !ruby/object:Gem::Requirement
16
+ requirement: &2153531180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2152150220
24
+ version_requirements: *2153531180
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: wapiti
27
- requirement: &2152110260 !ruby/object:Gem::Requirement
27
+ requirement: &2153529340 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2152110260
35
+ version_requirements: *2153529340
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rake
38
- requirement: &2152107240 !ruby/object:Gem::Requirement
38
+ requirement: &2153528780 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.9'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2152107240
46
+ version_requirements: *2153528780
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: racc
49
- requirement: &2152096280 !ruby/object:Gem::Requirement
49
+ requirement: &2153528020 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '1.4'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2152096280
57
+ version_requirements: *2153528020
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cucumber
60
- requirement: &2152093880 !ruby/object:Gem::Requirement
60
+ requirement: &2153526200 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2152093880
68
+ version_requirements: *2153526200
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
- requirement: &2152091560 !ruby/object:Gem::Requirement
71
+ requirement: &2153524800 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '2.6'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2152091560
79
+ version_requirements: *2153524800
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: ZenTest
82
- requirement: &2152088380 !ruby/object:Gem::Requirement
82
+ requirement: &2153523460 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '4.6'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2152088380
90
+ version_requirements: *2153523460
91
91
  description: A sophisticated parser for academic references based on conditional random
92
92
  fields.
93
93
  email:
@@ -105,6 +105,7 @@ files:
105
105
  - HISTORY.md
106
106
  - LICENSE
107
107
  - README.md
108
+ - Rakefile
108
109
  - anystyle-parser.gemspec
109
110
  - features/step_definitions/parser_steps.rb
110
111
  - features/support/env.rb