anystyle-parser 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 575f1accfff0d04c318a023de9bd4d1f8720ff98
4
- data.tar.gz: aa53148f49bb5e26947375016cda44f9c0b97f6d
3
+ metadata.gz: 0c9fa55f1ed5e371c241610ef5d77ae1f9696131
4
+ data.tar.gz: 7e6e3d024694dc1e8947bca12a9eb248f356c3cd
5
5
  SHA512:
6
- metadata.gz: a962499569b1dfbf853392b447169498f4e7ad537d1ca7ef3b9bd1a0d20c893aa3c77bf5bc1794410cdea50390cce1e3fec3082b9c982c52e482a51d27615f1c
7
- data.tar.gz: 92dac81d61b7cc52170f15396785187d0b5d99c1ea7819be1b03318fdd29d80faa669845010ff370fffc17b7ff4c2b5659ba95900b5c4be251cbf4e007e70756
6
+ metadata.gz: c6ab99a896fbe1cbc5e7ab2a9ab729c289a052ff1b98a7e870ace9260519ea9500284a05e89acf8752548a7972bf629529fb17e2e967bb4d87f75a36615638ad
7
+ data.tar.gz: 4f75412d5bfd364e492ffc6c83b2e77d4853d75359f5824fc556daa658138db46c3aa391600f03d247c15b272088ba300eb354cafeac57e6e1f088885930334f
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ 0.4.1 / 2014-03-03
2
+ ==================
3
+ * Normalizer tweaks
4
+
1
5
  0.4.0 / 2014-02-27
2
6
  ==================
3
7
  * Update wapiti
@@ -63,14 +63,14 @@ module Anystyle
63
63
 
64
64
  begin
65
65
  require 'redis'
66
- @modes.unshift :redis
66
+ @modes.push :redis
67
67
  rescue LoadError
68
68
  # info 'no redis support detected'
69
69
  end
70
70
 
71
71
  begin
72
72
  require 'kyotocabinet'
73
- @modes.unshift :kyoto
73
+ @modes.push :kyoto
74
74
  rescue LoadError
75
75
  # info 'no kyoto-cabinet support detected'
76
76
  end
@@ -55,9 +55,6 @@ module Anystyle
55
55
  token.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
56
56
  hash[key] = token
57
57
  hash
58
- rescue => e
59
- warn e.message
60
- hash
61
58
  end
62
59
 
63
60
  def normalize_author(hash)
@@ -68,14 +65,11 @@ module Anystyle
68
65
  hash[:editor] = hash.delete(:author)
69
66
  hash = normalize_editor(hash)
70
67
  else
71
- hash['more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
68
+ hash[:'more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
72
69
  authors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
73
70
  hash[:author] = normalize_names(authors)
74
71
  end
75
72
 
76
- hash
77
- rescue => e
78
- warn e.message
79
73
  hash
80
74
  end
81
75
 
@@ -96,9 +90,9 @@ module Anystyle
96
90
  end
97
91
  end
98
92
 
99
- hash['more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
93
+ hash[:'more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
100
94
 
101
- editors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
95
+ editors.gsub!(/^\W+|\W+$/, '')
102
96
  editors.gsub!(/^in\s+/i, '')
103
97
  editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?[^[:alpha:]]*/, '')
104
98
  editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)[^[:alpha:]]*/, '')
@@ -107,37 +101,39 @@ module Anystyle
107
101
  is_trans = !!editors.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
108
102
 
109
103
  hash[:editor] = normalize_names(editors)
110
- hash[:translator] = hash[:editor] if is_trans
104
+ hash[:translator] = hash.delete :editor if is_trans
111
105
 
112
- hash
113
- rescue => e
114
- warn e.message
115
106
  hash
116
107
  end
117
108
 
118
109
  def normalize_translator(hash)
119
110
  translators = hash[:translator]
120
111
 
121
- translators.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
112
+ editors.gsub!(/^\W+|\W+$/, '')
122
113
  translators.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
123
114
  translators.gsub!(/\bby\b/i, '')
124
115
 
125
116
  hash[:translator] = normalize_names(translators)
126
117
  hash
127
- rescue => e
128
- warn e.message
129
- hash
130
118
  end
131
119
 
132
- Namae::Parser.instance.options[:prefer_comma_as_separator] = true
133
-
134
120
  def normalize_names(names)
135
- Namae.parse!(names).map(&:sort_order).join(' and ')
121
+ Namae.parse!(names).map { |name|
122
+ unless name.given.nil? || name.family.nil?
123
+ name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
124
+ end
125
+
126
+ name.sort_order
127
+
128
+ }.join(' and ')
129
+
136
130
  rescue => e
137
131
  warn e.message
138
- hash
132
+ names
139
133
  end
140
134
 
135
+ Namae.options[:prefer_comma_as_separator] = true
136
+
141
137
  def normalize_title(hash)
142
138
  title, container = hash[:title]
143
139
 
@@ -148,14 +144,11 @@ module Anystyle
148
144
 
149
145
  extract_edition(title, hash)
150
146
 
151
- title.gsub!(/^[\s]+|[\.,:;\s]+$/, '')
147
+ title.gsub!(/^\s+|[\.,:;\s]+$/, '')
152
148
  title.gsub!(/^["'”’´‘“`]|["'”’´‘“`]$/, '')
153
149
 
154
150
  hash[:title] = title
155
151
 
156
- hash
157
- rescue => e
158
- warn e.message
159
152
  hash
160
153
  end
161
154
 
@@ -189,16 +182,13 @@ module Anystyle
189
182
  booktitle, *dangling = hash[:booktitle]
190
183
  unmatched(:booktitle, hash, dangling) unless dangling.empty?
191
184
 
192
- booktitle.gsub!(/^in\s*/i, '')
185
+ booktitle.gsub!(/^in\s+/i, '')
193
186
 
194
187
  extract_edition(booktitle, hash)
195
188
 
196
- booktitle.gsub!(/^[\s]+|[\.,:;\s]+$/, '')
189
+ booktitle.gsub!(/^\s+|[\.,:;\s]+$/, '')
197
190
  hash[:booktitle] = booktitle
198
191
 
199
- hash
200
- rescue => e
201
- warn e.message
202
192
  hash
203
193
  end
204
194
 
@@ -209,9 +199,6 @@ module Anystyle
209
199
  journal.gsub!(/^[\s]+|[\.,:;\s]+$/, '')
210
200
  hash[:journal] = journal
211
201
 
212
- hash
213
- rescue => e
214
- warn e.message
215
202
  hash
216
203
  end
217
204
 
@@ -228,9 +215,6 @@ module Anystyle
228
215
 
229
216
  hash[:container] = container
230
217
  hash
231
- rescue => e
232
- warn e.message
233
- hash
234
218
  end
235
219
 
236
220
  def normalize_date(hash)
@@ -246,9 +230,6 @@ module Anystyle
246
230
  hash.delete(:date)
247
231
  end
248
232
 
249
- hash
250
- rescue => e
251
- warn e.message
252
233
  hash
253
234
  end
254
235
 
@@ -276,9 +257,6 @@ module Anystyle
276
257
  end
277
258
  end
278
259
 
279
- hash
280
- rescue => e
281
- warn e.message
282
260
  hash
283
261
  end
284
262
 
@@ -302,9 +280,6 @@ module Anystyle
302
280
  hash[:pages] = $1
303
281
  end
304
282
 
305
- hash
306
- rescue => e
307
- warn e.message
308
283
  hash
309
284
  end
310
285
 
@@ -321,9 +296,6 @@ module Anystyle
321
296
 
322
297
  hash[:location] = location
323
298
  hash
324
- rescue => e
325
- warn e.message
326
- hash
327
299
  end
328
300
 
329
301
  def normalize_isbn(hash)
@@ -333,9 +305,6 @@ module Anystyle
333
305
  isbn = isbn[/[\d-]+/]
334
306
  hash[:isbn] = isbn
335
307
 
336
- hash
337
- rescue => e
338
- warn e.message
339
308
  hash
340
309
  end
341
310
 
@@ -346,9 +315,6 @@ module Anystyle
346
315
  url.gsub!(/^\s+|[,\s]+$/, '')
347
316
  hash[:isbn] = isbn
348
317
  hash
349
- rescue => e
350
- warn e.message
351
- hash
352
318
  end
353
319
 
354
320
  private
@@ -163,8 +163,13 @@ module Anystyle
163
163
 
164
164
  def normalize(hash)
165
165
  hash.keys.each do |label|
166
- normalizer.send("normalize_#{label}", hash)
166
+ begin
167
+ normalizer.send("normalize_#{label}", hash)
168
+ rescue => e
169
+ warn e.message
170
+ end
167
171
  end
172
+
168
173
  classify hash
169
174
  end
170
175
 
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.4.0'.freeze
3
+ VERSION = '0.4.1'.freeze
4
4
  end
5
5
  end
@@ -7,11 +7,11 @@ module Anystyle
7
7
  describe "#tokenize_names" do
8
8
 
9
9
  it "tokenizes 'A B'" do
10
- Normalizer.instance.normalize_names('A B').should == 'B, A'
10
+ Normalizer.instance.normalize_names('A B').should == 'B, A.'
11
11
  end
12
12
 
13
13
  it "tokenizes 'A, B'" do
14
- Normalizer.instance.normalize_names('A, B').should == 'A, B'
14
+ Normalizer.instance.normalize_names('A, B').should == 'A, B.'
15
15
  end
16
16
 
17
17
  # it "tokenizes 'A, jr., B'" do
@@ -23,11 +23,11 @@ module Anystyle
23
23
  # end
24
24
 
25
25
  it "tokenizes 'A, B, C, D'" do
26
- Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B and C, D'
26
+ Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B. and C, D.'
27
27
  end
28
28
 
29
29
  it "tokenizes 'A, B, C'" do
30
- Normalizer.instance.normalize_names('A, B, C').should == 'A, B and C'
30
+ Normalizer.instance.normalize_names('A, B, C').should == 'A, B. and C'
31
31
  end
32
32
 
33
33
  it "tokenizes 'Aa Bb, C.'" do
@@ -35,7 +35,7 @@ module Anystyle
35
35
  end
36
36
 
37
37
  it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
38
- Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C and G, E F'
38
+ Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C. and G, E. F.'
39
39
  end
40
40
 
41
41
  [
metadata CHANGED
@@ -1,55 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-27 00:00:00.000000000 Z
11
+ date: 2014-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bibtex-ruby
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '3.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '3.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: wapiti
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: namae
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0.8'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.8'
55
55
  description: A sophisticated parser for academic reference lists and bibliographies
@@ -96,27 +96,27 @@ licenses:
96
96
  metadata: {}
97
97
  post_install_message:
98
98
  rdoc_options:
99
- - "--line-numbers"
100
- - "--inline-source"
101
- - "--title"
102
- - "\"Anystyle Parser\""
103
- - "--main"
99
+ - --line-numbers
100
+ - --inline-source
101
+ - --title
102
+ - '"Anystyle Parser"'
103
+ - --main
104
104
  - README.md
105
105
  require_paths:
106
106
  - lib
107
107
  required_ruby_version: !ruby/object:Gem::Requirement
108
108
  requirements:
109
- - - ">="
109
+ - - '>='
110
110
  - !ruby/object:Gem::Version
111
111
  version: 1.9.3
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  requirements:
114
- - - ">="
114
+ - - '>='
115
115
  - !ruby/object:Gem::Version
116
116
  version: '0'
117
117
  requirements: []
118
118
  rubyforge_project:
119
- rubygems_version: 2.2.2
119
+ rubygems_version: 2.0.14
120
120
  signing_key:
121
121
  specification_version: 4
122
122
  summary: Smart and fast academic bibliography parser.