anystyle-parser 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 575f1accfff0d04c318a023de9bd4d1f8720ff98
4
- data.tar.gz: aa53148f49bb5e26947375016cda44f9c0b97f6d
3
+ metadata.gz: 0c9fa55f1ed5e371c241610ef5d77ae1f9696131
4
+ data.tar.gz: 7e6e3d024694dc1e8947bca12a9eb248f356c3cd
5
5
  SHA512:
6
- metadata.gz: a962499569b1dfbf853392b447169498f4e7ad537d1ca7ef3b9bd1a0d20c893aa3c77bf5bc1794410cdea50390cce1e3fec3082b9c982c52e482a51d27615f1c
7
- data.tar.gz: 92dac81d61b7cc52170f15396785187d0b5d99c1ea7819be1b03318fdd29d80faa669845010ff370fffc17b7ff4c2b5659ba95900b5c4be251cbf4e007e70756
6
+ metadata.gz: c6ab99a896fbe1cbc5e7ab2a9ab729c289a052ff1b98a7e870ace9260519ea9500284a05e89acf8752548a7972bf629529fb17e2e967bb4d87f75a36615638ad
7
+ data.tar.gz: 4f75412d5bfd364e492ffc6c83b2e77d4853d75359f5824fc556daa658138db46c3aa391600f03d247c15b272088ba300eb354cafeac57e6e1f088885930334f
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ 0.4.1 / 2014-03-03
2
+ ==================
3
+ * Normalizer tweaks
4
+
1
5
  0.4.0 / 2014-02-27
2
6
  ==================
3
7
  * Update wapiti
@@ -63,14 +63,14 @@ module Anystyle
63
63
 
64
64
  begin
65
65
  require 'redis'
66
- @modes.unshift :redis
66
+ @modes.push :redis
67
67
  rescue LoadError
68
68
  # info 'no redis support detected'
69
69
  end
70
70
 
71
71
  begin
72
72
  require 'kyotocabinet'
73
- @modes.unshift :kyoto
73
+ @modes.push :kyoto
74
74
  rescue LoadError
75
75
  # info 'no kyoto-cabinet support detected'
76
76
  end
@@ -55,9 +55,6 @@ module Anystyle
55
55
  token.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
56
56
  hash[key] = token
57
57
  hash
58
- rescue => e
59
- warn e.message
60
- hash
61
58
  end
62
59
 
63
60
  def normalize_author(hash)
@@ -68,14 +65,11 @@ module Anystyle
68
65
  hash[:editor] = hash.delete(:author)
69
66
  hash = normalize_editor(hash)
70
67
  else
71
- hash['more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
68
+ hash[:'more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
72
69
  authors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
73
70
  hash[:author] = normalize_names(authors)
74
71
  end
75
72
 
76
- hash
77
- rescue => e
78
- warn e.message
79
73
  hash
80
74
  end
81
75
 
@@ -96,9 +90,9 @@ module Anystyle
96
90
  end
97
91
  end
98
92
 
99
- hash['more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
93
+ hash[:'more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
100
94
 
101
- editors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
95
+ editors.gsub!(/^\W+|\W+$/, '')
102
96
  editors.gsub!(/^in\s+/i, '')
103
97
  editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?[^[:alpha:]]*/, '')
104
98
  editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)[^[:alpha:]]*/, '')
@@ -107,37 +101,39 @@ module Anystyle
107
101
  is_trans = !!editors.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
108
102
 
109
103
  hash[:editor] = normalize_names(editors)
110
- hash[:translator] = hash[:editor] if is_trans
104
+ hash[:translator] = hash.delete :editor if is_trans
111
105
 
112
- hash
113
- rescue => e
114
- warn e.message
115
106
  hash
116
107
  end
117
108
 
118
109
  def normalize_translator(hash)
119
110
  translators = hash[:translator]
120
111
 
121
- translators.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
112
+ editors.gsub!(/^\W+|\W+$/, '')
122
113
  translators.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
123
114
  translators.gsub!(/\bby\b/i, '')
124
115
 
125
116
  hash[:translator] = normalize_names(translators)
126
117
  hash
127
- rescue => e
128
- warn e.message
129
- hash
130
118
  end
131
119
 
132
- Namae::Parser.instance.options[:prefer_comma_as_separator] = true
133
-
134
120
  def normalize_names(names)
135
- Namae.parse!(names).map(&:sort_order).join(' and ')
121
+ Namae.parse!(names).map { |name|
122
+ unless name.given.nil? || name.family.nil?
123
+ name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
124
+ end
125
+
126
+ name.sort_order
127
+
128
+ }.join(' and ')
129
+
136
130
  rescue => e
137
131
  warn e.message
138
- hash
132
+ names
139
133
  end
140
134
 
135
+ Namae.options[:prefer_comma_as_separator] = true
136
+
141
137
  def normalize_title(hash)
142
138
  title, container = hash[:title]
143
139
 
@@ -148,14 +144,11 @@ module Anystyle
148
144
 
149
145
  extract_edition(title, hash)
150
146
 
151
- title.gsub!(/^[\s]+|[\.,:;\s]+$/, '')
147
+ title.gsub!(/^\s+|[\.,:;\s]+$/, '')
152
148
  title.gsub!(/^["'”’´‘“`]|["'”’´‘“`]$/, '')
153
149
 
154
150
  hash[:title] = title
155
151
 
156
- hash
157
- rescue => e
158
- warn e.message
159
152
  hash
160
153
  end
161
154
 
@@ -189,16 +182,13 @@ module Anystyle
189
182
  booktitle, *dangling = hash[:booktitle]
190
183
  unmatched(:booktitle, hash, dangling) unless dangling.empty?
191
184
 
192
- booktitle.gsub!(/^in\s*/i, '')
185
+ booktitle.gsub!(/^in\s+/i, '')
193
186
 
194
187
  extract_edition(booktitle, hash)
195
188
 
196
- booktitle.gsub!(/^[\s]+|[\.,:;\s]+$/, '')
189
+ booktitle.gsub!(/^\s+|[\.,:;\s]+$/, '')
197
190
  hash[:booktitle] = booktitle
198
191
 
199
- hash
200
- rescue => e
201
- warn e.message
202
192
  hash
203
193
  end
204
194
 
@@ -209,9 +199,6 @@ module Anystyle
209
199
  journal.gsub!(/^[\s]+|[\.,:;\s]+$/, '')
210
200
  hash[:journal] = journal
211
201
 
212
- hash
213
- rescue => e
214
- warn e.message
215
202
  hash
216
203
  end
217
204
 
@@ -228,9 +215,6 @@ module Anystyle
228
215
 
229
216
  hash[:container] = container
230
217
  hash
231
- rescue => e
232
- warn e.message
233
- hash
234
218
  end
235
219
 
236
220
  def normalize_date(hash)
@@ -246,9 +230,6 @@ module Anystyle
246
230
  hash.delete(:date)
247
231
  end
248
232
 
249
- hash
250
- rescue => e
251
- warn e.message
252
233
  hash
253
234
  end
254
235
 
@@ -276,9 +257,6 @@ module Anystyle
276
257
  end
277
258
  end
278
259
 
279
- hash
280
- rescue => e
281
- warn e.message
282
260
  hash
283
261
  end
284
262
 
@@ -302,9 +280,6 @@ module Anystyle
302
280
  hash[:pages] = $1
303
281
  end
304
282
 
305
- hash
306
- rescue => e
307
- warn e.message
308
283
  hash
309
284
  end
310
285
 
@@ -321,9 +296,6 @@ module Anystyle
321
296
 
322
297
  hash[:location] = location
323
298
  hash
324
- rescue => e
325
- warn e.message
326
- hash
327
299
  end
328
300
 
329
301
  def normalize_isbn(hash)
@@ -333,9 +305,6 @@ module Anystyle
333
305
  isbn = isbn[/[\d-]+/]
334
306
  hash[:isbn] = isbn
335
307
 
336
- hash
337
- rescue => e
338
- warn e.message
339
308
  hash
340
309
  end
341
310
 
@@ -346,9 +315,6 @@ module Anystyle
346
315
  url.gsub!(/^\s+|[,\s]+$/, '')
347
316
  hash[:isbn] = isbn
348
317
  hash
349
- rescue => e
350
- warn e.message
351
- hash
352
318
  end
353
319
 
354
320
  private
@@ -163,8 +163,13 @@ module Anystyle
163
163
 
164
164
  def normalize(hash)
165
165
  hash.keys.each do |label|
166
- normalizer.send("normalize_#{label}", hash)
166
+ begin
167
+ normalizer.send("normalize_#{label}", hash)
168
+ rescue => e
169
+ warn e.message
170
+ end
167
171
  end
172
+
168
173
  classify hash
169
174
  end
170
175
 
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.4.0'.freeze
3
+ VERSION = '0.4.1'.freeze
4
4
  end
5
5
  end
@@ -7,11 +7,11 @@ module Anystyle
7
7
  describe "#tokenize_names" do
8
8
 
9
9
  it "tokenizes 'A B'" do
10
- Normalizer.instance.normalize_names('A B').should == 'B, A'
10
+ Normalizer.instance.normalize_names('A B').should == 'B, A.'
11
11
  end
12
12
 
13
13
  it "tokenizes 'A, B'" do
14
- Normalizer.instance.normalize_names('A, B').should == 'A, B'
14
+ Normalizer.instance.normalize_names('A, B').should == 'A, B.'
15
15
  end
16
16
 
17
17
  # it "tokenizes 'A, jr., B'" do
@@ -23,11 +23,11 @@ module Anystyle
23
23
  # end
24
24
 
25
25
  it "tokenizes 'A, B, C, D'" do
26
- Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B and C, D'
26
+ Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B. and C, D.'
27
27
  end
28
28
 
29
29
  it "tokenizes 'A, B, C'" do
30
- Normalizer.instance.normalize_names('A, B, C').should == 'A, B and C'
30
+ Normalizer.instance.normalize_names('A, B, C').should == 'A, B. and C'
31
31
  end
32
32
 
33
33
  it "tokenizes 'Aa Bb, C.'" do
@@ -35,7 +35,7 @@ module Anystyle
35
35
  end
36
36
 
37
37
  it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
38
- Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C and G, E F'
38
+ Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C. and G, E. F.'
39
39
  end
40
40
 
41
41
  [
metadata CHANGED
@@ -1,55 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-27 00:00:00.000000000 Z
11
+ date: 2014-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bibtex-ruby
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '3.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '3.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: wapiti
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: namae
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0.8'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.8'
55
55
  description: A sophisticated parser for academic reference lists and bibliographies
@@ -96,27 +96,27 @@ licenses:
96
96
  metadata: {}
97
97
  post_install_message:
98
98
  rdoc_options:
99
- - "--line-numbers"
100
- - "--inline-source"
101
- - "--title"
102
- - "\"Anystyle Parser\""
103
- - "--main"
99
+ - --line-numbers
100
+ - --inline-source
101
+ - --title
102
+ - '"Anystyle Parser"'
103
+ - --main
104
104
  - README.md
105
105
  require_paths:
106
106
  - lib
107
107
  required_ruby_version: !ruby/object:Gem::Requirement
108
108
  requirements:
109
- - - ">="
109
+ - - '>='
110
110
  - !ruby/object:Gem::Version
111
111
  version: 1.9.3
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  requirements:
114
- - - ">="
114
+ - - '>='
115
115
  - !ruby/object:Gem::Version
116
116
  version: '0'
117
117
  requirements: []
118
118
  rubyforge_project:
119
- rubygems_version: 2.2.2
119
+ rubygems_version: 2.0.14
120
120
  signing_key:
121
121
  specification_version: 4
122
122
  summary: Smart and fast academic bibliography parser.