library_stdnums 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,6 +63,9 @@ LCCNs are normalized according to the algorithm at http://www.loc.gov/marc/lccn-
63
63
  ````
64
64
 
65
65
  ## CHANGES
66
+ * 1.1.0 (2012.02.06)
67
+ * Changed the ISBN/ISSN regex to make sure the string of digits/dashes is at least 7 chars long (a leading digit plus six or more digits/dashes)
68
+ * Cleaned up LCCN validation code
66
69
  * 1.0.2
67
70
  * Made docs clearer.
68
71
  * 1.0.0
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.2
1
+ 1.1.0
@@ -1,12 +1,15 @@
1
1
  # Static Module functions to work with library "standard numbers" ISSN, ISBN, and LCCN
2
2
  module StdNum
3
-
3
+
4
4
  # Helper methods common to ISBN/ISSN
5
5
  module Helpers
6
-
6
+
7
7
  # The pattern we use to try and find an ISBN/ISSN. Ditch everything before the first
8
8
  # digit, then take all the digits/hyphens, optionally followed by an 'X'
9
- STDNUMPAT = /^.*?(\d[\d\-]+[xX]?)/
9
+ # Since the shortest possible string is 7 digits followed by a checksum digit
10
+ # for an ISSN, we'll make sure they're at least that long. Still imperfect
11
+ # (would find "5------", for example) but should work in most cases.
12
+ STDNUMPAT = /^.*?(\d[\d\-]{6,}+[xX]?)/
10
13
 
11
14
  # Extract the most likely looking number from the string. This will be the first
12
15
  # string of digits-and-hyphens-and-maybe-a-trailing-X, with the hyphens removed
@@ -17,7 +20,7 @@ module StdNum
17
20
  return nil unless match
18
21
  return match[1].gsub(/\-/, '').upcase
19
22
  end
20
-
23
+
21
24
  # Given any string, extract what looks like the most likely ISBN/ISSN
22
25
  # of the given size(s), or nil if nothing matches at the correct size.
23
26
  # @param [String] rawnum The raw string containing (hopefully) an ISSN/ISBN
@@ -26,29 +29,29 @@ module StdNum
26
29
  # @return [String,nil] the reduced and verified number, or nil if there's no match at the right size
27
30
  def reduce_to_basics rawnum, valid_sizes = nil
28
31
  return nil if rawnum.nil?
29
-
32
+
30
33
  num = extractNumber rawnum
31
-
34
+
32
35
  # Does it even look like a number?
33
36
  return nil unless num
34
-
37
+
35
38
  # Return what we've got if we don't care about the size
36
39
  return num unless valid_sizes
37
-
40
+
38
41
  # Check for valid size(s)
39
42
  [valid_sizes].flatten.each do |s|
40
43
  return num if num.size == s
41
44
  end
42
-
45
+
43
46
  # Didn't check out size-wise. Return nil
44
47
  return nil
45
48
  end
46
49
  end
47
-
50
+
48
51
  # Validate, convert, and normalize ISBNs (10-digit or 13-digit)
49
52
  module ISBN
50
53
  extend Helpers
51
-
54
+
52
55
  # Compute check digits for 10 or 13-digit ISBNs. See algorithm at
53
56
  # http://en.wikipedia.org/wiki/International_Standard_Book_Number
54
57
  # @param [String] isbn The ISBN (we'll try to clean it up if possible)
@@ -79,7 +82,7 @@ module StdNum
79
82
  return check.to_s
80
83
  end
81
84
  end
82
-
85
+
83
86
  # Check to see if the checkdigit is correct
84
87
  # @param [String] isbn The ISBN (we'll try to clean it up if possible)
85
88
  # @param [Boolean] preprocessed Set to true if the ISBN has already been through reduce_to_basics
@@ -91,8 +94,8 @@ module StdNum
91
94
  return false unless isbn[-1..-1] == self.checkdigit(isbn, true)
92
95
  return true
93
96
  end
94
-
95
-
97
+
98
+
96
99
  # For an ISBN normalizing it is the same as converting to ISBN 13
97
100
  # and making sure it's valid
98
101
  # @param [String] isbn The ISBN to normalize
@@ -105,8 +108,8 @@ module StdNum
105
108
  return nil
106
109
  end
107
110
  end
108
-
109
- # To convert to an ISBN13, throw a '978' on the front and
111
+
112
+ # To convert to an ISBN13, throw a '978' on the front and
110
113
  # compute the checkdigit
111
114
  # We leave 13-digit numbers alone, figuring they're already ok. NO CHECKSUM CHECK IS DONE FOR 13-DIGIT ISBNS!
112
115
  # and return nil on anything that's not the right length
@@ -130,14 +133,14 @@ module StdNum
130
133
 
131
134
  # Already 10 digits? Just return
132
135
  return isbn if isbn.size == 10
133
-
136
+
134
137
  # Can't be converted to ISBN-10? Bail
135
138
  return nil unless isbn[0..2] == '978'
136
-
139
+
137
140
  prefix = isbn[3..11]
138
141
  return prefix + self.checkdigit(prefix + '0')
139
142
  end
140
-
143
+
141
144
  # Return an array of the ISBN13 and ISBN10 (in that order) for the passed in value. You'll
142
145
  # only get one value back if it's a 13-digit
143
146
  # ISBN that can't be converted to an ISBN10.
@@ -158,19 +161,19 @@ module StdNum
158
161
  return [isbn, self.convert_to_10(isbn)].compact
159
162
  end
160
163
  end
161
-
162
-
164
+
165
+
163
166
  end
164
-
167
+
165
168
  # Validate and normalize ISSNs
166
169
  module ISSN
167
170
  extend Helpers
168
-
171
+
169
172
  # Compute the checkdigit of an ISSN
170
173
  # @param [String] issn The ISSN (we'll try to clean it up if possible)
171
- # @param [Boolean] preprocessed Set to true if the number has already been through reduce_to_basic
174
+ # @param [Boolean] preprocessed Set to true if the number has already been through reduce_to_basics
172
175
  # @return [String] the one-character checkdigit
173
-
176
+
174
177
  def self.checkdigit issn, preprocessed = false
175
178
  issn = reduce_to_basics issn, 8 unless preprocessed
176
179
  return nil unless issn
@@ -178,7 +181,7 @@ module StdNum
178
181
  digits = issn[0..6].split(//).map {|i| i.to_i}
179
182
  checkdigit = 0
180
183
  (0..6).each do |i|
181
- checkdigit += digits[i] * (8 - i)
184
+ checkdigit += digits[i] * (8 - i)
182
185
  end
183
186
  checkdigit = checkdigit % 11
184
187
  return '0' if checkdigit == 0
@@ -186,10 +189,10 @@ module StdNum
186
189
  return 'X' if checkdigit == 10
187
190
  return checkdigit.to_s
188
191
  end
189
-
192
+
190
193
  # Check to see if the checkdigit is correct
191
194
  # @param [String] issn The ISSN (we'll try to clean it up if possible)
192
- # @param [Boolean] preprocessed Set to true if the number has already been through reduce_to_basic
195
+ # @param [Boolean] preprocessed Set to true if the number has already been through reduce_to_basics
193
196
  # @return [Boolean] Whether or not the checkdigit is correct
194
197
 
195
198
  def self.valid? issn, preprocessed = false
@@ -197,9 +200,9 @@ module StdNum
197
200
  return false unless issn
198
201
  return issn[-1..-1] == self.checkdigit(issn, true)
199
202
  end
200
-
201
-
202
-
203
+
204
+
205
+
203
206
  # Make sure it's valid, remove the dashes, uppercase the X, and return
204
207
  # @param [String] isbn The ISSN to normalize
205
208
  # @return [String, nil] the normalized ISSN (dashes removed, X uppercased), or nil on failure
@@ -211,81 +214,81 @@ module StdNum
211
214
  return nil
212
215
  end
213
216
  end
214
-
215
-
216
-
217
+
218
+
219
+
217
220
  end
218
-
221
+
219
222
  # Validate and normalize LCCNs
220
223
  module LCCN
221
-
224
+
225
+
226
+ # Get a string ready for processing as an LCCN
227
+ # @param [String] str The possible lccn
228
+ # @return [String] The munged string, ready for normalization
229
+
230
+ def self.reduce_to_basic str
231
+ rv = str.gsub(/\s/, '') # ditch spaces
232
+ rv.gsub!(/\/.*$/, '') # ditch everything after the first '/' (including the slash)
233
+ return rv
234
+ end
235
+
236
+ # Normalize based on data at http://www.loc.gov/marc/lccn-namespace.html#syntax
237
+ # @param [String] rawlccn The possible LCCN to normalize
238
+ # @return [String, nil] the normalized LCCN, or nil if it looks malformed
239
+ def self.normalize rawlccn
240
+ lccn = reduce_to_basic(rawlccn)
241
+ # If there's a dash in it, deal with that.
242
+ if lccn =~ /^(.*?)\-(.+)/
243
+ pre = $1
244
+ post = $2
245
+ return nil unless post =~ /^\d+$/ # must be all digits
246
+ lccn = "%s%06d" % [pre, post.to_i]
247
+ end
248
+
249
+ if valid?(lccn, true)
250
+ return lccn
251
+ else
252
+ return nil
253
+ end
254
+ end
255
+
222
256
  # The rules for validity according to http://www.loc.gov/marc/lccn-namespace.html#syntax:
223
257
  #
224
258
  # A normalized LCCN is a character string eight to twelve characters in length. (For purposes of this description characters are ordered from left to right -- "first" means "leftmost".)
225
- # The rightmost eight characters are always digits.
259
+ # The rightmost eight characters are always digits.
226
260
  # If the length is 9, then the first character must be alphabetic.
227
261
  # If the length is 10, then the first two characters must be either both digits or both alphabetic.
228
262
  # If the length is 11, then the first character must be alphabetic and the next two characters must be either both digits or both alphabetic.
229
263
  # If the length is 12, then the first two characters must be alphabetic and the remaining characters digits.
230
264
  #
231
265
  # @param [String] lccn The lccn to attempt to validate
232
- # @param [Boolean] preprocessed Set to true if the number has already been normalized
266
+ # @param [Boolean] preprocessed Set to true if the number has already been normalized
233
267
  # @return [Boolean] Whether or not the syntax seems ok
234
268
 
235
- def self.reduce_to_basic str
236
- str.gsub!(/\s/, '') # ditch leading spaces
237
- str.gsub!(/\/.*$/, '') # ditch everything after the first '/' (including the slash)
238
- return str
239
- end
240
-
241
-
242
269
  def self.valid? lccn, preprocessed = false
243
270
  lccn = normalize(lccn) unless preprocessed
244
- return false unless (8..12).include? lccn.size
245
271
  clean = lccn.gsub(/\-/, '')
246
- suffix = clean[-8..-1]
247
- prefix = clean[0..-9]
248
- return false unless suffix =~ /^\d+$/
249
- case clean.size
272
+ suffix = clean[-8..-1] # "the rightmost eight characters are always digits"
273
+ return false unless suffix and suffix =~ /^\d+$/
274
+ case clean.size # "...is a character string eight to twelve characters in length"
250
275
  when 8
251
276
  return true
252
277
  when 9
253
- return true if prefix =~ /[A-Za-z]/
278
+ return true if clean =~ /^[A-Za-z]/
254
279
  when 10
255
- return true if prefix =~ /\d{2}/ or prefix =~ /[A-Za-z]{2}/
280
+ return true if clean =~ /^\d{2}/ or clean =~ /^[A-Za-z]{2}/
256
281
  when 11
257
- return true if prefix =~ /[A-Za-z](\d{2}|[A-Za-z]{2})/
282
+ return true if clean =~ /^[A-Za-z](\d{2}|[A-Za-z]{2})/
258
283
  when 12
259
- return true if prefix =~ /[A-Za-z]{2}\d{2}/
284
+ return true if clean =~ /^[A-Za-z]{2}\d{2}/
260
285
  else
261
286
  return false
262
287
  end
263
288
  end
264
289
 
265
-
266
-
267
- # Normalize based on data at http://www.loc.gov/marc/lccn-namespace.html#syntax
268
- # @param [String] str The LCCN to normalize
269
- # @return [String] the normalized LCCN, or nil if it looks malformed
270
- def self.normalize rawlccn
271
- lccn = reduce_to_basic(rawlccn)
272
- # If there's a dash in it, deal with that.
273
- if lccn =~ /^(.*?)\-(.+)/
274
- pre = $1
275
- post = $2
276
- return nil unless post =~ /^\d+$/ # must be all digits
277
- lccn = "%s%06d" % [pre, post.to_i]
278
- end
279
-
280
- if valid?(lccn, true)
281
- return lccn
282
- else
283
- return nil
284
- end
285
- end
286
- end
287
-
288
-
290
+ end
291
+
289
292
  end
290
-
293
+
291
294
 
@@ -2,33 +2,41 @@ require 'spec_helper'
2
2
 
3
3
  describe "Extract" do
4
4
  it "should leave a number alone" do
5
- StdNum::ISBN.extractNumber('123456').must_equal '123456'
5
+ StdNum::ISBN.extractNumber('1234567').must_equal '1234567'
6
6
  end
7
-
7
+
8
8
  it "should skip leading and trailing crap" do
9
- StdNum::ISBN.extractNumber(' 12345 (online)').must_equal '12345'
9
+ StdNum::ISBN.extractNumber(' 1234567 (online)').must_equal '1234567'
10
10
  end
11
-
11
+
12
12
  it "should allow hyphens" do
13
13
  StdNum::ISBN.extractNumber(' 1-234-5').must_equal '12345'
14
14
  end
15
-
15
+
16
16
  it "should return nil on a non-match" do
17
17
  StdNum::ISBN.extractNumber('bill dueber').must_equal nil
18
18
  end
19
-
20
- it "should allow a trailing X" do
19
+
20
+ it "should allow a trailing X" do
21
21
  StdNum::ISBN.extractNumber('1-234-5-X').must_equal '12345X'
22
22
  end
23
-
23
+
24
24
  it "should upcase any trailing X" do
25
- StdNum::ISBN.extractNumber('1-234-x').must_equal '1234X'
25
+ StdNum::ISBN.extractNumber('1-234-56-x').must_equal '123456X'
26
26
  end
27
-
27
+
28
28
  it "only allows a single trailing X" do
29
- StdNum::ISBN.extractNumber('1234-X-X').must_equal '1234X'
29
+ StdNum::ISBN.extractNumber('123456-X-X').must_equal '123456X'
30
+ end
31
+
32
+ it "doesn't allow numbers that are too short" do
33
+ StdNum::ISBN.extractNumber('12345').must_equal nil
30
34
  end
31
-
35
+
36
+ it "skips over short prefixing numbers" do
37
+ StdNum::ISBN.extractNumber('ISBN13: 1234567890123').must_equal '1234567890123'
38
+ end
39
+
32
40
  end
33
41
 
34
42
 
@@ -36,63 +44,63 @@ describe "ISBN" do
36
44
  it "computes 10-digit checksum" do
37
45
  StdNum::ISBN.checkdigit('0-306-40615-X').must_equal '2'
38
46
  end
39
-
47
+
40
48
  it "correctly uses X for checksum" do
41
49
  StdNum::ISBN.checkdigit('061871460X').must_equal 'X'
42
50
  end
43
-
51
+
44
52
  it "finds a zero checkdigit" do
45
53
  StdNum::ISBN.checkdigit('0139381430').must_equal '0'
46
54
  end
47
-
55
+
48
56
  it "computes 13-digit checksum" do
49
57
  StdNum::ISBN.checkdigit('9780306406157').must_equal '7'
50
58
  end
51
-
59
+
52
60
  it "computes a 13-digit checksum that is 0" do
53
61
  StdNum::ISBN.checkdigit('9783837612950').must_equal '0'
54
62
  end
55
-
63
+
56
64
  it "finds a good number valid" do
57
65
  StdNum::ISBN.valid?('9780306406157').must_equal true
58
66
  end
59
-
67
+
60
68
  it "finds a bad number invalid" do
61
69
  StdNum::ISBN.valid?('9780306406154').must_equal false
62
70
  end
63
-
71
+
64
72
  it "returns nil when computing checksum for bad ISBN" do
65
73
  StdNum::ISBN.checkdigit('12345').must_equal nil
66
74
  end
67
-
75
+
68
76
  it "converts 10 to 13" do
69
77
  StdNum::ISBN.convert_to_13('0-306-40615-2').must_equal '9780306406157'
70
78
  end
71
-
79
+
72
80
  it "passes through 13 digit number instead of converting to 13" do
73
81
  StdNum::ISBN.convert_to_13('9780306406157').must_equal '9780306406157'
74
82
  end
75
-
76
- it "converts 13 to 10" do
83
+
84
+ it "converts 13 to 10" do
77
85
  StdNum::ISBN.convert_to_10('978-0-306-40615-7').must_equal '0306406152'
78
86
  end
79
-
87
+
80
88
  it "gets both normalized values" do
81
89
  a = StdNum::ISBN.allNormalizedValues('978-0-306-40615-7')
82
90
  a.sort.must_equal ['9780306406157', '0306406152' ].sort
83
91
  end
84
-
85
-
86
-
92
+
93
+
94
+
87
95
  end
88
96
 
89
97
 
90
98
 
91
99
  describe 'ISSN' do
92
- it "computes checksum" do
100
+ it "computes checksum" do
93
101
  StdNum::ISSN.checkdigit('0378-5955').must_equal '5'
94
102
  end
95
-
103
+
96
104
  it "normalizes" do
97
105
  StdNum::ISSN.normalize('0378-5955').must_equal '03785955'
98
106
  end
@@ -100,7 +108,7 @@ end
100
108
 
101
109
 
102
110
  describe 'LCCN' do
103
-
111
+
104
112
  # Tests taken from http://www.loc.gov/marc/lccn-namespace.html#syntax
105
113
  test = {
106
114
  "n78-890351" => "n78890351",
@@ -110,14 +118,14 @@ describe 'LCCN' do
110
118
  "85-2 " => "85000002",
111
119
  "2001-000002" => "2001000002",
112
120
  "75-425165//r75" => "75425165",
113
- " 79139101 /AC/r932" => "79139101",
121
+ " 79139101 /AC/r932" => "79139101",
114
122
  }
115
-
123
+
116
124
  test.each do |k, v|
117
125
  it "normalizes #{k}" do
118
126
  StdNum::LCCN.normalize(k.dup).must_equal v
119
127
  end
120
128
  end
121
-
122
-
129
+
130
+
123
131
  end
metadata CHANGED
@@ -1,39 +1,36 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: library_stdnums
3
- version: !ruby/object:Gem::Version
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.0
4
5
  prerelease:
5
- version: 1.0.2
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - Bill Dueber
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
-
13
- date: 2012-01-19 00:00:00 -05:00
14
- default_executable:
15
- dependencies:
16
- - !ruby/object:Gem::Dependency
12
+ date: 2012-02-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
17
15
  name: yard
18
- prerelease: false
19
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &2153722800 !ruby/object:Gem::Requirement
20
17
  none: false
21
- requirements:
22
- - - ">="
23
- - !ruby/object:Gem::Version
24
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
25
22
  type: :development
26
- version_requirements: *id001
27
- description: Normalization and checksum computation for ISBN (10 and 13), ISSN, and LCCN
23
+ prerelease: false
24
+ version_requirements: *2153722800
25
+ description: Normalization and checksum computation for ISBN (10 and 13), ISSN, and
26
+ LCCN
28
27
  email: bill@dueber.com
29
28
  executables: []
30
-
31
29
  extensions: []
32
-
33
- extra_rdoc_files:
30
+ extra_rdoc_files:
34
31
  - LICENSE
35
32
  - README.markdown
36
- files:
33
+ files:
37
34
  - .document
38
35
  - LICENSE
39
36
  - README.markdown
@@ -42,33 +39,28 @@ files:
42
39
  - lib/library_stdnums.rb
43
40
  - spec/library_stdnums_spec.rb
44
41
  - spec/spec_helper.rb
45
- has_rdoc: true
46
42
  homepage: http://github.com/billdueber/library_stdnums
47
43
  licenses: []
48
-
49
44
  post_install_message:
50
45
  rdoc_options: []
51
-
52
- require_paths:
46
+ require_paths:
53
47
  - lib
54
- required_ruby_version: !ruby/object:Gem::Requirement
48
+ required_ruby_version: !ruby/object:Gem::Requirement
55
49
  none: false
56
- requirements:
57
- - - ">="
58
- - !ruby/object:Gem::Version
59
- version: "0"
60
- required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
55
  none: false
62
- requirements:
63
- - - ">="
64
- - !ruby/object:Gem::Version
65
- version: "0"
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
66
60
  requirements: []
67
-
68
61
  rubyforge_project:
69
- rubygems_version: 1.6.2
62
+ rubygems_version: 1.8.15
70
63
  signing_key:
71
64
  specification_version: 3
72
65
  summary: Normalize and compute checkdigits for ISBN, ISSN, and LCCN
73
66
  test_files: []
74
-