library_stdnums 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -63,6 +63,9 @@ LCCNs are normalized according to the algorithm at http://www.loc.gov/marc/lccn-
63
63
  ````
64
64
 
65
65
  ## CHANGES
66
+ * 1.1.0 (2012.02.06)
67
+ * Changed the ISBN/ISSN regex to make sure string of digits/dashes is at least 7 chars long
68
+ * Cleaned up LCCN validation code
66
69
  * 1.0.2
67
70
  * Made docs clearer.
68
71
  * 1.0.0
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.2
1
+ 1.1.0
@@ -1,12 +1,15 @@
1
1
  # Static Module functions to work with library "standard numbers" ISSN, ISBN, and LCCN
2
2
  module StdNum
3
-
3
+
4
4
  # Helper methods common to ISBN/ISSN
5
5
  module Helpers
6
-
6
+
7
7
  # The pattern we use to try and find an ISBN/ISSN. Ditch everything before the first
8
8
  # digit, then take all the digits/hyphens, optionally followed by an 'X'
9
- STDNUMPAT = /^.*?(\d[\d\-]+[xX]?)/
9
+ # Since the shortest possible string is 7 digits followed by a checksum digit
10
+ # for an ISSN, we'll make sure they're at least that long. Still imperfect
11
+ # (would find "5------", for example) but should work in most cases.
12
+ STDNUMPAT = /^.*?(\d[\d\-]{6,}+[xX]?)/
10
13
 
11
14
  # Extract the most likely looking number from the string. This will be the first
12
15
  # string of digits-and-hyphens-and-maybe-a-trailing-X, with the hyphens removed
@@ -17,7 +20,7 @@ module StdNum
17
20
  return nil unless match
18
21
  return match[1].gsub(/\-/, '').upcase
19
22
  end
20
-
23
+
21
24
  # Given any string, extract what looks like the most likely ISBN/ISSN
22
25
  # of the given size(s), or nil if nothing matches at the correct size.
23
26
  # @param [String] rawnum The raw string containing (hopefully) an ISSN/ISBN
@@ -26,29 +29,29 @@ module StdNum
26
29
  # @return [String,nil] the reduced and verified number, or nil if there's no match at the right size
27
30
  def reduce_to_basics rawnum, valid_sizes = nil
28
31
  return nil if rawnum.nil?
29
-
32
+
30
33
  num = extractNumber rawnum
31
-
34
+
32
35
  # Does it even look like a number?
33
36
  return nil unless num
34
-
37
+
35
38
  # Return what we've got if we don't care about the size
36
39
  return num unless valid_sizes
37
-
40
+
38
41
  # Check for valid size(s)
39
42
  [valid_sizes].flatten.each do |s|
40
43
  return num if num.size == s
41
44
  end
42
-
45
+
43
46
  # Didn't check out size-wise. Return nil
44
47
  return nil
45
48
  end
46
49
  end
47
-
50
+
48
51
  # Validate, convert, and normalize ISBNs (10-digit or 13-digit)
49
52
  module ISBN
50
53
  extend Helpers
51
-
54
+
52
55
  # Compute check digits for 10 or 13-digit ISBNs. See algorithm at
53
56
  # http://en.wikipedia.org/wiki/International_Standard_Book_Number
54
57
  # @param [String] isbn The ISBN (we'll try to clean it up if possible)
@@ -79,7 +82,7 @@ module StdNum
79
82
  return check.to_s
80
83
  end
81
84
  end
82
-
85
+
83
86
  # Check to see if the checkdigit is correct
84
87
  # @param [String] isbn The ISBN (we'll try to clean it up if possible)
85
88
  # @param [Boolean] preprocessed Set to true if the ISBN has already been through reduce_to_basics
@@ -91,8 +94,8 @@ module StdNum
91
94
  return false unless isbn[-1..-1] == self.checkdigit(isbn, true)
92
95
  return true
93
96
  end
94
-
95
-
97
+
98
+
96
99
  # For an ISBN normalizing it is the same as converting to ISBN 13
97
100
  # and making sure it's valid
98
101
  # @param [String] isbn The ISBN to normalize
@@ -105,8 +108,8 @@ module StdNum
105
108
  return nil
106
109
  end
107
110
  end
108
-
109
- # To convert to an ISBN13, throw a '978' on the front and
111
+
112
+ # To convert to an ISBN13, throw a '978' on the front and
110
113
  # compute the checkdigit
111
114
  # We leave 13-digit numbers alone, figuring they're already ok. NO CHECKSUM CHECK IS DONE FOR 13-DIGIT ISBNS!
112
115
  # and return nil on anything that's not the right length
@@ -130,14 +133,14 @@ module StdNum
130
133
 
131
134
  # Already 10 digits? Just return
132
135
  return isbn if isbn.size == 10
133
-
136
+
134
137
  # Can't be converted to ISBN-10? Bail
135
138
  return nil unless isbn[0..2] == '978'
136
-
139
+
137
140
  prefix = isbn[3..11]
138
141
  return prefix + self.checkdigit(prefix + '0')
139
142
  end
140
-
143
+
141
144
  # Return an array of the ISBN13 and ISBN10 (in that order) for the passed in value. You'll
142
145
  # only get one value back if it's a 13-digit
143
146
  # ISBN that can't be converted to an ISBN10.
@@ -158,19 +161,19 @@ module StdNum
158
161
  return [isbn, self.convert_to_10(isbn)].compact
159
162
  end
160
163
  end
161
-
162
-
164
+
165
+
163
166
  end
164
-
167
+
165
168
  # Validate and normalize ISSNs
166
169
  module ISSN
167
170
  extend Helpers
168
-
171
+
169
172
  # Compute the checkdigit of an ISSN
170
173
  # @param [String] issn The ISSN (we'll try to clean it up if possible)
171
- # @param [Boolean] preprocessed Set to true if the number has already been through reduce_to_basic
174
+ # @param [Boolean] preprocessed Set to true if the number has already been through reduce_to_basic
172
175
  # @return [String] the one-character checkdigit
173
-
176
+
174
177
  def self.checkdigit issn, preprocessed = false
175
178
  issn = reduce_to_basics issn, 8 unless preprocessed
176
179
  return nil unless issn
@@ -178,7 +181,7 @@ module StdNum
178
181
  digits = issn[0..6].split(//).map {|i| i.to_i}
179
182
  checkdigit = 0
180
183
  (0..6).each do |i|
181
- checkdigit += digits[i] * (8 - i)
184
+ checkdigit += digits[i] * (8 - i)
182
185
  end
183
186
  checkdigit = checkdigit % 11
184
187
  return '0' if checkdigit == 0
@@ -186,10 +189,10 @@ module StdNum
186
189
  return 'X' if checkdigit == 10
187
190
  return checkdigit.to_s
188
191
  end
189
-
192
+
190
193
  # Check to see if the checkdigit is correct
191
194
  # @param [String] issn The ISSN (we'll try to clean it up if possible)
192
- # @param [Boolean] preprocessed Set to true if the number has already been through reduce_to_basic
195
+ # @param [Boolean] preprocessed Set to true if the number has already been through reduce_to_basic
193
196
  # @return [Boolean] Whether or not the checkdigit is correct
194
197
 
195
198
  def self.valid? issn, preprocessed = false
@@ -197,9 +200,9 @@ module StdNum
197
200
  return false unless issn
198
201
  return issn[-1..-1] == self.checkdigit(issn, true)
199
202
  end
200
-
201
-
202
-
203
+
204
+
205
+
203
206
  # Make sure it's valid, remove the dashes, uppercase the X, and return
204
207
  # @param [String] isbn The ISBN to normalize
205
208
  # @return [String, nil] the normalized ISSN (dashes removed, X uppercased), or nil on failure
@@ -211,81 +214,81 @@ module StdNum
211
214
  return nil
212
215
  end
213
216
  end
214
-
215
-
216
-
217
+
218
+
219
+
217
220
  end
218
-
221
+
219
222
  # Validate and normalize LCCNs
220
223
  module LCCN
221
-
224
+
225
+
226
+ # Get a string ready for processing as an LCCN
227
+ # @param [String] str The possible lccn
228
+ # @return [String] The munged string, ready for normalization
229
+
230
+ def self.reduce_to_basic str
231
+ rv = str.gsub(/\s/, '') # ditch spaces
232
+ rv.gsub!(/\/.*$/, '') # ditch everything after the first '/' (including the slash)
233
+ return rv
234
+ end
235
+
236
+ # Normalize based on data at http://www.loc.gov/marc/lccn-namespace.html#syntax
237
+ # @param [String] str The possible LCCN to normalize
238
+ # @return [String, nil] the normalized LCCN, or nil if it looks malformed
239
+ def self.normalize rawlccn
240
+ lccn = reduce_to_basic(rawlccn)
241
+ # If there's a dash in it, deal with that.
242
+ if lccn =~ /^(.*?)\-(.+)/
243
+ pre = $1
244
+ post = $2
245
+ return nil unless post =~ /^\d+$/ # must be all digits
246
+ lccn = "%s%06d" % [pre, post.to_i]
247
+ end
248
+
249
+ if valid?(lccn, true)
250
+ return lccn
251
+ else
252
+ return nil
253
+ end
254
+ end
255
+
222
256
  # The rules for validity according to http://www.loc.gov/marc/lccn-namespace.html#syntax:
223
257
  #
224
258
  # A normalized LCCN is a character string eight to twelve characters in length. (For purposes of this description characters are ordered from left to right -- "first" means "leftmost".)
225
- # The rightmost eight characters are always digits.
259
+ # The rightmost eight characters are always digits.
226
260
  # If the length is 9, then the first character must be alphabetic.
227
261
  # If the length is 10, then the first two characters must be either both digits or both alphabetic.
228
262
  # If the length is 11, then the first character must be alphabetic and the next two characters must be either both digits or both alphabetic.
229
263
  # If the length is 12, then the first two characters must be alphabetic and the remaining characters digits.
230
264
  #
231
265
  # @param [String] lccn The lccn to attempt to validate
232
- # @param [Boolean] preprocessed Set to true if the number has already been normalized
266
+ # @param [Boolean] preprocessed Set to true if the number has already been normalized
233
267
  # @return [Boolean] Whether or not the syntax seems ok
234
268
 
235
- def self.reduce_to_basic str
236
- str.gsub!(/\s/, '') # ditch leading spaces
237
- str.gsub!(/\/.*$/, '') # ditch everything after the first '/' (including the slash)
238
- return str
239
- end
240
-
241
-
242
269
  def self.valid? lccn, preprocessed = false
243
270
  lccn = normalize(lccn) unless preprocessed
244
- return false unless (8..12).include? lccn.size
245
271
  clean = lccn.gsub(/\-/, '')
246
- suffix = clean[-8..-1]
247
- prefix = clean[0..-9]
248
- return false unless suffix =~ /^\d+$/
249
- case clean.size
272
+ suffix = clean[-8..-1] # "the rightmost eight characters are always digits"
273
+ return false unless suffix and suffix =~ /^\d+$/
274
+ case clean.size # "...is a character string eight to twelve digits in length"
250
275
  when 8
251
276
  return true
252
277
  when 9
253
- return true if prefix =~ /[A-Za-z]/
278
+ return true if clean =~ /^[A-Za-z]/
254
279
  when 10
255
- return true if prefix =~ /\d{2}/ or prefix =~ /[A-Za-z]{2}/
280
+ return true if clean =~ /^\d{2}/ or clean =~ /^[A-Za-z]{2}/
256
281
  when 11
257
- return true if prefix =~ /[A-Za-z](\d{2}|[A-Za-z]{2})/
282
+ return true if clean =~ /^[A-Za-z](\d{2}|[A-Za-z]{2})/
258
283
  when 12
259
- return true if prefix =~ /[A-Za-z]{2}\d{2}/
284
+ return true if clean =~ /^[A-Za-z]{2}\d{2}/
260
285
  else
261
286
  return false
262
287
  end
263
288
  end
264
289
 
265
-
266
-
267
- # Normalize based on data at http://www.loc.gov/marc/lccn-namespace.html#syntax
268
- # @param [String] str The LCCN to normalize
269
- # @return [String] the normalized LCCN, or nil if it looks malformed
270
- def self.normalize rawlccn
271
- lccn = reduce_to_basic(rawlccn)
272
- # If there's a dash in it, deal with that.
273
- if lccn =~ /^(.*?)\-(.+)/
274
- pre = $1
275
- post = $2
276
- return nil unless post =~ /^\d+$/ # must be all digits
277
- lccn = "%s%06d" % [pre, post.to_i]
278
- end
279
-
280
- if valid?(lccn, true)
281
- return lccn
282
- else
283
- return nil
284
- end
285
- end
286
- end
287
-
288
-
290
+ end
291
+
289
292
  end
290
-
293
+
291
294
 
@@ -2,33 +2,41 @@ require 'spec_helper'
2
2
 
3
3
  describe "Extract" do
4
4
  it "should leave a number alone" do
5
- StdNum::ISBN.extractNumber('123456').must_equal '123456'
5
+ StdNum::ISBN.extractNumber('1234567').must_equal '1234567'
6
6
  end
7
-
7
+
8
8
  it "should skip leading and trailing crap" do
9
- StdNum::ISBN.extractNumber(' 12345 (online)').must_equal '12345'
9
+ StdNum::ISBN.extractNumber(' 1234567 (online)').must_equal '1234567'
10
10
  end
11
-
11
+
12
12
  it "should allow hyphens" do
13
13
  StdNum::ISBN.extractNumber(' 1-234-5').must_equal '12345'
14
14
  end
15
-
15
+
16
16
  it "should return nil on a non-match" do
17
17
  StdNum::ISBN.extractNumber('bill dueber').must_equal nil
18
18
  end
19
-
20
- it "should allow a trailing X" do
19
+
20
+ it "should allow a trailing X" do
21
21
  StdNum::ISBN.extractNumber('1-234-5-X').must_equal '12345X'
22
22
  end
23
-
23
+
24
24
  it "should upcase any trailing X" do
25
- StdNum::ISBN.extractNumber('1-234-x').must_equal '1234X'
25
+ StdNum::ISBN.extractNumber('1-234-56-x').must_equal '123456X'
26
26
  end
27
-
27
+
28
28
  it "only allows a single trailing X" do
29
- StdNum::ISBN.extractNumber('1234-X-X').must_equal '1234X'
29
+ StdNum::ISBN.extractNumber('123456-X-X').must_equal '123456X'
30
+ end
31
+
32
+ it "doesn't allow numbers that are too short" do
33
+ StdNum::ISBN.extractNumber('12345').must_equal nil
30
34
  end
31
-
35
+
36
+ it "skips over short prefixing numbers" do
37
+ StdNum::ISBN.extractNumber('ISBN13: 1234567890123').must_equal '1234567890123'
38
+ end
39
+
32
40
  end
33
41
 
34
42
 
@@ -36,63 +44,63 @@ describe "ISBN" do
36
44
  it "computes 10-digit checksum" do
37
45
  StdNum::ISBN.checkdigit('0-306-40615-X').must_equal '2'
38
46
  end
39
-
47
+
40
48
  it "correctly uses X for checksum" do
41
49
  StdNum::ISBN.checkdigit('061871460X').must_equal 'X'
42
50
  end
43
-
51
+
44
52
  it "finds a zero checkdigit" do
45
53
  StdNum::ISBN.checkdigit('0139381430').must_equal '0'
46
54
  end
47
-
55
+
48
56
  it "computes 13-digit checksum" do
49
57
  StdNum::ISBN.checkdigit('9780306406157').must_equal '7'
50
58
  end
51
-
59
+
52
60
  it "computes a 13-digit checksum that is 0" do
53
61
  StdNum::ISBN.checkdigit('9783837612950').must_equal '0'
54
62
  end
55
-
63
+
56
64
  it "finds a good number valid" do
57
65
  StdNum::ISBN.valid?('9780306406157').must_equal true
58
66
  end
59
-
67
+
60
68
  it "finds a bad number invalid" do
61
69
  StdNum::ISBN.valid?('9780306406154').must_equal false
62
70
  end
63
-
71
+
64
72
  it "returns nil when computing checksum for bad ISBN" do
65
73
  StdNum::ISBN.checkdigit('12345').must_equal nil
66
74
  end
67
-
75
+
68
76
  it "converts 10 to 13" do
69
77
  StdNum::ISBN.convert_to_13('0-306-40615-2').must_equal '9780306406157'
70
78
  end
71
-
79
+
72
80
  it "passes through 13 digit number instead of converting to 13" do
73
81
  StdNum::ISBN.convert_to_13('9780306406157').must_equal '9780306406157'
74
82
  end
75
-
76
- it "converts 13 to 10" do
83
+
84
+ it "converts 13 to 10" do
77
85
  StdNum::ISBN.convert_to_10('978-0-306-40615-7').must_equal '0306406152'
78
86
  end
79
-
87
+
80
88
  it "gets both normalized values" do
81
89
  a = StdNum::ISBN.allNormalizedValues('978-0-306-40615-7')
82
90
  a.sort.must_equal ['9780306406157', '0306406152' ].sort
83
91
  end
84
-
85
-
86
-
92
+
93
+
94
+
87
95
  end
88
96
 
89
97
 
90
98
 
91
99
  describe 'ISSN' do
92
- it "computes checksum" do
100
+ it "computes checksum" do
93
101
  StdNum::ISSN.checkdigit('0378-5955').must_equal '5'
94
102
  end
95
-
103
+
96
104
  it "normalizes" do
97
105
  StdNum::ISSN.normalize('0378-5955').must_equal '03785955'
98
106
  end
@@ -100,7 +108,7 @@ end
100
108
 
101
109
 
102
110
  describe 'LCCN' do
103
-
111
+
104
112
  # Tests taken from http://www.loc.gov/marc/lccn-namespace.html#syntax
105
113
  test = {
106
114
  "n78-890351" => "n78890351",
@@ -110,14 +118,14 @@ describe 'LCCN' do
110
118
  "85-2 " => "85000002",
111
119
  "2001-000002" => "2001000002",
112
120
  "75-425165//r75" => "75425165",
113
- " 79139101 /AC/r932" => "79139101",
121
+ " 79139101 /AC/r932" => "79139101",
114
122
  }
115
-
123
+
116
124
  test.each do |k, v|
117
125
  it "normalizes #{k}" do
118
126
  StdNum::LCCN.normalize(k.dup).must_equal v
119
127
  end
120
128
  end
121
-
122
-
129
+
130
+
123
131
  end
metadata CHANGED
@@ -1,39 +1,36 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: library_stdnums
3
- version: !ruby/object:Gem::Version
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.0
4
5
  prerelease:
5
- version: 1.0.2
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - Bill Dueber
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
-
13
- date: 2012-01-19 00:00:00 -05:00
14
- default_executable:
15
- dependencies:
16
- - !ruby/object:Gem::Dependency
12
+ date: 2012-02-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
17
15
  name: yard
18
- prerelease: false
19
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &2153722800 !ruby/object:Gem::Requirement
20
17
  none: false
21
- requirements:
22
- - - ">="
23
- - !ruby/object:Gem::Version
24
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
25
22
  type: :development
26
- version_requirements: *id001
27
- description: Normalization and checksum computation for ISBN (10 and 13), ISSN, and LCCN
23
+ prerelease: false
24
+ version_requirements: *2153722800
25
+ description: Normalization and checksum computation for ISBN (10 and 13), ISSN, and
26
+ LCCN
28
27
  email: bill@dueber.com
29
28
  executables: []
30
-
31
29
  extensions: []
32
-
33
- extra_rdoc_files:
30
+ extra_rdoc_files:
34
31
  - LICENSE
35
32
  - README.markdown
36
- files:
33
+ files:
37
34
  - .document
38
35
  - LICENSE
39
36
  - README.markdown
@@ -42,33 +39,28 @@ files:
42
39
  - lib/library_stdnums.rb
43
40
  - spec/library_stdnums_spec.rb
44
41
  - spec/spec_helper.rb
45
- has_rdoc: true
46
42
  homepage: http://github.com/billdueber/library_stdnums
47
43
  licenses: []
48
-
49
44
  post_install_message:
50
45
  rdoc_options: []
51
-
52
- require_paths:
46
+ require_paths:
53
47
  - lib
54
- required_ruby_version: !ruby/object:Gem::Requirement
48
+ required_ruby_version: !ruby/object:Gem::Requirement
55
49
  none: false
56
- requirements:
57
- - - ">="
58
- - !ruby/object:Gem::Version
59
- version: "0"
60
- required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
55
  none: false
62
- requirements:
63
- - - ">="
64
- - !ruby/object:Gem::Version
65
- version: "0"
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
66
60
  requirements: []
67
-
68
61
  rubyforge_project:
69
- rubygems_version: 1.6.2
62
+ rubygems_version: 1.8.15
70
63
  signing_key:
71
64
  specification_version: 3
72
65
  summary: Normalize and compute checkdigits for ISBN, ISSN, and LCCN
73
66
  test_files: []
74
-