marc 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
- require 'ensure_valid_encoding'
1
+ require 'scrub_rb'
2
2
 
3
3
  # Note: requiring 'marc/marc8/to_unicode' below, in #initialize,
4
4
  # only when necessary
5
5
 
6
-
7
6
  module MARC
8
7
  # A class for reading MARC binary (ISO 2709) files.
9
8
  #
@@ -413,7 +412,14 @@ module MARC
413
412
  if params[:internal_encoding]
414
413
  str = str.encode(params[:internal_encoding], params)
415
414
  elsif (params[:invalid] || params[:replace] || (params[:validate_encoding] == true))
416
- str = EnsureValidEncoding.ensure_valid_encoding(str, params)
415
+
416
+ if params[:validate_encoding] == true && ! str.valid_encoding?
417
+ raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
418
+ end
419
+ if params[:invalid] == :replace
420
+ str = str.scrub(params[:replace])
421
+ end
422
+
417
423
  end
418
424
  end
419
425
  return str
@@ -1,3 +1,3 @@
1
1
  module MARC
2
- VERSION = "0.8.1"
2
+ VERSION = "0.8.2"
3
3
  end
@@ -0,0 +1,108 @@
1
+ module MARC
2
+
3
+ # A class for writing MARC records as MARC21.
4
+
5
+ class Writer
6
+
7
+ # the constructor, to which you must pass either a file path
8
+ # or an object that responds to a write message
9
+
10
+ def initialize(file)
11
+ if file.class == String
12
+ @fh = File.new(file,"w")
13
+ elsif file.respond_to?('write')
14
+ @fh = file
15
+ else
16
+ throw "must pass in file name or handle"
17
+ end
18
+ end
19
+
20
+
21
+ # write a record to the file or handle
22
+
23
+ def write(record)
24
+ @fh.write(MARC::Writer.encode(record))
25
+ end
26
+
27
+
28
+ # close underlying filehandle
29
+
30
+ def close
31
+ @fh.close
32
+ end
33
+
34
+
35
+ # a static method that accepts a MARC::Record object
36
+ # and returns the record encoded as MARC21 in transmission format
37
+
38
+ def self.encode(record)
39
+ directory = ''
40
+ fields = ''
41
+ offset = 0
42
+ for field in record.fields
43
+
44
+ # encode the field
45
+ field_data = ''
46
+ if field.class == MARC::DataField
47
+ warn("Warn: Missing indicator") unless field.indicator1 && field.indicator2
48
+ field_data = (field.indicator1 || " ") + (field.indicator2 || " ")
49
+ for s in field.subfields
50
+ field_data += SUBFIELD_INDICATOR + s.code + s.value
51
+ end
52
+ elsif field.class == MARC::ControlField
53
+ field_data = field.value
54
+ end
55
+ field_data += END_OF_FIELD
56
+
57
+ # calculate directory entry for the field
58
+ field_length = (field_data.respond_to?(:bytesize) ?
59
+ field_data.bytesize() :
60
+ field_data.length())
61
+ directory += sprintf("%03s", field.tag) + format_byte_count(field_length, 4) + format_byte_count(offset)
62
+
63
+
64
+ # add field to data for other fields
65
+ fields += field_data
66
+
67
+ # update offset for next field
68
+ offset += field_length
69
+ end
70
+
71
+ # determine the base (leader + directory)
72
+ base = record.leader + directory + END_OF_FIELD
73
+
74
+ # determine complete record
75
+ marc = base + fields + END_OF_RECORD
76
+
77
+ # update leader with the byte offset to the end of the directory
78
+ marc[12..16] = format_byte_count(base.respond_to?(:bytesize) ?
79
+ base.bytesize() :
80
+ base.length()
81
+ )
82
+
83
+ # update the record length
84
+ marc[0..4] = format_byte_count(marc.respond_to?(:bytesize) ?
85
+ marc.bytesize() :
86
+ marc.length()
87
+ )
88
+
89
+ # store updated leader in the record that was passed in
90
+ record.leader = marc[0..LEADER_LENGTH-1]
91
+
92
+ # return encoded marc
93
+ return marc
94
+ end
95
+
96
+ def self.format_byte_count(number, num_digits=5)
97
+ formatted = sprintf("%0#{num_digits}i", number)
98
+ if formatted.length > num_digits
99
+ # uh-oh, we've exceeded our max. Ideally we would either zero out
100
+ # or raise, depending on settings; for now the field is filled with 9s.
101
+ #formatted = sprintf("%0#{num_digits}i", "")
102
+ formatted = "9" * num_digits
103
+ end
104
+ return formatted
105
+ end
106
+
107
+ end
108
+ end
@@ -0,0 +1,24 @@
1
+ 01161cam a2200289 4500
2
+ 001 178448
3
+ 008 s1996 xx spa d
4
+ 035 $a X!b
5
+ 049 $a JHWV [AV] [NIRC] $n o
6
+ 096 $a WY 20.5 VC6 1996
7
+ 110 2 $a National Institutes of Health (U.S.)
8
+ 110 2 $a National Institute of Nursing Research (U.S.)
9
+ 110 2 $a Department of Health & Human Services (U.S.)
10
+ 245 0 $a Ten years at NIH : $b advancing health through science : the human dimension / $c Patricia A. Grady, Harold Varmus.
11
+ 246 $a 10 years at NIH
12
+ 300 $a 2 videocassettes (229 min.) : $b sd., col. ; $c 1/2 in.
13
+ 520 $a A series of speakers recounts advances in nursing research from 1986 to 1996. ˜
14
+ 538 $a VHS.
15
+ 650 2 $a Nursing Care $x videocassettes
16
+ 650 2 $a Nursing Research $x videocassettes
17
+ 650 2 $a Nursing $x videocassettes
18
+ 700 10 $a Grady, Patricia Anne, $d 1943-
19
+ 700 1 $a Varmus, Harold
20
+ 910 $a 178448 $b Horizon bib#
21
+ 949 31 $7 1 $5 WY 20.5 VC6 1996 $0 26 $0 G $2 A $8 5 $4 1
22
+ 991 $a WY 20.5 VC6 1996 $f nlm $b wnlm $c c. 1 $q 0 $i 3199765 $l wempbk $m elsc
23
+ 991 $a WY 20.5 VC6 1996 $f nlm $b wnlm $c c. 1 $q 0 $i 3199766 $l wempbk $m elsc
24
+
@@ -0,0 +1 @@
1
+ 01161cam a2200289 4500001000700000008004100007035000800048049002400056096002100080110004100101110005100142110004900193245011600242246002000358300005600378520008500434538000900519650003300528650003700561650002800598700003300626700001900659910002500678949004000703991006400743991006400807178448 s1996 xx spa d aX!b aJHWV [AV] [NIRC]no aWY 20.5 VC6 19962 aNational Institutes of Health (U.S.)2 aNational Institute of Nursing Research (U.S.)2 aDepartment of Health & Human Services (U.S.) 0aTen years at NIH :badvancing health through science : the human dimension /cPatricia A. Grady, Harold Varmus. a10 years at NIH a2 videocassettes (229 min.) :bsd., col. ;c1/2 in. aA series of speakers recounts advances in nursing research from 1986 to 1996. ˜ aVHS. 2aNursing Carexvideocassettes 2aNursing Researchxvideocassettes 2aNursingxvideocassettes10aGrady, Patricia Anne,d1943-1 aVarmus, Harold a178448bHorizon bib#31715WY 20.5 VC6 19960260G2A8541 aWY 20.5 VC6 1996fnlmbwnlmcc. 1q0i3199765lwempbkmelsc aWY 20.5 VC6 1996fnlmbwnlmcc. 1q0i3199766lwempbkmelsc
@@ -0,0 +1 @@
1
+ 01161cam a2200289 4500001000700000008004100007035000800048049002400056096002100080110004100101110005100142110004900193245011600242246002000358300005600378520008500434538000900519650003300528650003700561650002800598700003300626700001900659910002500678949004000703991006400743991006400807178448 s1996 xx spa d aX!b aJHWV [AV] [NIRC]no aWY 20.5 VC6 19962 aNational Institutes of Health (U.S.)2 aNational Institute of Nursing Research (U.S.)2 aDepartment of Health & Human Services (U.S.) 0aTen years at NIH :badvancing health through science : the human dimension /cPatricia A. Grady, Harold Varmus. a10 years at NIH a2 videocassettes (229 min.) :bsd., col. ;c1/2 in. aA series of speakers recounts advances in nursing research from 1986 to 1996. ˜ aVHS. 2aNursing Carexvideocassettes 2aNursing Researchxvideocassettes 2aNursingxvideocassettes10aGrady, Patricia Anne,d1943-1 aVarmus, Harold a178448bHorizon bib#31715WY 20.5 VC6 19960260G2A8541 aWY 20.5 VC6 1996fnlmbwnlmcc. 1q0i3199765lwempbkmelsc aWY 20.5 VC6 1996fnlmbwnlmcc. 1q0i3199766lwempbkmelsc
@@ -0,0 +1 @@
1
+ 01998cam a2200469 a 4500001000800000005001700008008004100025020001800066020001500084024001800099024001200117035001200129035001700141035002100158040003200179042000800211049000900219050002100228066000700249100002900256245010000285246008400385260015000469300002100619504006400640541010900704600004100813600004700854650002500901650002300926650002900949650002500978752002201003880003701025880013001062880017301192910002601365936002701391938003801418991006001456994001201516408398520120302131100.0110313s2011 is b 001 0 heb c a9789651321337 a96513213348 a00032002131958 a3221319 a4083985 aocn710973037 a(OCoLC)710973037 aWEINBcWEINBdHLSdIXAdCUY apcc aJHEE 4aDS149b.R38 2011 c(21 6880-01aRatsabi, Shalom.106880-02aAnarkhizm be-"Tsiyon" :bben Marṭin Buber le-Aharon Daṿid Gordon /cShalom Ratsabi.1 iTitle on t.p. verso:aAnarchy in "Zion" :bbetween Martin Buber and A.D. Gordon 6880-03a[Tel Aviv] :bʻAm ʻoved :bha-Makhon le-ḥeḳer ha-Tsiyonut ve-Yiśra'el ʻa. sh. Ḥayim Ṿaitsman, Universiṭat Tel Aviv,cc2011. a339 p. ;c23 cm. aIncludes bibliographical references (p. 320-330) and index. 3Eisenhower copy:cPurchased with support from the National Endowment for the Humanities;dFY2012.5MdBJ.10aBuber, Martin,d1878-1965xReligion.10aGordon, Aaron David,d1856-1922xReligion. 0aZionism and Judaism. 0aReligious Zionism. 0aZionismxHistoriography. 0aZionismxPhilosophy. aIsraeldTel Aviv.1 6100-01/(2/raרצבי, שלום.106245-02/(2/raאנרכיזם ב״ציון״ :bבין מרטין בובר לאהרן דוד גורדון /cשלום רצבי. 6260-03/(2/ra[תל אביב] :bעם עובד :bהמכון לחקר הציונות וישראל עʺש חיים ויצמן, אוניברסיטת תל אביב,cc2011. a4083985bHorizon bib# aPR 747581025 741225747 aA.I. WeinbergbWEINnwb2011369996 aDS149.R38 2011flcbelccc. 1q0i6085034lemainmemsel aC0bJHE
@@ -0,0 +1,40 @@
1
+ 01161cam a2200289 4500
2
+ 001 178448
3
+ 008 s1996 xx spa d
4
+ 035 $a X!b
5
+ 049 $a JHWV [AV] [NIRC] $n o
6
+ 096 $a WY 20.5 VC6 1996
7
+ 110 2 $a National Institutes of Health (U.S.)
8
+ 110 2 $a National Institute of Nursing Research (U.S.)#
9
+ (No separator at end of field length=51)
10
+ (Bad indicator data. Skipping 2 bytes)
11
+ 110 2 $a Department of Health & Human Services (U.S.)
12
+ (Bad indicator data. Skipping 2 bytes)
13
+ 245 0 $a Ten years at NIH : $b advancing health through science : the human dimension / $c Patricia A. Grady, Harold Varmus.
14
+ (Bad indicator data. Skipping 2 bytes)
15
+ 246 $a 10 years at NIH
16
+ (Bad indicator data. Skipping 2 bytes)
17
+ 300 $a 2 videocassettes (229 min.) : $b sd., col. ; $c 1/2 in.
18
+ (Bad indicator data. Skipping 2 bytes)
19
+ 520 $a A series of speakers recounts advances in nursing research from 1986 to 1996. ˜
20
+ (Bad indicator data. Skipping 2 bytes)
21
+ 538 $a VHS.
22
+ (Bad indicator data. Skipping 2 bytes)
23
+ 650 2 $a Nursing Care $x videocassettes
24
+ (Bad indicator data. Skipping 2 bytes)
25
+ 650 2 $a Nursing Research $x videocassettes
26
+ (Bad indicator data. Skipping 2 bytes)
27
+ 650 2 $a Nursing $x videocassettes
28
+ (Bad indicator data. Skipping 2 bytes)
29
+ 700 10 $a Grady, Patricia Anne, $d 1943-
30
+ (Bad indicator data. Skipping 2 bytes)
31
+ 700 1 $a Varmus, Harold
32
+ (Bad indicator data. Skipping 2 bytes)
33
+ 910 $a 178448 $b Horizon bib#
34
+ (Bad indicator data. Skipping 2 bytes)
35
+ 949 31 $7 1 $5 WY 20.5 VC6 1996 $0 26 $0 G $2 A $8 5 $4 1
36
+ (Bad indicator data. Skipping 2 bytes)
37
+ 991 $a WY 20.5 VC6 1996 $f nlm $b wnlm $c c. 1 $q 0 $i 3199765 $l wempbk $m elsc
38
+ (Bad indicator data. Skipping 2 bytes)
39
+ 991 $a WY 20.5 VC6 1996 $f nlm $b wnlm $c c. 1 $q 0 $i 3199766 $l wempbk $m elsc
40
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.8.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -14,7 +14,7 @@ authors:
14
14
  autorequire: marc
15
15
  bindir: bin
16
16
  cert_chain: []
17
- date: 2013-11-26 00:00:00.000000000 Z
17
+ date: 2014-09-16 00:00:00.000000000 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: ensure_valid_encoding
@@ -32,6 +32,28 @@ dependencies:
32
32
  - - ! '>='
33
33
  - !ruby/object:Gem::Version
34
34
  version: '0'
35
+ - !ruby/object:Gem::Dependency
36
+ name: scrub_rb
37
+ requirement: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: 1.0.1
43
+ - - <
44
+ - !ruby/object:Gem::Version
45
+ version: '2'
46
+ type: :runtime
47
+ prerelease: false
48
+ version_requirements: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 1.0.1
54
+ - - <
55
+ - !ruby/object:Gem::Version
56
+ version: '2'
35
57
  - !ruby/object:Gem::Dependency
36
58
  name: unf
37
59
  requirement: !ruby/object:Gem::Requirement
@@ -65,6 +87,7 @@ files:
65
87
  - lib/marc/record.rb
66
88
  - lib/marc/subfield.rb
67
89
  - lib/marc/version.rb
90
+ - lib/marc/writer-NEW.rb
68
91
  - lib/marc/writer.rb
69
92
  - lib/marc/xml_parsers.rb
70
93
  - lib/marc/xmlreader.rb
@@ -73,14 +96,19 @@ files:
73
96
  - test/bad_eacc_encoding.marc8.marc
74
97
  - test/batch.dat
75
98
  - test/batch.xml
99
+ - test/bib178448.okay.human
100
+ - test/bib178448.okay.marc
101
+ - test/bib178448.writtenout.marc
76
102
  - test/cp866_multirecord.marc
77
103
  - test/cp866_unimarc.marc
78
104
  - test/escaped_character_reference.marc8.marc
105
+ - test/hebrew880s.marc
79
106
  - test/marc8/data/test_marc8.txt
80
107
  - test/marc8/data/test_utf8.txt
81
108
  - test/marc8/tc_marc8_mapping.rb
82
109
  - test/marc8/tc_to_unicode.rb
83
110
  - test/marc8_accented_chars.marc
111
+ - test/marc_with_bad_utf8.utf8.human
84
112
  - test/marc_with_bad_utf8.utf8.marc
85
113
  - test/no-leading-zero.xml
86
114
  - test/non-numeric.dat