marc 0.8.1 → 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,9 +1,8 @@
1
- require 'ensure_valid_encoding'
1
+ require 'scrub_rb'
2
2
 
3
3
  # Note: requiring 'marc/marc8/to_unicode' below, in #initialize,
4
4
  # only when necessary
5
5
 
6
-
7
6
  module MARC
8
7
  # A class for reading MARC binary (ISO 2709) files.
9
8
  #
@@ -413,7 +412,14 @@ module MARC
413
412
  if params[:internal_encoding]
414
413
  str = str.encode(params[:internal_encoding], params)
415
414
  elsif (params[:invalid] || params[:replace] || (params[:validate_encoding] == true))
416
- str = EnsureValidEncoding.ensure_valid_encoding(str, params)
415
+
416
+ if params[:validate_encoding] == true && ! str.valid_encoding?
417
+ raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
418
+ end
419
+ if params[:invalid] == :replace
420
+ str = str.scrub(params[:replace])
421
+ end
422
+
417
423
  end
418
424
  end
419
425
  return str
@@ -1,3 +1,3 @@
1
1
  module MARC
2
- VERSION = "0.8.1"
2
+ VERSION = "0.8.2"
3
3
  end
@@ -0,0 +1,108 @@
1
+ module MARC
2
+
3
+ # A class for writing MARC records as MARC21.
4
+
5
+ class Writer
6
+
7
+ # the constructor, to which you must pass a file path
8
+ # or an object that responds to a write message
9
+
10
+ def initialize(file)
11
+ if file.class == String
12
+ @fh = File.new(file,"w")
13
+ elsif file.respond_to?('write')
14
+ @fh = file
15
+ else
16
+ throw "must pass in file name or handle"
17
+ end
18
+ end
19
+
20
+
21
+ # write a record to the file or handle
22
+
23
+ def write(record)
24
+ @fh.write(MARC::Writer.encode(record))
25
+ end
26
+
27
+
28
+ # close underlying filehandle
29
+
30
+ def close
31
+ @fh.close
32
+ end
33
+
34
+
35
+ # a static method that accepts a MARC::Record object
36
+ # and returns the record encoded as MARC21 in transmission format
37
+
38
+ def self.encode(record)
39
+ directory = ''
40
+ fields = ''
41
+ offset = 0
42
+ for field in record.fields
43
+
44
+ # encode the field
45
+ field_data = ''
46
+ if field.class == MARC::DataField
47
+ warn("Warn: Missing indicator") unless field.indicator1 && field.indicator2
48
+ field_data = (field.indicator1 || " ") + (field.indicator2 || " ")
49
+ for s in field.subfields
50
+ field_data += SUBFIELD_INDICATOR + s.code + s.value
51
+ end
52
+ elsif field.class == MARC::ControlField
53
+ field_data = field.value
54
+ end
55
+ field_data += END_OF_FIELD
56
+
57
+ # calculate directory entry for the field
58
+ field_length = (field_data.respond_to?(:bytesize) ?
59
+ field_data.bytesize() :
60
+ field_data.length())
61
+ directory += sprintf("%03s", field.tag) + format_byte_count(field_length, 4) + format_byte_count(offset)
62
+
63
+
64
+ # add field to data for other fields
65
+ fields += field_data
66
+
67
+ # update offset for next field
68
+ offset += field_length
69
+ end
70
+
71
+ # determine the base (leader + directory)
72
+ base = record.leader + directory + END_OF_FIELD
73
+
74
+ # determine complete record
75
+ marc = base + fields + END_OF_RECORD
76
+
77
+ # update leader with the byte offset to the end of the directory
78
+ marc[12..16] = format_byte_count(base.respond_to?(:bytesize) ?
79
+ base.bytesize() :
80
+ base.length()
81
+ )
82
+
83
+ # update the record length
84
+ marc[0..4] = format_byte_count(marc.respond_to?(:bytesize) ?
85
+ marc.bytesize() :
86
+ marc.length()
87
+ )
88
+
89
+ # store updated leader in the record that was passed in
90
+ record.leader = marc[0..LEADER_LENGTH-1]
91
+
92
+ # return encoded marc
93
+ return marc
94
+ end
95
+
96
+ def self.format_byte_count(number, num_digits=5)
97
+ formatted = sprintf("%0#{num_digits}i", number)
98
+ if formatted.length > num_digits
99
+ # uh, oh, we've exceeded our max. Either zero out
100
+ # or raise, depending on settings.
101
+ #formatted = sprintf("%0#{num_digits}i", "")
102
+ formatted = "9" * num_digits
103
+ end
104
+ return formatted
105
+ end
106
+
107
+ end
108
+ end
@@ -0,0 +1,24 @@
1
+ 01161cam a2200289 4500
2
+ 001 178448
3
+ 008 s1996 xx spa d
4
+ 035 $a X!b
5
+ 049 $a JHWV [AV] [NIRC] $n o
6
+ 096 $a WY 20.5 VC6 1996
7
+ 110 2 $a National Institutes of Health (U.S.)
8
+ 110 2 $a National Institute of Nursing Research (U.S.)
9
+ 110 2 $a Department of Health & Human Services (U.S.)
10
+ 245 0 $a Ten years at NIH : $b advancing health through science : the human dimension / $c Patricia A. Grady, Harold Varmus.
11
+ 246 $a 10 years at NIH
12
+ 300 $a 2 videocassettes (229 min.) : $b sd., col. ; $c 1/2 in.
13
+ 520 $a A series of speakers recounts advances in nursing research from 1986 to 1996. ˜
14
+ 538 $a VHS.
15
+ 650 2 $a Nursing Care $x videocassettes
16
+ 650 2 $a Nursing Research $x videocassettes
17
+ 650 2 $a Nursing $x videocassettes
18
+ 700 10 $a Grady, Patricia Anne, $d 1943-
19
+ 700 1 $a Varmus, Harold
20
+ 910 $a 178448 $b Horizon bib#
21
+ 949 31 $7 1 $5 WY 20.5 VC6 1996 $0 26 $0 G $2 A $8 5 $4 1
22
+ 991 $a WY 20.5 VC6 1996 $f nlm $b wnlm $c c. 1 $q 0 $i 3199765 $l wempbk $m elsc
23
+ 991 $a WY 20.5 VC6 1996 $f nlm $b wnlm $c c. 1 $q 0 $i 3199766 $l wempbk $m elsc
24
+
@@ -0,0 +1 @@
1
+ 01161cam a2200289 4500001000700000008004100007035000800048049002400056096002100080110004100101110005100142110004900193245011600242246002000358300005600378520008500434538000900519650003300528650003700561650002800598700003300626700001900659910002500678949004000703991006400743991006400807178448 s1996 xx spa d aX!b aJHWV [AV] [NIRC]no aWY 20.5 VC6 19962 aNational Institutes of Health (U.S.)2 aNational Institute of Nursing Research (U.S.)2 aDepartment of Health & Human Services (U.S.) 0aTen years at NIH :badvancing health through science : the human dimension /cPatricia A. Grady, Harold Varmus. a10 years at NIH a2 videocassettes (229 min.) :bsd., col. ;c1/2 in. aA series of speakers recounts advances in nursing research from 1986 to 1996. ˜ aVHS. 2aNursing Carexvideocassettes 2aNursing Researchxvideocassettes 2aNursingxvideocassettes10aGrady, Patricia Anne,d1943-1 aVarmus, Harold a178448bHorizon bib#31715WY 20.5 VC6 19960260G2A8541 aWY 20.5 VC6 1996fnlmbwnlmcc. 1q0i3199765lwempbkmelsc aWY 20.5 VC6 1996fnlmbwnlmcc. 1q0i3199766lwempbkmelsc
@@ -0,0 +1 @@
1
+ 01161cam a2200289 4500001000700000008004100007035000800048049002400056096002100080110004100101110005100142110004900193245011600242246002000358300005600378520008500434538000900519650003300528650003700561650002800598700003300626700001900659910002500678949004000703991006400743991006400807178448 s1996 xx spa d aX!b aJHWV [AV] [NIRC]no aWY 20.5 VC6 19962 aNational Institutes of Health (U.S.)2 aNational Institute of Nursing Research (U.S.)2 aDepartment of Health & Human Services (U.S.) 0aTen years at NIH :badvancing health through science : the human dimension /cPatricia A. Grady, Harold Varmus. a10 years at NIH a2 videocassettes (229 min.) :bsd., col. ;c1/2 in. aA series of speakers recounts advances in nursing research from 1986 to 1996. ˜ aVHS. 2aNursing Carexvideocassettes 2aNursing Researchxvideocassettes 2aNursingxvideocassettes10aGrady, Patricia Anne,d1943-1 aVarmus, Harold a178448bHorizon bib#31715WY 20.5 VC6 19960260G2A8541 aWY 20.5 VC6 1996fnlmbwnlmcc. 1q0i3199765lwempbkmelsc aWY 20.5 VC6 1996fnlmbwnlmcc. 1q0i3199766lwempbkmelsc
@@ -0,0 +1 @@
1
+ 01998cam a2200469 a 4500001000800000005001700008008004100025020001800066020001500084024001800099024001200117035001200129035001700141035002100158040003200179042000800211049000900219050002100228066000700249100002900256245010000285246008400385260015000469300002100619504006400640541010900704600004100813600004700854650002500901650002300926650002900949650002500978752002201003880003701025880013001062880017301192910002601365936002701391938003801418991006001456994001201516408398520120302131100.0110313s2011 is b 001 0 heb c a9789651321337 a96513213348 a00032002131958 a3221319 a4083985 aocn710973037 a(OCoLC)710973037 aWEINBcWEINBdHLSdIXAdCUY apcc aJHEE 4aDS149b.R38 2011 c(21 6880-01aRatsabi, Shalom.106880-02aAnarkhizm be-"Tsiyon" :bben Marṭin Buber le-Aharon Daṿid Gordon /cShalom Ratsabi.1 iTitle on t.p. verso:aAnarchy in "Zion" :bbetween Martin Buber and A.D. Gordon 6880-03a[Tel Aviv] :bʻAm ʻoved :bha-Makhon le-ḥeḳer ha-Tsiyonut ve-Yiśra'el ʻa. sh. Ḥayim Ṿaitsman, Universiṭat Tel Aviv,cc2011. a339 p. ;c23 cm. aIncludes bibliographical references (p. 320-330) and index. 3Eisenhower copy:cPurchased with support from the National Endowment for the Humanities;dFY2012.5MdBJ.10aBuber, Martin,d1878-1965xReligion.10aGordon, Aaron David,d1856-1922xReligion. 0aZionism and Judaism. 0aReligious Zionism. 0aZionismxHistoriography. 0aZionismxPhilosophy. aIsraeldTel Aviv.1 6100-01/(2/raרצבי, שלום.106245-02/(2/raאנרכיזם ב״ציון״ :bבין מרטין בובר לאהרן דוד גורדון /cשלום רצבי. 6260-03/(2/ra[תל אביב] :bעם עובד :bהמכון לחקר הציונות וישראל עʺש חיים ויצמן, אוניברסיטת תל אביב,cc2011. a4083985bHorizon bib# aPR 747581025 741225747 aA.I. WeinbergbWEINnwb2011369996 aDS149.R38 2011flcbelccc. 1q0i6085034lemainmemsel aC0bJHE
@@ -0,0 +1,40 @@
1
+ 01161cam a2200289 4500
2
+ 001 178448
3
+ 008 s1996 xx spa d
4
+ 035 $a X!b
5
+ 049 $a JHWV [AV] [NIRC] $n o
6
+ 096 $a WY 20.5 VC6 1996
7
+ 110 2 $a National Institutes of Health (U.S.)
8
+ 110 2 $a National Institute of Nursing Research (U.S.)#
9
+ (No separator at end of field length=51)
10
+ (Bad indicator data. Skipping 2 bytes)
11
+ 110 2 $a Department of Health & Human Services (U.S.)
12
+ (Bad indicator data. Skipping 2 bytes)
13
+ 245 0 $a Ten years at NIH : $b advancing health through science : the human dimension / $c Patricia A. Grady, Harold Varmus.
14
+ (Bad indicator data. Skipping 2 bytes)
15
+ 246 $a 10 years at NIH
16
+ (Bad indicator data. Skipping 2 bytes)
17
+ 300 $a 2 videocassettes (229 min.) : $b sd., col. ; $c 1/2 in.
18
+ (Bad indicator data. Skipping 2 bytes)
19
+ 520 $a A series of speakers recounts advances in nursing research from 1986 to 1996. ˜
20
+ (Bad indicator data. Skipping 2 bytes)
21
+ 538 $a VHS.
22
+ (Bad indicator data. Skipping 2 bytes)
23
+ 650 2 $a Nursing Care $x videocassettes
24
+ (Bad indicator data. Skipping 2 bytes)
25
+ 650 2 $a Nursing Research $x videocassettes
26
+ (Bad indicator data. Skipping 2 bytes)
27
+ 650 2 $a Nursing $x videocassettes
28
+ (Bad indicator data. Skipping 2 bytes)
29
+ 700 10 $a Grady, Patricia Anne, $d 1943-
30
+ (Bad indicator data. Skipping 2 bytes)
31
+ 700 1 $a Varmus, Harold
32
+ (Bad indicator data. Skipping 2 bytes)
33
+ 910 $a 178448 $b Horizon bib#
34
+ (Bad indicator data. Skipping 2 bytes)
35
+ 949 31 $7 1 $5 WY 20.5 VC6 1996 $0 26 $0 G $2 A $8 5 $4 1
36
+ (Bad indicator data. Skipping 2 bytes)
37
+ 991 $a WY 20.5 VC6 1996 $f nlm $b wnlm $c c. 1 $q 0 $i 3199765 $l wempbk $m elsc
38
+ (Bad indicator data. Skipping 2 bytes)
39
+ 991 $a WY 20.5 VC6 1996 $f nlm $b wnlm $c c. 1 $q 0 $i 3199766 $l wempbk $m elsc
40
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.8.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -14,7 +14,7 @@ authors:
14
14
  autorequire: marc
15
15
  bindir: bin
16
16
  cert_chain: []
17
- date: 2013-11-26 00:00:00.000000000 Z
17
+ date: 2014-09-16 00:00:00.000000000 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: ensure_valid_encoding
@@ -32,6 +32,28 @@ dependencies:
32
32
  - - ! '>='
33
33
  - !ruby/object:Gem::Version
34
34
  version: '0'
35
+ - !ruby/object:Gem::Dependency
36
+ name: scrub_rb
37
+ requirement: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: 1.0.1
43
+ - - <
44
+ - !ruby/object:Gem::Version
45
+ version: '2'
46
+ type: :runtime
47
+ prerelease: false
48
+ version_requirements: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 1.0.1
54
+ - - <
55
+ - !ruby/object:Gem::Version
56
+ version: '2'
35
57
  - !ruby/object:Gem::Dependency
36
58
  name: unf
37
59
  requirement: !ruby/object:Gem::Requirement
@@ -65,6 +87,7 @@ files:
65
87
  - lib/marc/record.rb
66
88
  - lib/marc/subfield.rb
67
89
  - lib/marc/version.rb
90
+ - lib/marc/writer-NEW.rb
68
91
  - lib/marc/writer.rb
69
92
  - lib/marc/xml_parsers.rb
70
93
  - lib/marc/xmlreader.rb
@@ -73,14 +96,19 @@ files:
73
96
  - test/bad_eacc_encoding.marc8.marc
74
97
  - test/batch.dat
75
98
  - test/batch.xml
99
+ - test/bib178448.okay.human
100
+ - test/bib178448.okay.marc
101
+ - test/bib178448.writtenout.marc
76
102
  - test/cp866_multirecord.marc
77
103
  - test/cp866_unimarc.marc
78
104
  - test/escaped_character_reference.marc8.marc
105
+ - test/hebrew880s.marc
79
106
  - test/marc8/data/test_marc8.txt
80
107
  - test/marc8/data/test_utf8.txt
81
108
  - test/marc8/tc_marc8_mapping.rb
82
109
  - test/marc8/tc_to_unicode.rb
83
110
  - test/marc8_accented_chars.marc
111
+ - test/marc_with_bad_utf8.utf8.human
84
112
  - test/marc_with_bad_utf8.utf8.marc
85
113
  - test/no-leading-zero.xml
86
114
  - test/non-numeric.dat