marc 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ require 'marc/constants'
1
2
  require 'marc/record'
2
3
  require 'marc/field'
3
4
  require 'marc/control'
@@ -5,4 +6,5 @@ require 'marc/subfield'
5
6
  require 'marc/reader'
6
7
  require 'marc/writer'
7
8
  require 'marc/exception'
8
- require 'marc/marc21'
9
+ require 'marc/xmlwriter'
10
+ require 'marc/xmlreader'
@@ -0,0 +1,14 @@
1
+ module MARC
2
+
3
+ # constants used in MARC21 reading/writing
4
+ LEADER_LENGTH = 24
5
+ DIRECTORY_ENTRY_LENGTH = 12
6
+ SUBFIELD_INDICATOR = 0x1F.chr
7
+ END_OF_FIELD = 0x1E.chr
8
+ END_OF_RECORD = 0x1D.chr
9
+
10
+ # constants used in XML reading/writing
11
+ MARC_NS = "http://www.loc.gov/MARC21/slim"
12
+ MARC_XSD = "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
13
+
14
+ end
@@ -1,36 +1,36 @@
1
1
  module MARC
2
2
 
3
- # A class for representing fields with a tag less than 010.
4
- # Ordinary MARC::Field objects are for fields with tags >= 010
5
- # which have indicators and subfields.
3
+ # A class for representing fields with a tag less than 010.
4
+ # Ordinary MARC::Field objects are for fields with tags >= 010
5
+ # which have indicators and subfields.
6
6
 
7
- class Control
7
+ class Control
8
8
 
9
- # the tag value (007, 008, etc)
10
- attr_accessor :tag
9
+ # the tag value (007, 008, etc)
10
+ attr_accessor :tag
11
11
 
12
- # the value of the control field
13
- attr_accessor :value
12
+ # the value of the control field
13
+ attr_accessor :value
14
14
 
15
- # The constructor which must be passed a tag value and
16
- # an optional value for the field.
15
+ # The constructor which must be passed a tag value and
16
+ # an optional value for the field.
17
17
 
18
- def initialize(tag,value='')
19
- @tag = tag
20
- @value = value
21
- if tag.to_i > 9
22
- raise MARC::Exception.new(), "tag must be greater than 009"
23
- end
24
- end
25
-
26
- def to_s
27
- return "#{tag} #{value}"
28
- end
18
+ def initialize(tag,value='')
19
+ @tag = tag
20
+ @value = value
21
+ if tag.to_i > 9
22
+ raise MARC::Exception.new(), "tag must be greater than 009"
23
+ end
24
+ end
29
25
 
30
- def =~(regex)
31
- return self.to_s =~ regex
32
- end
26
+ def to_s
27
+ return "#{tag} #{value}"
28
+ end
33
29
 
30
+ def =~(regex)
31
+ return self.to_s =~ regex
34
32
  end
35
33
 
34
+ end
35
+
36
36
  end
@@ -1,9 +1,9 @@
1
1
  module MARC
2
2
 
3
- # basic exception class for exceptions that
4
- # can occur during MARC processing.
3
+ # basic exception class for exceptions that
4
+ # can occur during MARC processing.
5
5
 
6
- class Exception < RuntimeError
7
- end
6
+ class Exception < RuntimeError
7
+ end
8
8
 
9
9
  end
@@ -3,141 +3,141 @@ require 'marc/record'
3
3
 
4
4
  module MARC
5
5
 
6
- # MARC records are made up of fields, each of which has a tag,
7
- # indicators and subfields. If the tag is between 000-009 it is
8
- # known as a control field, and actually does not have any
9
- # indicators.
10
-
11
- class Field
12
- include Enumerable
13
-
14
- # The tag for the field
15
- attr_accessor :tag
16
-
17
- # The first indicator
18
- attr_accessor :indicator1
19
-
20
- # The second indicator
21
- attr_accessor :indicator2
22
-
23
- # A list of MARC::Subfield objects
24
- attr_accessor :subfields
25
-
26
-
27
- # Create a new field with tag, indicators and subfields.
28
- # Subfields are passed in as comma separated list of
29
- # MARC::Subfield objects,
30
- #
31
- # field = MARC::Field.new('245','0','0',
32
- # MARC::Subfield.new('a', 'Consilience :'),
33
- # MARC::Subfield.new('b', 'the unity of knowledge ',
34
- # MARC::Subfield.new('c', 'by Edward O. Wilson.'))
35
- #
36
- # or using a shorthand:
37
- #
38
- # field = MARC::Field.new('245','0','0',
39
- # ['a', 'Consilience :'],['b','the unity of knowledge ',
40
- # ['c', 'by Edward O. Wilson.'] )
41
-
42
- def initialize(tag, i1=' ', i2=' ', *subfields)
43
- @tag = tag
44
- # can't allow nil to be passed in or else it'll
45
- # screw us up later when we try to encode
46
- @indicator1 = i1 == nil ? ' ' : i1
47
- @indicator2 = i2 == nil ? ' ' : i2
48
- @subfields = []
49
-
50
- # must use MARC::ControlField for tags < 010
51
- if @tag.to_i < 10
52
- raise MARC::Exception.new(),
53
- "MARC::Field objects can't have tags < 010"
54
- end
55
-
56
- # allows MARC::Subfield objects to be passed directly
57
- # or a shorthand of ['a','Foo'], ['b','Bar']
58
- subfields.each do |subfield|
59
- case subfield
60
- when MARC::Subfield
61
- @subfields.push(subfield)
62
- when Array
63
- if subfield.length > 2
64
- raise MARC::Exception.new(),
65
- "arrays must only have 2 elements"
66
- end
67
- @subfields.push(
68
- MARC::Subfield.new(subfield[0],subfield[1]))
69
- else
70
- raise MARC::Exception.new(),
71
- "invalid subfield type #{subfield.class}"
72
- end
73
- end
6
+ # MARC records are made up of fields, each of which has a tag,
7
+ # indicators and subfields. If the tag is between 000-009 it is
8
+ # known as a control field, and actually does not have any
9
+ # indicators.
10
+
11
+ class Field
12
+ include Enumerable
13
+
14
+ # The tag for the field
15
+ attr_accessor :tag
16
+
17
+ # The first indicator
18
+ attr_accessor :indicator1
19
+
20
+ # The second indicator
21
+ attr_accessor :indicator2
22
+
23
+ # A list of MARC::Subfield objects
24
+ attr_accessor :subfields
25
+
26
+
27
+ # Create a new field with tag, indicators and subfields.
28
+ # Subfields are passed in as comma separated list of
29
+ # MARC::Subfield objects,
30
+ #
31
+ # field = MARC::Field.new('245','0','0',
32
+ # MARC::Subfield.new('a', 'Consilience :'),
33
+ # MARC::Subfield.new('b', 'the unity of knowledge '),
34
+ # MARC::Subfield.new('c', 'by Edward O. Wilson.'))
35
+ #
36
+ # or using a shorthand:
37
+ #
38
+ # field = MARC::Field.new('245','0','0',
39
+ # ['a', 'Consilience :'],['b','the unity of knowledge '],
40
+ # ['c', 'by Edward O. Wilson.'] )
41
+
42
+ def initialize(tag, i1=' ', i2=' ', *subfields)
43
+ @tag = tag
44
+ # can't allow nil to be passed in or else it'll
45
+ # screw us up later when we try to encode
46
+ @indicator1 = i1 == nil ? ' ' : i1
47
+ @indicator2 = i2 == nil ? ' ' : i2
48
+ @subfields = []
49
+
50
+ # must use MARC::Control for tags < 010
51
+ if @tag.to_i < 10
52
+ raise MARC::Exception.new(),
53
+ "MARC::Field objects can't have tags < 010"
54
+ end
55
+
56
+ # allows MARC::Subfield objects to be passed directly
57
+ # or a shorthand of ['a','Foo'], ['b','Bar']
58
+ subfields.each do |subfield|
59
+ case subfield
60
+ when MARC::Subfield
61
+ @subfields.push(subfield)
62
+ when Array
63
+ if subfield.length > 2
64
+ raise MARC::Exception.new(),
65
+ "arrays must only have 2 elements"
66
+ end
67
+ @subfields.push(
68
+ MARC::Subfield.new(subfield[0],subfield[1]))
69
+ else
70
+ raise MARC::Exception.new(),
71
+ "invalid subfield type #{subfield.class}"
74
72
  end
73
+ end
74
+ end
75
75
 
76
76
 
77
- # Returns a string representation of the field such as:
78
- # 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
77
+ # Returns a string representation of the field such as:
78
+ # 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
79
79
 
80
- def to_s
81
- str = "#{tag} "
82
- str += "#{indicator1}#{indicator2} "
83
- @subfields.each { |subfield| str += subfield.to_s }
84
- return str
85
- end
80
+ def to_s
81
+ str = "#{tag} "
82
+ str += "#{indicator1}#{indicator2} "
83
+ @subfields.each { |subfield| str += subfield.to_s }
84
+ return str
85
+ end
86
86
 
87
87
 
88
- # Add a subfield (MARC::Subfield) to the field
89
- # field.append(MARC::Subfield('a','Dave Thomas'))
90
-
91
- def append(subfield)
92
- @subfields.push(subfield)
93
- end
88
+ # Add a subfield (MARC::Subfield) to the field
89
+ # field.append(MARC::Subfield.new('a','Dave Thomas'))
94
90
 
91
+ def append(subfield)
92
+ @subfields.push(subfield)
93
+ end
95
94
 
96
- # You can iterate through the subfields in a Field:
97
- # field.each {|s| print s}
98
95
 
99
- def each
100
- for subfield in subfields
101
- yield subfield
102
- end
103
- end
96
+ # You can iterate through the subfields in a Field:
97
+ # field.each {|s| print s}
104
98
 
99
+ def each
100
+ for subfield in subfields
101
+ yield subfield
102
+ end
103
+ end
105
104
 
106
- # You can lookup subfields with this shorthand. Note it
107
- # will return a string and not a MARC::Subfield object.
108
- # subfield = field['a']
109
-
110
- def [](code)
111
- subfield = self.find {|s| s.code == code}
112
- return subfield.value if subfield
113
- return
114
- end
115
105
 
106
+ # You can lookup subfields with this shorthand. Note it
107
+ # will return a string and not a MARC::Subfield object.
108
+ # subfield = field['a']
109
+
110
+ def [](code)
111
+ subfield = self.find {|s| s.code == code}
112
+ return subfield.value if subfield
113
+ return
114
+ end
116
115
 
117
- # Two fields are equal if their tag, indicators and
118
- # subfields are all equal.
119
-
120
- def ==(other)
121
- if @tag != other.tag
122
- return false
123
- elsif @indicator1 != other.indicator1
124
- return false
125
- elsif @indicator2 != other.indicator2
126
- return false
127
- elsif @subfields != other.subfields
128
- return false
129
- end
130
- return true
131
- end
132
116
 
117
+ # Two fields are equal if their tag, indicators and
118
+ # subfields are all equal.
119
+
120
+ def ==(other)
121
+ if @tag != other.tag
122
+ return false
123
+ elsif @indicator1 != other.indicator1
124
+ return false
125
+ elsif @indicator2 != other.indicator2
126
+ return false
127
+ elsif @subfields != other.subfields
128
+ return false
129
+ end
130
+ return true
131
+ end
133
132
 
134
- # To support regex matching with fields
135
- #
136
- # if field =~ /Huckleberry/ ...
137
133
 
138
- def =~(regex)
139
- return self.to_s =~ regex
140
- end
134
+ # To support regex matching with fields
135
+ #
136
+ # if field =~ /Huckleberry/ ...
141
137
 
138
+ def =~(regex)
139
+ return self.to_s =~ regex
142
140
  end
141
+
142
+ end
143
143
  end
@@ -1,92 +1,177 @@
1
1
  module MARC
2
2
 
3
- class Reader
4
- include Enumerable
5
-
6
- # The constructor which you may pass either a path
7
- #
8
- # reader = MARC::Reader.new('marc.dat')
9
- #
10
- # or, if it's more convenient a File object:
11
- #
12
- # fh = File.new('marc.dat')
13
- # reader = MARC::Reader.new(fh)
14
- #
15
- # or really any object that responds to read(n).
16
-
17
- def initialize(file)
18
- if file.class == String:
19
- @handle = File.new(file)
20
- elsif file.respond_to?("read", 5)
21
- @handle = file
22
- else
23
- throw "must pass in path or file"
24
- end
25
- end
26
-
27
- # to support iteration:
28
- # for record in reader
29
- # print record
30
- # end
31
- #
32
- # and even searching:
33
- # record.find { |f| f['245'] =~ /Huckleberry/ }
34
-
35
- def each
36
- # while there is data left in the file
37
- while length = @handle.read(5)
38
-
39
- # get the raw MARC21 for a record back from the file
40
- # using the record length
41
- raw = length + @handle.read(length.to_i-5)
42
-
43
- # create a record from the data and return it
44
- record = MARC::Record.new_from_marc(raw)
45
- yield record
46
- end
47
- end
3
+ class Reader
4
+ include Enumerable
5
+
6
+ # The constructor which you may pass either a path
7
+ #
8
+ # reader = MARC::Reader.new('marc.dat')
9
+ #
10
+ # or, if it's more convenient, a File object:
11
+ #
12
+ # fh = File.new('marc.dat')
13
+ # reader = MARC::Reader.new(fh)
14
+ #
15
+ # or really any object that responds to read(n).
16
+
17
+ def initialize(file)
18
+ if file.class == String:
19
+ @handle = File.new(file)
20
+ elsif file.respond_to?("read", 5)
21
+ @handle = file
22
+ else
23
+ throw "must pass in path or file"
24
+ end
25
+ end
48
26
 
27
+ # to support iteration:
28
+ # for record in reader
29
+ # print record
30
+ # end
31
+ #
32
+ # and even searching:
33
+ # record.find { |f| f['245'] =~ /Huckleberry/ }
34
+
35
+ def each
36
+ # while there is data left in the file
37
+ while length = @handle.read(5)
38
+
39
+ # get the raw MARC21 for a record back from the file
40
+ # using the record length
41
+ raw = length + @handle.read(length.to_i-5)
42
+
43
+ # create a record from the data and return it
44
+ #record = MARC::Record.new_from_marc(raw)
45
+ record = MARC::Reader.decode(raw)
46
+ yield record
47
+ end
49
48
  end
50
49
 
51
50
 
52
- # Like Reader ForgivingReader lets you read in a batch of MARC21 records
53
- # but it does not use record lengths and field byte offets found in the
54
- # leader and directory. It is not unusual to run across MARC records
55
- # which have had their offsets calcualted wrong. In situations like this
56
- # the vanilla Reader may fail, and you can try to use ForgivingReader.
57
-
58
- # The one downside to this is that ForgivingReader will assume that the
59
- # order of the fields in the directory is the same as the order of fields
60
- # in the field data. Hopefully this will be the case, but it is not
61
- # 100% guranteed which is why the normal behavior of Reader is encouraged.
62
-
63
- class ForgivingReader
64
- include Enumerable
65
-
66
- def initialize(file)
67
- if file.class == String
68
- @handle = File.new(file)
69
- elsif file.class == File
70
- @handle = file
71
- else
72
- throw "must pass in path or File object"
73
- end
74
- end
51
+ # A static method for turning raw MARC data in transmission
52
+ # format into a MARC::Record object.
53
+
54
+ def self.decode(marc, params={})
55
+ record = Record.new()
56
+ record.leader = marc[0..LEADER_LENGTH-1]
57
+
58
+ # where the field data starts
59
+ base_address = record.leader[12..16].to_i
75
60
 
61
+ # get the byte offsets from the record directory
62
+ directory = marc[LEADER_LENGTH..base_address-1]
76
63
 
77
- def each
78
- @handle.each_line(MARC::MARC21::END_OF_RECORD) do |raw|
79
- begin
80
- record = MARC::Record.new_from_marc(raw, :forgiving => true)
81
- yield record
82
- rescue StandardError => e
83
- # caught exception just keep barrelling along
84
- # TODO add logging
85
- end
86
- end
64
+ throw "invalid directory in record" if directory == nil
65
+
66
+ # the number of fields in the record corresponds to
67
+ # how many directory entries there are
68
+ num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
69
+
70
+ # when operating in forgiving mode we just split on end of
71
+ # field instead of using calculated byte offsets from the
72
+ # directory
73
+ all_fields = marc[base_address..-1].split(END_OF_FIELD)
74
+
75
+ 0.upto(num_fields-1) do |field_num|
76
+
77
+ # pull the directory entry for a field out
78
+ entry_start = field_num * DIRECTORY_ENTRY_LENGTH
79
+ entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
80
+ entry = directory[entry_start..entry_end]
81
+
82
+ # extract the tag
83
+ tag = entry[0..2]
84
+
85
+ # get the actual field data
86
+ # if we were told to be forgiving we just use the
87
+ # next available chunk of field data that we
88
+ # split apart based on the END_OF_FIELD
89
+ field_data = ''
90
+ if params[:forgiving]
91
+ field_data = all_fields.shift()
92
+
93
+ # otherwise we actually use the byte offsets in
94
+ # directory to figure out what field data to extract
95
+ else
96
+ length = entry[3..6].to_i
97
+ offset = entry[7..11].to_i
98
+ field_start = base_address + offset
99
+ field_end = field_start + length - 1
100
+ field_data = marc[field_start..field_end]
87
101
  end
88
102
 
103
+ # remove end of field
104
+ field_data.delete!(END_OF_FIELD)
105
+
106
+ # add a control field or variable field
107
+ if tag < '010'
108
+ record.append(MARC::Control.new(tag,field_data))
109
+ else
110
+ field = MARC::Field.new(tag)
111
+
112
+ # get all subfields
113
+ subfields = field_data.split(SUBFIELD_INDICATOR)
114
+
115
+ # must have at least 2 elements (indicators, and 1 subfield)
116
+ # TODO some sort of logging?
117
+ next if subfields.length() < 2
118
+
119
+ # get indicators
120
+ indicators = subfields.shift()
121
+ field.indicator1 = indicators[0,1]
122
+ field.indicator2 = indicators[1,1]
123
+
124
+ # add each subfield to the field
125
+ subfields.each() do |data|
126
+ subfield = MARC::Subfield.new(data[0,1],data[1..-1])
127
+ field.append(subfield)
128
+ end
129
+
130
+ # add the field to the record
131
+ record.append(field)
132
+ end
133
+ end
134
+
135
+ return record
136
+ end
137
+ end
138
+
139
+
140
+ # Like Reader, ForgivingReader lets you read in a batch of MARC21 records
141
+ # but it does not use record lengths and field byte offsets found in the
142
+ # leader and directory. It is not unusual to run across MARC records
143
+ # which have had their offsets calculated wrong. In situations like this
144
+ # the vanilla Reader may fail, and you can try to use ForgivingReader.
145
+
146
+ # The one downside to this is that ForgivingReader will assume that the
147
+ # order of the fields in the directory is the same as the order of fields
148
+ # in the field data. Hopefully this will be the case, but it is not
149
+ # 100% guaranteed which is why the normal behavior of Reader is encouraged.
150
+
151
+ class ForgivingReader
152
+ include Enumerable
153
+
154
+ def initialize(file)
155
+ if file.class == String
156
+ @handle = File.new(file)
157
+ elsif file.class == File
158
+ @handle = file
159
+ else
160
+ throw "must pass in path or File object"
161
+ end
89
162
  end
90
163
 
91
164
 
165
+ def each
166
+ @handle.each_line(END_OF_RECORD) do |raw|
167
+ begin
168
+ record = MARC::Reader.decode(raw, :forgiving => true)
169
+ yield record
170
+ rescue StandardError => e
171
+ # caught exception just keep barrelling along
172
+ # TODO add logging
173
+ end
174
+ end
175
+ end
176
+ end
92
177
  end