marc 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,4 @@
1
+ require 'marc/constants'
1
2
  require 'marc/record'
2
3
  require 'marc/field'
3
4
  require 'marc/control'
@@ -5,4 +6,5 @@ require 'marc/subfield'
5
6
  require 'marc/reader'
6
7
  require 'marc/writer'
7
8
  require 'marc/exception'
8
- require 'marc/marc21'
9
+ require 'marc/xmlwriter'
10
+ require 'marc/xmlreader'
@@ -0,0 +1,14 @@
1
+ module MARC
2
+
3
+ # constants used in MARC21 reading/writing
4
+ LEADER_LENGTH = 24
5
+ DIRECTORY_ENTRY_LENGTH = 12
6
+ SUBFIELD_INDICATOR = 0x1F.chr
7
+ END_OF_FIELD = 0x1E.chr
8
+ END_OF_RECORD = 0x1D.chr
9
+
10
+ # constants used in XML reading/writing
11
+ MARC_NS = "http://www.loc.gov/MARC21/slim"
12
+ MARC_XSD = "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
13
+
14
+ end
@@ -1,36 +1,36 @@
1
1
  module MARC
2
2
 
3
- # A class for representing fields with a tag less than 010.
4
- # Ordinary MARC::Field objects are for fields with tags >= 010
5
- # which have indicators and subfields.
3
+ # A class for representing fields with a tag less than 010.
4
+ # Ordinary MARC::Field objects are for fields with tags >= 010
5
+ # which have indicators and subfields.
6
6
 
7
- class Control
7
+ class Control
8
8
 
9
- # the tag value (007, 008, etc)
10
- attr_accessor :tag
9
+ # the tag value (007, 008, etc)
10
+ attr_accessor :tag
11
11
 
12
- # the value of the control field
13
- attr_accessor :value
12
+ # the value of the control field
13
+ attr_accessor :value
14
14
 
15
- # The constructor which must be passed a tag value and
16
- # an optional value for the field.
15
+ # The constructor which must be passed a tag value and
16
+ # an optional value for the field.
17
17
 
18
- def initialize(tag,value='')
19
- @tag = tag
20
- @value = value
21
- if tag.to_i > 9
22
- raise MARC::Exception.new(), "tag must be greater than 009"
23
- end
24
- end
25
-
26
- def to_s
27
- return "#{tag} #{value}"
28
- end
18
+ def initialize(tag,value='')
19
+ @tag = tag
20
+ @value = value
21
+ if tag.to_i > 9
22
+ raise MARC::Exception.new(), "tag must be greater than 009"
23
+ end
24
+ end
29
25
 
30
- def =~(regex)
31
- return self.to_s =~ regex
32
- end
26
+ def to_s
27
+ return "#{tag} #{value}"
28
+ end
33
29
 
30
+ def =~(regex)
31
+ return self.to_s =~ regex
34
32
  end
35
33
 
34
+ end
35
+
36
36
  end
@@ -1,9 +1,9 @@
1
1
  module MARC
2
2
 
3
- # basic exception class for exceptions that
4
- # can occur during MARC processing.
3
+ # basic exception class for exceptions that
4
+ # can occur during MARC processing.
5
5
 
6
- class Exception < RuntimeError
7
- end
6
+ class Exception < RuntimeError
7
+ end
8
8
 
9
9
  end
@@ -3,141 +3,141 @@ require 'marc/record'
3
3
 
4
4
  module MARC
5
5
 
6
- # MARC records are made up of fields, each of which has a tag,
7
- # indicators and subfields. If the tag is between 000-009 it is
8
- # known as a control field, and actually does not have any
9
- # indicators.
10
-
11
- class Field
12
- include Enumerable
13
-
14
- # The tag for the field
15
- attr_accessor :tag
16
-
17
- # The first indicator
18
- attr_accessor :indicator1
19
-
20
- # The second indicator
21
- attr_accessor :indicator2
22
-
23
- # A list of MARC::Subfield objects
24
- attr_accessor :subfields
25
-
26
-
27
- # Create a new field with tag, indicators and subfields.
28
- # Subfields are passed in as comma separated list of
29
- # MARC::Subfield objects,
30
- #
31
- # field = MARC::Field.new('245','0','0',
32
- # MARC::Subfield.new('a', 'Consilience :'),
33
- # MARC::Subfield.new('b', 'the unity of knowledge '),
34
- # MARC::Subfield.new('c', 'by Edward O. Wilson.'))
35
- #
36
- # or using a shorthand:
37
- #
38
- # field = MARC::Field.new('245','0','0',
39
- # ['a', 'Consilience :'],['b','the unity of knowledge '],
40
- # ['c', 'by Edward O. Wilson.'] )
41
-
42
- def initialize(tag, i1=' ', i2=' ', *subfields)
43
- @tag = tag
44
- # can't allow nil to be passed in or else it'll
45
- # screw us up later when we try to encode
46
- @indicator1 = i1 == nil ? ' ' : i1
47
- @indicator2 = i2 == nil ? ' ' : i2
48
- @subfields = []
49
-
50
- # must use MARC::Control for tags < 010
51
- if @tag.to_i < 10
52
- raise MARC::Exception.new(),
53
- "MARC::Field objects can't have tags < 010"
54
- end
55
-
56
- # allows MARC::Subfield objects to be passed directly
57
- # or a shorthand of ['a','Foo'], ['b','Bar']
58
- subfields.each do |subfield|
59
- case subfield
60
- when MARC::Subfield
61
- @subfields.push(subfield)
62
- when Array
63
- if subfield.length > 2
64
- raise MARC::Exception.new(),
65
- "arrays must only have 2 elements"
66
- end
67
- @subfields.push(
68
- MARC::Subfield.new(subfield[0],subfield[1]))
69
- else
70
- raise MARC::Exception.new(),
71
- "invalid subfield type #{subfield.class}"
72
- end
73
- end
6
+ # MARC records are made up of fields, each of which has a tag,
7
+ # indicators and subfields. If the tag is between 000-009 it is
8
+ # known as a control field, and actually does not have any
9
+ # indicators.
10
+
11
+ class Field
12
+ include Enumerable
13
+
14
+ # The tag for the field
15
+ attr_accessor :tag
16
+
17
+ # The first indicator
18
+ attr_accessor :indicator1
19
+
20
+ # The second indicator
21
+ attr_accessor :indicator2
22
+
23
+ # A list of MARC::Subfield objects
24
+ attr_accessor :subfields
25
+
26
+
27
+ # Create a new field with tag, indicators and subfields.
28
+ # Subfields are passed in as comma separated list of
29
+ # MARC::Subfield objects,
30
+ #
31
+ # field = MARC::Field.new('245','0','0',
32
+ # MARC::Subfield.new('a', 'Consilience :'),
33
+ # MARC::Subfield.new('b', 'the unity of knowledge '),
34
+ # MARC::Subfield.new('c', 'by Edward O. Wilson.'))
35
+ #
36
+ # or using a shorthand:
37
+ #
38
+ # field = MARC::Field.new('245','0','0',
39
+ # ['a', 'Consilience :'],['b','the unity of knowledge '],
40
+ # ['c', 'by Edward O. Wilson.'] )
41
+
42
+ def initialize(tag, i1=' ', i2=' ', *subfields)
43
+ @tag = tag
44
+ # can't allow nil to be passed in or else it'll
45
+ # screw us up later when we try to encode
46
+ @indicator1 = i1 == nil ? ' ' : i1
47
+ @indicator2 = i2 == nil ? ' ' : i2
48
+ @subfields = []
49
+
50
+ # must use MARC::Control for tags < 010
51
+ if @tag.to_i < 10
52
+ raise MARC::Exception.new(),
53
+ "MARC::Field objects can't have tags < 010"
54
+ end
55
+
56
+ # allows MARC::Subfield objects to be passed directly
57
+ # or a shorthand of ['a','Foo'], ['b','Bar']
58
+ subfields.each do |subfield|
59
+ case subfield
60
+ when MARC::Subfield
61
+ @subfields.push(subfield)
62
+ when Array
63
+ if subfield.length > 2
64
+ raise MARC::Exception.new(),
65
+ "arrays must only have 2 elements"
66
+ end
67
+ @subfields.push(
68
+ MARC::Subfield.new(subfield[0],subfield[1]))
69
+ else
70
+ raise MARC::Exception.new(),
71
+ "invalid subfield type #{subfield.class}"
74
72
  end
73
+ end
74
+ end
75
75
 
76
76
 
77
- # Returns a string representation of the field such as:
78
- # 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
77
+ # Returns a string representation of the field such as:
78
+ # 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
79
79
 
80
- def to_s
81
- str = "#{tag} "
82
- str += "#{indicator1}#{indicator2} "
83
- @subfields.each { |subfield| str += subfield.to_s }
84
- return str
85
- end
80
+ def to_s
81
+ str = "#{tag} "
82
+ str += "#{indicator1}#{indicator2} "
83
+ @subfields.each { |subfield| str += subfield.to_s }
84
+ return str
85
+ end
86
86
 
87
87
 
88
- # Add a subfield (MARC::Subfield) to the field
89
- # field.append(MARC::Subfield.new('a','Dave Thomas'))
90
-
91
- def append(subfield)
92
- @subfields.push(subfield)
93
- end
88
+ # Add a subfield (MARC::Subfield) to the field
89
+ # field.append(MARC::Subfield.new('a','Dave Thomas'))
94
90
 
91
+ def append(subfield)
92
+ @subfields.push(subfield)
93
+ end
95
94
 
96
- # You can iterate through the subfields in a Field:
97
- # field.each {|s| print s}
98
95
 
99
- def each
100
- for subfield in subfields
101
- yield subfield
102
- end
103
- end
96
+ # You can iterate through the subfields in a Field:
97
+ # field.each {|s| print s}
104
98
 
99
+ def each
100
+ for subfield in subfields
101
+ yield subfield
102
+ end
103
+ end
105
104
 
106
- # You can lookup subfields with this shorthand. Note it
107
- # will return a string and not a MARC::Subfield object.
108
- # subfield = field['a']
109
-
110
- def [](code)
111
- subfield = self.find {|s| s.code == code}
112
- return subfield.value if subfield
113
- return
114
- end
115
105
 
106
+ # You can lookup subfields with this shorthand. Note it
107
+ # will return a string and not a MARC::Subfield object.
108
+ # subfield = field['a']
109
+
110
+ def [](code)
111
+ subfield = self.find {|s| s.code == code}
112
+ return subfield.value if subfield
113
+ return
114
+ end
116
115
 
117
- # Two fields are equal if their tag, indicators and
118
- # subfields are all equal.
119
-
120
- def ==(other)
121
- if @tag != other.tag
122
- return false
123
- elsif @indicator1 != other.indicator1
124
- return false
125
- elsif @indicator2 != other.indicator2
126
- return false
127
- elsif @subfields != other.subfields
128
- return false
129
- end
130
- return true
131
- end
132
116
 
117
+ # Two fields are equal if their tag, indicators and
118
+ # subfields are all equal.
119
+
120
+ def ==(other)
121
+ if @tag != other.tag
122
+ return false
123
+ elsif @indicator1 != other.indicator1
124
+ return false
125
+ elsif @indicator2 != other.indicator2
126
+ return false
127
+ elsif @subfields != other.subfields
128
+ return false
129
+ end
130
+ return true
131
+ end
133
132
 
134
- # To support regex matching with fields
135
- #
136
- # if field =~ /Huckleberry/ ...
137
133
 
138
- def =~(regex)
139
- return self.to_s =~ regex
140
- end
134
+ # To support regex matching with fields
135
+ #
136
+ # if field =~ /Huckleberry/ ...
141
137
 
138
+ def =~(regex)
139
+ return self.to_s =~ regex
142
140
  end
141
+
142
+ end
143
143
  end
@@ -1,92 +1,177 @@
1
1
  module MARC
2
2
 
3
- class Reader
4
- include Enumerable
5
-
6
- # The constructor which you may pass either a path
7
- #
8
- # reader = MARC::Reader.new('marc.dat')
9
- #
10
- # or, if it's more convenient a File object:
11
- #
12
- # fh = File.new('marc.dat')
13
- # reader = MARC::Reader.new(fh)
14
- #
15
- # or really any object that responds to read(n).
16
-
17
- def initialize(file)
18
- if file.class == String:
19
- @handle = File.new(file)
20
- elsif file.respond_to?("read", 5)
21
- @handle = file
22
- else
23
- throw "must pass in path or file"
24
- end
25
- end
26
-
27
- # to support iteration:
28
- # for record in reader
29
- # print record
30
- # end
31
- #
32
- # and even searching:
33
- # record.find { |f| f['245'] =~ /Huckleberry/ }
34
-
35
- def each
36
- # while there is data left in the file
37
- while length = @handle.read(5)
38
-
39
- # get the raw MARC21 for a record back from the file
40
- # using the record length
41
- raw = length + @handle.read(length.to_i-5)
42
-
43
- # create a record from the data and return it
44
- record = MARC::Record.new_from_marc(raw)
45
- yield record
46
- end
47
- end
3
+ class Reader
4
+ include Enumerable
5
+
6
+ # The constructor which you may pass either a path
7
+ #
8
+ # reader = MARC::Reader.new('marc.dat')
9
+ #
10
+ # or, if it's more convenient a File object:
11
+ #
12
+ # fh = File.new('marc.dat')
13
+ # reader = MARC::Reader.new(fh)
14
+ #
15
+ # or really any object that responds to read(n).
16
+
17
+ def initialize(file)
18
+ if file.class == String:
19
+ @handle = File.new(file)
20
+ elsif file.respond_to?("read", 5)
21
+ @handle = file
22
+ else
23
+ throw "must pass in path or file"
24
+ end
25
+ end
48
26
 
27
+ # to support iteration:
28
+ # for record in reader
29
+ # print record
30
+ # end
31
+ #
32
+ # and even searching:
33
+ # record.find { |f| f['245'] =~ /Huckleberry/ }
34
+
35
+ def each
36
+ # while there is data left in the file
37
+ while length = @handle.read(5)
38
+
39
+ # get the raw MARC21 for a record back from the file
40
+ # using the record length
41
+ raw = length + @handle.read(length.to_i-5)
42
+
43
+ # create a record from the data and return it
44
+ #record = MARC::Record.new_from_marc(raw)
45
+ record = MARC::Reader.decode(raw)
46
+ yield record
47
+ end
49
48
  end
50
49
 
51
50
 
52
- # Like Reader ForgivingReader lets you read in a batch of MARC21 records
53
- # but it does not use record lengths and field byte offsets found in the
54
- # leader and directory. It is not unusual to run across MARC records
55
- # which have had their offsets calculated wrong. In situations like this
56
- # the vanilla Reader may fail, and you can try to use ForgivingReader.
57
-
58
- # The one downside to this is that ForgivingReader will assume that the
59
- # order of the fields in the directory is the same as the order of fields
60
- # in the field data. Hopefully this will be the case, but it is not
61
- # 100% guaranteed which is why the normal behavior of Reader is encouraged.
62
-
63
- class ForgivingReader
64
- include Enumerable
65
-
66
- def initialize(file)
67
- if file.class == String
68
- @handle = File.new(file)
69
- elsif file.class == File
70
- @handle = file
71
- else
72
- throw "must pass in path or File object"
73
- end
74
- end
51
+ # A static method for turning raw MARC data in transmission
52
+ # format into a MARC::Record object.
53
+
54
+ def self.decode(marc, params={})
55
+ record = Record.new()
56
+ record.leader = marc[0..LEADER_LENGTH-1]
57
+
58
+ # where the field data starts
59
+ base_address = record.leader[12..16].to_i
75
60
 
61
+ # get the byte offsets from the record directory
62
+ directory = marc[LEADER_LENGTH..base_address-1]
76
63
 
77
- def each
78
- @handle.each_line(MARC::MARC21::END_OF_RECORD) do |raw|
79
- begin
80
- record = MARC::Record.new_from_marc(raw, :forgiving => true)
81
- yield record
82
- rescue StandardError => e
83
- # caught exception just keep barrelling along
84
- # TODO add logging
85
- end
86
- end
64
+ throw "invalid directory in record" if directory == nil
65
+
66
+ # the number of fields in the record corresponds to
67
+ # how many directory entries there are
68
+ num_fields = directory.length / DIRECTORY_ENTRY_LENGTH
69
+
70
+ # when operating in forgiving mode we just split on end of
71
+ # field instead of using calculated byte offsets from the
72
+ # directory
73
+ all_fields = marc[base_address..-1].split(END_OF_FIELD)
74
+
75
+ 0.upto(num_fields-1) do |field_num|
76
+
77
+ # pull the directory entry for a field out
78
+ entry_start = field_num * DIRECTORY_ENTRY_LENGTH
79
+ entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
80
+ entry = directory[entry_start..entry_end]
81
+
82
+ # extract the tag
83
+ tag = entry[0..2]
84
+
85
+ # get the actual field data
86
+ # if we were told to be forgiving we just use the
87
+ # next available chunk of field data that we
88
+ # split apart based on the END_OF_FIELD
89
+ field_data = ''
90
+ if params[:forgiving]
91
+ field_data = all_fields.shift()
92
+
93
+ # otherwise we actually use the byte offsets in
94
+ # directory to figure out what field data to extract
95
+ else
96
+ length = entry[3..6].to_i
97
+ offset = entry[7..11].to_i
98
+ field_start = base_address + offset
99
+ field_end = field_start + length - 1
100
+ field_data = marc[field_start..field_end]
87
101
  end
88
102
 
103
+ # remove end of field
104
+ field_data.delete!(END_OF_FIELD)
105
+
106
+ # add a control field or variable field
107
+ if tag < '010'
108
+ record.append(MARC::Control.new(tag,field_data))
109
+ else
110
+ field = MARC::Field.new(tag)
111
+
112
+ # get all subfields
113
+ subfields = field_data.split(SUBFIELD_INDICATOR)
114
+
115
+ # must have at least 2 elements (indicators, and 1 subfield)
116
+ # TODO some sort of logging?
117
+ next if subfields.length() < 2
118
+
119
+ # get indicators
120
+ indicators = subfields.shift()
121
+ field.indicator1 = indicators[0,1]
122
+ field.indicator2 = indicators[1,1]
123
+
124
+ # add each subfield to the field
125
+ subfields.each() do |data|
126
+ subfield = MARC::Subfield.new(data[0,1],data[1..-1])
127
+ field.append(subfield)
128
+ end
129
+
130
+ # add the field to the record
131
+ record.append(field)
132
+ end
133
+ end
134
+
135
+ return record
136
+ end
137
+ end
138
+
139
+
140
+ # Like Reader ForgivingReader lets you read in a batch of MARC21 records
141
+ # but it does not use record lengths and field byte offsets found in the
142
+ # leader and directory. It is not unusual to run across MARC records
143
+ # which have had their offsets calculated wrong. In situations like this
144
+ # the vanilla Reader may fail, and you can try to use ForgivingReader.
145
+
146
+ # The one downside to this is that ForgivingReader will assume that the
147
+ # order of the fields in the directory is the same as the order of fields
148
+ # in the field data. Hopefully this will be the case, but it is not
149
+ # 100% guaranteed which is why the normal behavior of Reader is encouraged.
150
+
151
+ class ForgivingReader
152
+ include Enumerable
153
+
154
+ def initialize(file)
155
+ if file.class == String
156
+ @handle = File.new(file)
157
+ elsif file.class == File
158
+ @handle = file
159
+ else
160
+ throw "must pass in path or File object"
161
+ end
89
162
  end
90
163
 
91
164
 
165
+ def each
166
+ @handle.each_line(END_OF_RECORD) do |raw|
167
+ begin
168
+ record = MARC::Reader.decode(raw, :forgiving => true)
169
+ yield record
170
+ rescue StandardError => e
171
+ # caught exception just keep barrelling along
172
+ # TODO add logging
173
+ end
174
+ end
175
+ end
176
+ end
92
177
  end