marc 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,123 +1,120 @@
1
1
  module MARC
2
2
 
3
- # A class that represents an individual MARC record. Every record
4
- # is made up of a collection of MARC::Field objects.
5
-
6
- class Record
7
- include Enumerable
8
-
9
- # the record fields
10
- attr_accessor :fields,
11
-
12
- # the record leader
13
- :leader
14
-
15
- def initialize
16
- @fields = []
17
- # leader is 24 bytes
18
- @leader = ' ' * 24
19
- # leader defaults:
20
- # http://www.loc.gov/marc/bibliographic/ecbdldrd.html
21
- @leader[10..11] = '22'
22
- @leader[20..23] = '4500'
23
- end
24
-
25
- # add a field to the record
26
- # record.append(MARC::Field.new( '100', '2', '0', ['a', 'Fred']))
27
-
28
- def append(field)
29
- @fields.push(field)
30
- end
31
-
32
- # each() is here to support iterating and searching since MARC::Record
33
- # mixes in Enumerable
34
- #
35
- # iterating through the fields in a record:
36
- # record.each { |f| print f }
37
- #
38
- # getting the 245
39
- # title = record.find {|f| f.tag == '245'}
40
- #
41
- # getting all subjects
42
- # subjects = record.find_all {|f| ('600'..'699' === f.tag)}
43
-
44
- def each
45
- for field in @fields
46
- yield field
47
- end
48
- end
49
-
50
- # You can lookup fields using this shorthand:
51
- # title = record['245']
52
-
53
- def [](tag)
54
- return self.find {|f| f.tag == tag}
55
- end
56
-
57
- # Factory method for creating a MARC::Record from MARC21 in
58
- # transmission format. Really this is just a wrapper around
59
- # MARC::MARC21::decode
60
- #
61
- # record = MARC::Record.new_from_marc(marc21)
62
- #
63
- # in cases where you might be working with somewhat flawed
64
- # MARC data you may want to use the :forgiving parameter which
65
- # will bypass using field byte offsets and simply look for the
66
- # end of field byte to figure out the end of fields.
67
- #
68
- # record = MARC::Record.new_from_marc(marc21, :forgiving => true)
69
-
70
- def self.new_from_marc(raw, params={})
71
- return MARC::MARC21.new().decode(raw, params)
72
- end
73
-
74
-
75
- # Handy method for returning the MARC21 serialization for a
76
- # MARC::Record object. Really this is just a wrapper around
77
- # MARC::MARC21::encode
78
- #
79
- # marc = record.to_marc()
80
-
81
- def to_marc
82
- return MARC::MARC21.new().encode(self)
83
- end
84
-
85
-
86
- # Returns a string version of the record, suitable for printing
87
-
88
- def to_s
89
- str = "LEADER #{leader}\n"
90
- for field in fields:
91
- str += field.to_s() + "\n"
92
- end
93
- return str
94
- end
95
-
96
-
97
- # For testing if two records can be considered equal.
98
-
99
- def ==(other)
100
- if @leader != other.leader:
101
- return false
102
- elsif @fields.length != other.fields.length()
103
- return false
104
- else
105
- for i in [0..@fields.length()]:
106
- return false if @fields[i] != other.fields[i]
107
- end
108
- end
109
- return true
110
- end
111
-
112
-
113
- # Handy for using a record in a regex:
114
- # if record =~ /Gravity's Rainbow/ then print "Slothrop" end
115
-
116
- def =~(regex)
117
- return self.to_s =~ regex
118
- end
3
+ # A class that represents an individual MARC record. Every record
4
+ # is made up of a collection of MARC::Field objects.
5
+
6
+ class Record
7
+ include Enumerable
8
+
9
+ # the record fields
10
+ attr_accessor :fields,
11
+
12
+ # the record leader
13
+ :leader
14
+
15
+ def initialize
16
+ @fields = []
17
+ # leader is 24 bytes
18
+ @leader = ' ' * 24
19
+ # leader defaults:
20
+ # http://www.loc.gov/marc/bibliographic/ecbdldrd.html
21
+ @leader[10..11] = '22'
22
+ @leader[20..23] = '4500'
23
+ end
24
+
25
+ # add a field to the record
26
+ # record.append(MARC::Field.new( '100', '2', '0', ['a', 'Fred']))
119
27
 
28
+ def append(field)
29
+ @fields.push(field)
120
30
  end
121
31
 
122
- end
32
+ # each() is here to support iterating and searching since MARC::Record
33
+ # mixes in Enumerable
34
+ #
35
+ # iterating through the fields in a record:
36
+ # record.each { |f| print f }
37
+ #
38
+ # getting the 245
39
+ # title = record.find {|f| f.tag == '245'}
40
+ #
41
+ # getting all subjects
42
+ # subjects = record.find_all {|f| ('600'..'699' === f.tag)}
43
+
44
+ def each
45
+ for field in @fields
46
+ yield field
47
+ end
48
+ end
49
+
50
+ # You can lookup fields using this shorthand:
51
+ # title = record['245']
52
+
53
+ def [](tag)
54
+ return self.find {|f| f.tag == tag}
55
+ end
56
+
57
+ # Factory method for creating a MARC::Record from MARC21 in
58
+ # transmission format.
59
+ #
60
+ # record = MARC::Record.new_from_marc(marc21)
61
+ #
62
+ # in cases where you might be working with somewhat flawed
63
+ # MARC data you may want to use the :forgiving parameter which
64
+ # will bypass using field byte offsets and simply look for the
65
+ # end of field byte to figure out the end of fields.
66
+ #
67
+ # record = MARC::Record.new_from_marc(marc21, :forgiving => true)
68
+
69
+ def self.new_from_marc(raw, params={})
70
+ return MARC::Reader.decode(raw, params)
71
+ end
72
+
73
+
74
+ # Handy method for returning the MARC21 serialization for a
75
+ # MARC::Record object. Really this is just a wrapper around
76
+ # MARC::Writer.encode
77
+ #
78
+ # marc = record.to_marc()
79
+
80
+ def to_marc
81
+ return MARC::Writer.encode(self)
82
+ end
83
+
84
+ # Handy method for returning the MARCXML serialization for a
85
+ # MARC::Record object. Really this is just a wrapper around
86
+ # MARC::MARCXML::encode
87
+ #
88
+ # xml = record.to_xml()
123
89
 
90
+ def to_xml
91
+ return MARC::MARCXML.new().encode(self)
92
+ end
93
+
94
+ # Returns a string version of the record, suitable for printing
95
+
96
+ def to_s
97
+ str = "LEADER #{leader}\n"
98
+ for field in fields:
99
+ str += field.to_s() + "\n"
100
+ end
101
+ return str
102
+ end
103
+
104
+
105
+ # For testing if two records can be considered equal.
106
+
107
+ def ==(other)
108
+ return self.to_s == other.to_s
109
+ end
110
+
111
+
112
+ # Handy for using a record in a regex:
113
+ # if record =~ /Gravity's Rainbow/ then print "Slothrop" end
114
+
115
+ def =~(regex)
116
+ return self.to_s =~ regex
117
+ end
118
+
119
+ end
120
+ end
@@ -1,27 +1,26 @@
1
1
  module MARC
2
2
 
3
- class Subfield
4
- attr_accessor :code, :value
3
+ class Subfield
4
+ attr_accessor :code, :value
5
5
 
6
- def initialize(code='' ,value='')
7
- # can't allow code or value to be nil
8
- # or else it'll screw us up later on
9
- @code = code == nil ? '' : code
10
- @value = value == nil ? '' : value
11
- end
12
-
13
- def ==(other)
14
- if @code != other.code
15
- return false
16
- elsif @value != other.value
17
- return false
18
- end
19
- return true
20
- end
6
+ def initialize(code='' ,value='')
7
+ # can't allow code or value to be nil
8
+ # or else it'll screw us up later on
9
+ @code = code == nil ? '' : code
10
+ @value = value == nil ? '' : value
11
+ end
21
12
 
22
- def to_s
23
- return "$#{code}#{value}"
24
- end
13
+ def ==(other)
14
+ if @code != other.code
15
+ return false
16
+ elsif @value != other.value
17
+ return false
18
+ end
19
+ return true
25
20
  end
26
21
 
22
+ def to_s
23
+ return "$#{code}#{value}"
24
+ end
25
+ end
27
26
  end
@@ -1,36 +1,87 @@
1
1
  module MARC
2
2
 
3
- # A class for writing MARC records as MARC21.
3
+ # A class for writing MARC records as MARC21.
4
4
 
5
- class Writer
5
+ class Writer
6
6
 
7
- # the constructor which you must pass a file path
8
- # or an object that responds to a write message
7
+ # the constructor which you must pass a file path
8
+ # or an object that responds to a write message
9
9
 
10
- def initialize(file)
11
- if file.class == String
12
- @fh = File.new(file,"w")
13
- elsif file.respond_to?(file)
14
- @fh = file
15
- else
16
- throw "must pass in file name or handle"
17
- end
18
- end
10
+ def initialize(file)
11
+ if file.class == String
12
+ @fh = File.new(file,"w")
13
+ elsif file.respond_to?('write')
14
+ @fh = file
15
+ else
16
+ throw "must pass in file name or handle"
17
+ end
18
+ end
19
19
 
20
20
 
21
- # write a record to the file or handle
21
+ # write a record to the file or handle
22
+
23
+ def write(record)
24
+ @fh.write(MARC::Writer.encode(record))
25
+ end
26
+
27
+
28
+ # close underlying filehandle
29
+
30
+ def close
31
+ @fh.close
32
+ end
22
33
 
23
- def write(record)
24
- @fh.write(record.to_marc)
25
- end
26
34
 
35
+ # a static method that accepts a MARC::Record object
36
+ # and returns the record encoded as MARC21 in transmission format
27
37
 
28
- # close underlying filehandle
38
+ def self.encode(record)
39
+ directory = ''
40
+ fields = ''
41
+ offset = 0
42
+ for field in record.fields
29
43
 
30
- def close
31
- @fh.close
44
+ # encode the field
45
+ field_data = ''
46
+ if field.class == MARC::Field
47
+ field_data = field.indicator1 + field.indicator2
48
+ for s in field.subfields
49
+ field_data += SUBFIELD_INDICATOR + s.code + s.value
50
+ end
51
+ elsif field.class == MARC::Control
52
+ field_data = field.value
32
53
  end
54
+ field_data += END_OF_FIELD
33
55
 
34
- end
56
+ # calculate directory entry for the field
57
+ field_length = field_data.length()
58
+ directory += sprintf("%03s%04i%05i", field.tag, field_length,
59
+ offset)
60
+
61
+ # add field to data for other fields
62
+ fields += field_data
63
+
64
+ # update offset for next field
65
+ offset += field_length
66
+ end
67
+
68
+ # determine the base (leader + directory)
69
+ base = record.leader + directory + END_OF_FIELD
35
70
 
71
+ # determine complete record
72
+ marc = base + fields + END_OF_RECORD
73
+
74
+ # update leader with the byte offset to the end of the directory
75
+ marc[12..16] = sprintf("%05i", base.length())
76
+
77
+ # update the record length
78
+ marc[0..4] = sprintf("%05i", marc.length())
79
+
80
+ # store updated leader in the record that was passed in
81
+ record.leader = marc[0..LEADER_LENGTH-1]
82
+
83
+ # return encoded marc
84
+ return marc
85
+ end
86
+ end
36
87
  end
@@ -0,0 +1,83 @@
1
+ require 'rexml/document'
2
+ require 'rexml/parsers/pullparser'
3
+
4
+ module MARC
5
+
6
+ class XMLReader
7
+ include Enumerable
8
+
9
+ def initialize(filename)
10
+ source = File.new(filename)
11
+ @parser = REXML::Parsers::PullParser.new(source)
12
+ end
13
+
14
+ def each
15
+ while @parser.has_next?
16
+ event = @parser.pull
17
+ # if it's the start of a record element
18
+ if event.start_element? and strip_ns(event[0]) == 'record'
19
+ yield build_record
20
+ end
21
+ end
22
+ end
23
+
24
+ private
25
+
26
+ def strip_ns(str)
27
+ return str.sub!(/^.*:/, '')
28
+ end
29
+
30
+ # will accept parse events until a record has been built up
31
+ #
32
+ def build_record
33
+ record = MARC::Record.new
34
+ data_field = nil
35
+ control_field = nil
36
+ subfield = nil
37
+ text = ''
38
+ attrs = nil
39
+
40
+ while @parser.has_next?
41
+ event = @parser.pull
42
+
43
+ if event.text?
44
+ text += event[0].strip
45
+ next
46
+ end
47
+
48
+ if event.start_element?
49
+ attrs = event[1]
50
+ case strip_ns(event[0])
51
+ when 'controlfield'
52
+ text = ''
53
+ control_field = MARC::Control.new(attrs['tag'])
54
+ when 'datafield'
55
+ text = ''
56
+ data_field = MARC::Field.new(attrs['tag'], attrs['ind1'],
57
+ attrs['ind2'])
58
+ when 'subfield'
59
+ text = ''
60
+ subfield = MARC::Subfield.new(attrs['code'])
61
+ end
62
+ end
63
+
64
+ if event.end_element?
65
+ case strip_ns(event[0])
66
+ when 'leader'
67
+ record.leader = text
68
+ when 'record'
69
+ return record
70
+ when 'controlfield'
71
+ control_field.value = text
72
+ record.append(control_field)
73
+ when 'datafield'
74
+ record.append(data_field)
75
+ when 'subfield'
76
+ subfield.value = text
77
+ data_field.append(subfield)
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end