marc 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,123 +1,120 @@
1
1
  module MARC
2
2
 
3
- # A class that represents an individual MARC record. Every record
4
- # is made up of a collection of MARC::Field objects.
5
-
6
- class Record
7
- include Enumerable
8
-
9
- # the record fields
10
- attr_accessor :fields,
11
-
12
- # the record leader
13
- :leader
14
-
15
- def initialize
16
- @fields = []
17
- # leader is 24 bytes
18
- @leader = ' ' * 24
19
- # leader defaults:
20
- # http://www.loc.gov/marc/bibliographic/ecbdldrd.html
21
- @leader[10..11] = '22'
22
- @leader[20..23] = '4500'
23
- end
24
-
25
- # add a field to the record
26
- # record.append(MARC::Field.new( '100', '2', '0', ['a', 'Fred']))
27
-
28
- def append(field)
29
- @fields.push(field)
30
- end
31
-
32
- # each() is here to support iterating and searching since MARC::Record
33
- # mixes in Enumberable
34
- #
35
- # iterating through the fields in a record:
36
- # record.each { |f| print f }
37
- #
38
- # getting the 245
39
- # title = record.find {|f| f.tag == '245'}
40
- #
41
- # getting all subjects
42
- # subjects = record.find_all {|f| ('600'..'699' === f.tag)}
43
-
44
- def each
45
- for field in @fields
46
- yield field
47
- end
48
- end
49
-
50
- # You can lookup fields using this shorthand:
51
- # title = record['245']
52
-
53
- def [](tag)
54
- return self.find {|f| f.tag == tag}
55
- end
56
-
57
- # Factory method for creating a MARC::Record from MARC21 in
58
- # transmission format. Really this is just a wrapper around
59
- # MARC::MARC21::decode
60
- #
61
- # record = MARC::Record.new_from_marc(marc21)
62
- #
63
- # in cases where you might be working with somewhat flawed
64
- # MARC data you may want to use the :forgiving parameter which
65
- # will bypass using field byte offsets and simply look for the
66
- # end of field byte to figure out the end of fields.
67
- #
68
- # record = MARC::Record.new_from_marc(marc21, :forgiving => true)
69
-
70
- def self.new_from_marc(raw, params={})
71
- return MARC::MARC21.new().decode(raw, params)
72
- end
73
-
74
-
75
- # Handy method for returning a the MARC21 serialization for a
76
- # MARC::Record object. Really this is just a wrapper around
77
- # MARC::MARC21::encode
78
- #
79
- # marc = record.to_marc()
80
-
81
- def to_marc
82
- return MARC::MARC21.new().encode(self)
83
- end
84
-
85
-
86
- # Returns a string version of the record, suitable for printing
87
-
88
- def to_s
89
- str = "LEADER #{leader}\n"
90
- for field in fields:
91
- str += field.to_s() + "\n"
92
- end
93
- return str
94
- end
95
-
96
-
97
- # For testing if two records can be considered equal.
98
-
99
- def ==(other)
100
- if @leader != other.leader:
101
- return false
102
- elsif @fields.length != other.fields.length()
103
- return false
104
- else
105
- for i in [0..@fields.length()]:
106
- return false if @fields[i] != other.fields[i]
107
- end
108
- end
109
- return true
110
- end
111
-
112
-
113
- # Handy for using a record in a regex:
114
- # if record =~ /Gravity's Rainbow/ then print "Slothrop" end
115
-
116
- def =~(regex)
117
- return self.to_s =~ regex
118
- end
3
+ # A class that represents an individual MARC record. Every record
4
+ # is made up of a collection of MARC::Field objects.
5
+
6
+ class Record
7
+ include Enumerable
8
+
9
+ # the record fields
10
+ attr_accessor :fields,
11
+
12
+ # the record leader
13
+ :leader
14
+
15
+ def initialize
16
+ @fields = []
17
+ # leader is 24 bytes
18
+ @leader = ' ' * 24
19
+ # leader defaults:
20
+ # http://www.loc.gov/marc/bibliographic/ecbdldrd.html
21
+ @leader[10..11] = '22'
22
+ @leader[20..23] = '4500'
23
+ end
24
+
25
+ # add a field to the record
26
+ # record.append(MARC::Field.new( '100', '2', '0', ['a', 'Fred']))
119
27
 
28
+ def append(field)
29
+ @fields.push(field)
120
30
  end
121
31
 
122
- end
32
+ # each() is here to support iterating and searching since MARC::Record
33
+ # mixes in Enumerable
34
+ #
35
+ # iterating through the fields in a record:
36
+ # record.each { |f| print f }
37
+ #
38
+ # getting the 245
39
+ # title = record.find {|f| f.tag == '245'}
40
+ #
41
+ # getting all subjects
42
+ # subjects = record.find_all {|f| ('600'..'699' === f.tag)}
43
+
44
# Yields every field of the record, in the order they were appended.
# This is the hook Enumerable builds on (find, select, map, ...).
#
# When called without a block an Enumerator over the fields is returned
# instead of raising LocalJumpError.
def each
  return enum_for(:each) unless block_given?

  @fields.each { |field| yield field }
end
49
+
50
+ # You can lookup fields using this shorthand:
51
+ # title = record['245']
52
+
53
+ def [](tag)
54
+ return self.find {|f| f.tag == tag}
55
+ end
56
+
57
+ # Factory method for creating a MARC::Record from MARC21 in
58
+ # transmission format.
59
+ #
60
+ # record = MARC::Record.new_from_marc(marc21)
61
+ #
62
+ # in cases where you might be working with somewhat flawed
63
+ # MARC data you may want to use the :forgiving parameter which
64
+ # will bypass using field byte offsets and simply look for the
65
+ # end of field byte to figure out the end of fields.
66
+ #
67
+ # record = MARC::Record.new_from_marc(marc21, :forgiving => true)
68
+
69
+ def self.new_from_marc(raw, params={})
70
+ return MARC::Reader.decode(raw, params)
71
+ end
72
+
73
+
74
+ # Handy method for returning the MARC21 serialization for a
75
+ # MARC::Record object. Really this is just a wrapper around
76
+ # MARC::Writer.encode
77
+ #
78
+ # marc = record.to_marc()
79
+
80
+ def to_marc
81
+ return MARC::Writer.encode(self)
82
+ end
83
+
84
+ # Handy method for returning the MARCXML serialization for a
85
+ # MARC::Record object. Really this is just a wrapper around
86
+ # MARC::MARCXML::encode
87
+ #
88
+ # xml = record.to_xml()
123
89
 
90
+ def to_xml
91
+ return MARC::MARCXML.new().encode(self)
92
+ end
93
+
94
+ # Returns a string version of the record, suitable for printing
95
+
96
# Builds the printable form of the record: a "LEADER <leader>" line
# followed by one line per field (each rendered with its own to_s).
# Every line, including the last, is terminated by "\n".
def to_s
  lines = ["LEADER #{leader}"]
  fields.each { |field| lines << field.to_s }
  # join + trailing newline reproduces the one-"\n"-per-line layout
  lines.join("\n") + "\n"
end
103
+
104
+
105
+ # For testing if two records can be considered equal.
106
+
107
+ def ==(other)
108
+ return self.to_s == other.to_s
109
+ end
110
+
111
+
112
+ # Handy for using a record in a regex:
113
+ # if record =~ /Gravity's Rainbow/ then print "Slothrop" end
114
+
115
+ def =~(regex)
116
+ return self.to_s =~ regex
117
+ end
118
+
119
+ end
120
+ end
@@ -1,27 +1,26 @@
1
1
  module MARC
2
2
 
3
- class Subfield
4
- attr_accessor :code, :value
3
+ class Subfield
4
+ attr_accessor :code, :value
5
5
 
6
- def initialize(code='' ,value='')
7
- # can't allow code of value to be nil
8
- # or else it'll screw us up later on
9
- @code = code == nil ? '' : code
10
- @value = value == nil ? '' : value
11
- end
12
-
13
- def ==(other)
14
- if @code != other.code
15
- return false
16
- elsif @value != other.value
17
- return false
18
- end
19
- return true
20
- end
6
+ def initialize(code='' ,value='')
7
+ # can't allow code or value to be nil
8
+ # or else it'll screw us up later on
9
+ @code = code == nil ? '' : code
10
+ @value = value == nil ? '' : value
11
+ end
21
12
 
22
- def to_s
23
- return "$#{code}#{value}"
24
- end
13
+ def ==(other)
14
+ if @code != other.code
15
+ return false
16
+ elsif @value != other.value
17
+ return false
18
+ end
19
+ return true
25
20
  end
26
21
 
22
+ def to_s
23
+ return "$#{code}#{value}"
24
+ end
25
+ end
27
26
  end
@@ -1,36 +1,87 @@
1
1
  module MARC
2
2
 
3
- # A class for writing MARC records as MARC21.
3
+ # A class for writing MARC records as MARC21.
4
4
 
5
- class Writer
5
+ class Writer
6
6
 
7
- # the constructor which you must pass a file path
8
- # or an object that responds to a write message
7
+ # the constructor which you must pass a file path
8
+ # or an object that responds to a write message
9
9
 
10
- def initialize(file)
11
- if file.class == String
12
- @fh = File.new(file,"w")
13
- elsif file.respond_to?(file)
14
- @fh = file
15
- else
16
- throw "must pass in file name or handle"
17
- end
18
- end
10
+ def initialize(file)
11
+ if file.class == String
12
+ @fh = File.new(file,"w")
13
+ elsif file.respond_to?('write')
14
+ @fh = file
15
+ else
16
+ throw "must pass in file name or handle"
17
+ end
18
+ end
19
19
 
20
20
 
21
- # write a record to the file or handle
21
+ # write a record to the file or handle
22
+
23
+ def write(record)
24
+ @fh.write(MARC::Writer.encode(record))
25
+ end
26
+
27
+
28
+ # close underlying filehandle
29
+
30
+ def close
31
+ @fh.close
32
+ end
22
33
 
23
- def write(record)
24
- @fh.write(record.to_marc)
25
- end
26
34
 
35
+ # a static method that accepts a MARC::Record object
36
+ # and returns the record encoded as MARC21 in transmission format
27
37
 
28
- # close underlying filehandle
38
# Encodes a MARC::Record as MARC21 in transmission format and returns the
# encoded String.
#
# record - object whose `fields` are MARC::Field / MARC::Control instances
#          and whose `leader` is the record leader.
#
# Field lengths, directory offsets and the two leader lengths are measured
# in bytes (String#bytesize), not characters, so multi-byte data does not
# corrupt the directory.
#
# Side effect: record.leader is overwritten with the leader computed here.
def self.encode(record)
  directory = ''
  fields = ''
  offset = 0

  record.fields.each do |field|
    # encode the field
    field_data = ''
    if field.class == MARC::Field
      field_data = field.indicator1 + field.indicator2
      field.subfields.each do |s|
        field_data += SUBFIELD_INDICATOR + s.code + s.value
      end
    elsif field.class == MARC::Control
      field_data = field.value
    end
    # += (not <<) so the field's own value string is never mutated
    field_data += END_OF_FIELD

    # calculate directory entry for the field
    field_length = field_data.bytesize
    directory += sprintf("%03s%04i%05i", field.tag, field_length, offset)

    # add field to data for other fields
    fields += field_data

    # update offset for next field
    offset += field_length
  end

  # determine the base (leader + directory)
  base = record.leader + directory + END_OF_FIELD

  # determine complete record
  marc = base + fields + END_OF_RECORD

  # update leader with the byte offset to the end of the directory
  marc[12..16] = sprintf("%05i", base.bytesize)

  # update the record length
  marc[0..4] = sprintf("%05i", marc.bytesize)

  # store updated leader in the record that was passed in
  record.leader = marc[0..LEADER_LENGTH-1]

  # return encoded marc
  marc
end
86
+ end
36
87
  end
@@ -0,0 +1,83 @@
1
+ require 'rexml/document'
2
+ require 'rexml/parsers/pullparser'
3
+
4
+ module MARC
5
+
6
# Reads MARCXML from a file with REXML's pull parser and yields a
# MARC::Record for every record element encountered.
#
#   reader = MARC::XMLReader.new('records.xml')
#   reader.each { |record| puts record }
class XMLReader
  include Enumerable

  # filename - path of the MARCXML file to parse.
  def initialize(filename)
    source = File.new(filename)
    @parser = REXML::Parsers::PullParser.new(source)
  end

  # Pulls parse events until the input is exhausted, yielding one record
  # per record start tag. Returns an Enumerator when no block is given.
  def each
    return enum_for(:each) unless block_given?

    while @parser.has_next?
      event = @parser.pull
      # a record start tag hands control to build_record, which consumes
      # events up to the matching end tag
      if event.start_element? && strip_ns(event[0]) == 'record'
        yield build_record
      end
    end
  end

  private

  # Returns the element name without its namespace prefix
  # ('marc:record' -> 'record'; 'record' -> 'record').
  #
  # Uses the non-destructive String#sub: sub! returns nil when nothing is
  # replaced, which made every un-prefixed element name compare as nil,
  # and it also modified the parser's own event string.
  def strip_ns(str)
    str.sub(/^.*:/, '')
  end

  # Accepts parse events until a record has been built up, then returns
  # it. Returns nil if the input ends before the record's end tag.
  def build_record
    record = MARC::Record.new
    data_field = nil
    control_field = nil
    subfield = nil
    text = ''
    attrs = nil

    while @parser.has_next?
      event = @parser.pull

      # accumulate character data; it is consumed at the next end tag
      if event.text?
        text += event[0].strip
        next
      end

      if event.start_element?
        attrs = event[1]
        case strip_ns(event[0])
        when 'controlfield'
          text = ''
          control_field = MARC::Control.new(attrs['tag'])
        when 'datafield'
          text = ''
          data_field = MARC::Field.new(attrs['tag'], attrs['ind1'],
                                       attrs['ind2'])
        when 'subfield'
          text = ''
          subfield = MARC::Subfield.new(attrs['code'])
        end
      end

      if event.end_element?
        case strip_ns(event[0])
        when 'leader'
          record.leader = text
        when 'record'
          return record
        when 'controlfield'
          control_field.value = text
          record.append(control_field)
        when 'datafield'
          record.append(data_field)
        when 'subfield'
          subfield.value = text
          data_field.append(subfield)
        end
      end
    end
  end
end
83
+ end