marc4j4r 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,192 +0,0 @@
1
- module MARC4J4R
2
- DataField = Java::org.marc4j.marc.impl::DataFieldImpl
3
- SubField = Java::org.marc4j.marc.impl::SubfieldImpl
4
-
5
class DataField
  include Enumerable

  alias_method :<<, :addSubfield
  alias_method :add, :addSubfield

  # Keep the underlying Java constructor reachable so #initialize can wrap it.
  alias_method :oldinit, :initialize

  # Override the initialize to allow creation with just a tag (marc4j only
  # allows either no args or the tag and both indicators).
  # @param [String] tag The field tag
  # @param [String] ind1 First indicator; only its first character is used
  # @param [String] ind2 Second indicator; only its first character is used
  def initialize(tag = nil, ind1 = ' ', ind2 = ' ')
    # Called without an explicit receiver so it works whether or not the
    # aliased constructor is private. [0].ord sends a char value that the
    # underlying java can deal with.
    oldinit(tag, ind1[0].ord, ind2[0].ord)
  end

  # @return [Boolean] Always false: a DataField is never a control field
  def controlField?
    false
  end

  # Field equality: same tag, same indicators, and the same subfields in
  # the same order.
  # @param [Object] other The object to compare against
  # @return [Boolean] false (rather than an exception) when +other+ is not
  #   a data field or has a different number of subfields
  def ==(other)
    return false unless other.respond_to?(:tag) &&
                        other.respond_to?(:indicator1) &&
                        other.respond_to?(:indicator2)
    return false unless self.tag == other.tag &&
                        indicator1 == other.indicator1 &&
                        indicator2 == other.indicator2

    mine = to_a
    theirs = other.to_a
    # Compare lengths up front so a shorter +other+ never pairs a subfield
    # with nil.
    return false unless mine.size == theirs.size

    mine.each_with_index do |subfield, i|
      return false unless subfield == theirs[i]
    end
    true
  end

  # Pretty-print
  # @param [String] joiner What string to use to join the subfields
  # @return [String] The pretty string
  def to_s(joiner = ' ')
    header = self.tag + ' ' + indicator1 + indicator2
    ([header] + map { |s| s.to_s }).join(joiner)
  end

  # Get the value of the first subfield of this field with the given code
  # @param [String] code 1-character string of the subfield code
  # @return [String, nil] The value of the first matched subfield, or nil
  # @raise [ArgumentError] if code is not a one-character String
  def [](code)
    unless code.is_a?(String) && code.size == 1
      raise ArgumentError, "Code must be a one-character string, not #{code}"
    end
    # need to send a char value that the underlying java can deal with
    sub = getSubfield(code[0].ord)
    sub ? sub.getData : nil
  end

  # Also call it "sub" for symmetry with "sub_values" and "subs"
  # and "first" because it makes sense.
  # Note that this :first replaces Enumerable#first on this class.
  alias_method :sub, :[]
  alias_method :first, :[]

  # Get all subfields, optionally restricting to those with a given code
  # @param [String, Array<String>] code A 1-character string (or array of
  #   them); the code(s) to collect. Default is all
  # @return [Array<MARC4J4R::SubField>] The matching subfields, or an empty array
  def subs(code = false)
    return to_a unless code

    # Accept a single code as well as a list of codes
    wanted = code.is_a?(Array) ? code : [code]
    select { |s| wanted.include?(s.code) }
  end

  # Get all values from the subfields for the given code or array of codes
  # @param [String, Array<String>] code (Array of?) 1-character string(s) of the subfield code
  # @return [Array<String>] A possibly-empty array of Strings made up of the
  #   values in the subfields whose code is included in the given codes (or
  #   all subfields if code is nil). Values come back in field order, not in
  #   the order the codes were given.
  #
  # @example Quick examples:
  #   # 260 $a New York, $b Van Nostrand Reinhold Co. $c 1969
  #   rec['260'].sub_values('a') #=> ["New York,"]
  #   rec['260'].sub_values(['a', 'c']) #=> ["New York,", "1969"]
  #   rec['260'].sub_values(['c', 'a']) #=> ["New York,", "1969"]
  def sub_values(code = nil)
    subs(code).map { |s| s.value }
  end

  # Get first indicator as a one-character string
  def indicator1
    getIndicator1.chr
  end

  # Get second indicator as a one-character string
  def indicator2
    getIndicator2.chr
  end

  # Set the first indicator from the first character of +char+
  def indicator1=(char)
    setIndicator1 char[0].ord
  end

  # Set the second indicator from the first character of +char+
  def indicator2=(char)
    setIndicator2 char[0].ord
  end

  alias_method :ind1, :indicator1
  alias_method :"ind1=", :"indicator1="
  alias_method :ind2, :indicator2
  alias_method :"ind2=", :"indicator2="

  # Iterate over the subfields
  # @yield [subfield] Each subfield, in the order returned by getSubfields
  # @return An enumerator when called without a block
  def each
    return enum_for(:each) unless block_given?

    getSubfields.each { |s| yield s }
  end

  # Get the concatenated values of the subfields in the order they appear in the field
  # @param [String] joiner The string used to join the subfield values
  # @return [String] The joined subfield values
  def value(joiner = ' ')
    getSubfields.map { |s| s.data }.join(joiner)
  end
end
151
-
152
class SubField
  # Keep the underlying Java constructor reachable so #initialize can wrap it.
  alias_method :oldinit, :initialize

  # Create a subfield with no arguments, just a code, or a code and data.
  # @param [String] code The subfield code; only its first character is used
  # @param [String] data The subfield data
  def initialize(code = nil, data = nil)
    # Called without an explicit receiver so it works whether or not the
    # aliased constructor is private.
    return oldinit unless code

    # The wrapped constructor is handed an integer character code
    char = code[0].ord
    data ? oldinit(char, data) : oldinit(char)
  end

  # Subfields are equal when both code and data match.
  # @param [Object] other The object to compare against
  # @return [Boolean] false (rather than an exception) when +other+ does
  #   not look like a subfield
  def ==(other)
    return false unless other.respond_to?(:code) && other.respond_to?(:data)

    code == other.code && data == other.data
  end

  # @return [String] The subfield data
  def value
    data
  end

  # Set the subfield data
  # @param [String] str The new data
  def value=(str)
    self.data = str
  end

  # @return [String] The subfield code as a one-character string
  def code
    getCode.chr
  end

  # Set the subfield code from the first character of +str+
  def code=(str)
    setCode str[0].ord
  end

  # @return [String] "$<code> <data>"; nil data is rendered as an empty
  #   string so a code-only subfield can still be printed
  def to_s
    '$' + code + ' ' + data.to_s
  end
end
192
- end
@@ -1,125 +0,0 @@
1
- import 'org.marc4j.ErrorHandler'
2
- require 'jlogger'
3
-
4
module MarcReader
  # Mixin giving a reader a nextRecord that logs reader errors after each read.
  module LoggingNextRecord
    # Read the next record, log any errors the reader exposes, and
    # optionally build the record's tag hash.
    # @param [Boolean] hashify Whether to call #hashify on the record
    # @return The record returned by #next
    # @raise [org.marc4j.MarcException] re-raised after being printed
    def nextRecord(hashify = true)
      begin
        r = self.next
      rescue org.marc4j.MarcException => e
        puts "#{e}"
        raise
      end
      # respond_to? rather than methods.include?('errors'): on Ruby 1.9+
      # #methods returns symbols, so the string lookup never matched and
      # errors were silently never logged.
      logErrors if respond_to?(:errors)
      r.hashify if hashify
      r
    end
  end
end
20
-
21
module Java::OrgMarc4j::MarcReader
  include Enumerable
  include JLogger::Simple

  # Send every error the reader has collected to the logger, at a level
  # chosen by the error's severity. A FATAL error is logged and then
  # terminates the process.
  def logErrors
    errs = self.errors.getErrors
    return unless errs

    errs.each do |err|
      case err.severity
      when ErrorHandler::MAJOR_ERROR
        log.error err.toString
      when ErrorHandler::ERROR_TYPO, ErrorHandler::MINOR_ERROR
        log.warn err.toString
      when ErrorHandler::INFO
        log.info err.toString
      when ErrorHandler::FATAL
        log.error err.toString
        # Exit with a failure status; a bare Process.exit reports success.
        Process.exit(false)
      end
    end
  end

  # Read the next record.
  # @param [Boolean] hashify Whether to call #hashify on the record
  # @return The record returned by #next
  def nextRecord(hashify = true)
    r = self.next
    r.hashify if hashify
    r
  end

  # Yield each remaining record in turn, by default after calling
  # #hashify on it.
  # @param [Boolean] hashify Passed through to #nextRecord
  # @return An enumerator when called without a block
  def each(hashify = true)
    return enum_for(:each, hashify) unless block_given?

    yield nextRecord(hashify) while hasNext
  end
end
57
-
58
-
59
- module MARC4J4R
60
-
61
class Reader

  ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8']
  ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'}

  # @attr_reader [File] handle The handle of the File (or IO) object being read from
  # NOTE(review): Reader.new returns marc4j reader objects, never a Reader
  # instance, so nothing visible here ever populates this attribute.
  attr_reader :handle

  # Get a marc reader of the appropriate type
  # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
  # @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential] type The type of MARC reader you want.
  # @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] encoding An explicit encoding
  # @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each)
  # @raise [ArgumentError] if the encoding or the reader type is not recognised
  #
  # @example Get a strict binary MARC reader for the file 'test.mrc'
  #   reader = MARC4J4R::Reader.new('test.mrc')
  #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default
  #
  # @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8
  #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8)
  #
  # @example Get a permissive binary MARC reader
  #   reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
  #
  # @example Get a reader for an xml file
  #   reader = MARC4J4R::Reader.new('test.xml', :marcxml)
  #
  # @example Get a reader based on an existing IO object
  #   require 'open-uri'
  #   infile = open('http://my.machine.com/test.mrc')
  #   reader = MARC4J4R::Reader.new(infile)
  def self.new(input, type = :strictmarc, encoding = nil)
    if encoding
      # Translate a symbolic alias; anything else passes through unchanged
      encoding = ENCODING_ALIASES.fetch(encoding, encoding)
      unless ENCODINGS.include? encoding
        raise ArgumentError, "Encoding must be in [#{ENCODINGS.map {|x| '"' + x + '"'}.join(', ')}], not \"#{encoding}\""
      end
    end

    # Reject a bad type before the input is opened, so a bad call does not
    # leave an opened stream behind.
    unless [:strictmarc, :permissivemarc, :marcxml, :alephsequential].include?(type)
      raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, or :alephsequential"
    end

    # A local, not a class-level instance variable: shared state here would
    # let concurrent calls swap streams and would keep the last one alive.
    stream = IOConvert.byteinstream(input)

    case type
    when :strictmarc
      klass = Java::org.marc4j.MarcStreamReader
      mix_into(klass, Enumerable)
      klass.new(stream, encoding)
    when :permissivemarc
      encoding ||= 'BESTGUESS'
      klass = Java::org.marc4j.MarcPermissiveStreamReader
      mix_into(klass, Enumerable, JLogger::Simple, MarcReader::LoggingNextRecord)
      klass.new(stream, true, true, encoding)
    when :marcxml
      klass = Java::org.marc4j.MarcXmlReader
      mix_into(klass, Enumerable, JLogger::Simple)
      klass.new(stream)
    when :alephsequential
      klass = Java::org.marc4j.MarcAlephSequentialReader
      mix_into(klass, Enumerable, JLogger::Simple, MarcReader::LoggingNextRecord)
      klass.new(stream)
    end
  end

  # Include each of the given modules into a reader class, in order.
  def self.mix_into(klass, *mods)
    mods.each { |mod| klass.send(:include, mod) }
  end
  private_class_method :mix_into
end
125
- end
@@ -1,259 +0,0 @@
1
- require 'stringio'
2
- module MARC4J4R
3
- Record = Java::org.marc4j.marc.impl::RecordImpl
4
-
5
class Record
  include Enumerable

  alias_method :<<, :addVariableField
  alias_method :append, :addVariableField
  alias_method :fields, :getVariableFields

  # Show equality: same leader and the same fields in the same order.
  # @param [Object] other The object to compare against
  # @return [Boolean] false (rather than an exception) when +other+ is not
  #   a record or has a different number of fields
  def ==(other)
    return false unless other.respond_to?(:leader) && other.respond_to?(:to_a)
    return false unless leader == other.leader

    mine = to_a
    theirs = other.to_a
    # Compare lengths up front so no field is ever paired with nil
    return false unless mine.size == theirs.size

    mine.each_with_index do |field, i|
      return false unless field == theirs[i]
    end
    true
  end

  # Create a local hash by tag number; makes some stuff faster
  # Called automatically if you use reader.each
  #
  # The hash is not updated when fields are added later; use #rehash.
  def hashify
    return if @hashedtags # don't do it more than once

    index = {}
    getVariableFields.each do |f|
      (index[f.tag] ||= []) << f
    end
    @hashedtags = index
  end

  # Force a re-hash
  def rehash
    @hashedtags = nil
    hashify
  end

  # Create a nice string of the record
  # @return [String] The leader line followed by one line per field
  def to_s
    (['LEADER ' + leader] + map { |f| f.to_s }).join("\n")
  end

  # Get the leader as a string (marc4j would otherwise return Leader object)
  def leader
    get_leader.toString
  end

  # Set the leader
  # @param [String] str The new leader
  # @raise [RuntimeError] if leader is illegal
  def leader=(str)
    self.set_leader Java::org.marc4j.marc.impl.LeaderImpl.new(str)
  rescue Java::java.lang.StringIndexOutOfBoundsException => e
    raise RuntimeError, "'#{str}' not a legal leader: #{e.message}"
  end

  # Cycle through the fields in the order they appear in the record
  # @yield [field] Each variable field
  # @return An enumerator when called without a block
  def each
    return enum_for(:each) unless block_given?

    getVariableFields.each { |f| yield f }
  end

  # Get the first field associated with a tag
  # @param [String] tag The tag
  # @return [Field] The first matching field, or nil if none. Note that
  #   to mirror ruby-marc, this returns a single field
  def [](tag)
    # Without a tag hash, ask the underlying record directly
    return getVariableField(tag) unless @hashedtags

    matches = @hashedtags[tag]
    matches ? matches[0] : nil
  end

  # Get a (possibly empty) list of fields with the given tag(s)
  #
  # @param [String, Array<String>] tags A string (or Array of strings) with the tags you're interested in
  # @param [Boolean] originalorder Whether or not results should be presented in the original order within the
  #   record or with a two-column sort of (a) Order of the tag in the list of tags sent, (b) order within that tag
  #   in the record
  # @return [Array<Field>] Either an empty list or a list of one or more matched fields will be returned.
  #   The array is always a fresh one, never the internal cache.
  #
  # originalorder == false will use an internal hash and be faster in many cases (see #hashify)
  #
  # @example originalorder == false
  #   # Given a record that looks like
  #   # 010 $a 68027371
  #   # 035 $a (RLIN)MIUG0001728-B
  #   # 035 $a (CaOTULAS)159818044
  #   # 035 $a (OCoLC)ocm00001728
  #
  #   r.find_by_tag(['035', '010']).each {|f| puts f.to_s}
  #   # 035 $a (RLIN)MIUG0001728-B
  #   # 035 $a (CaOTULAS)159818044
  #   # 035 $a (OCoLC)ocm00001728
  #   # 010 $a 68027371
  #
  #   # The results are ordered first by tag as passed in, then by original order within the tag
  #
  # @example Just get all fields for a single tag
  #   ohThirtyFives = r.find_by_tag('035')
  #
  # @example Get a bunch of standard identifiers
  #   standardIDs = r.find_by_tag(['022', '020', '010'])
  #
  # @example originalorder == true
  #   r.find_by_tag(['035', '010'], true).each {|f| puts f.to_s}
  #   # 010 $a 68027371
  #   # 035 $a (RLIN)MIUG0001728-B
  #   # 035 $a (CaOTULAS)159818044
  #   # 035 $a (OCoLC)ocm00001728
  def find_by_tag(tags, originalorder = false)
    hashify # returns immediately when the hash already exists

    wanted = tags.is_a?(Array) ? tags : [tags]
    return select { |f| wanted.include?(f.tag) } if originalorder

    wanted.inject([]) { |found, t| found.concat(@hashedtags[t] || []) }
  end

  # Return the record as valid MARC-XML
  # @return [String] A MARC-XML representation of the record, including the XML header
  def to_xml
    Java::org.marc4j.MarcXmlWriter.record_to_XML(self)
  end

  # Write the record through a MarcPermissiveStreamWriter and return the result
  # @param [String] encoding The encoding handed to the writer
  # @return [String] The written record
  def to_marc(encoding = 'UTF-8')
    out = Java::java.io.ByteArrayOutputStream.new
    writer = org.marc4j.MarcPermissiveStreamWriter.new(out, encoding)
    writer.write(self)
    # NOTE(review): the bytes are turned into a string without naming a
    # charset, and the writer is never closed -- confirm both are intended.
    out.to_string
  end

  # Export as a MARC-Hash, as described at
  # http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
  # @return [Hash] A marc-hash representation of the record, suitable for calling .to_json on or whatever
  def to_marchash
    fields = getVariableFields.map do |f|
      if f.controlField?
        [f.tag, f.value]
      else
        subs = f.map { |subfield| [subfield.code, subfield.value] }
        [f.tag, f.indicator1 || ' ', f.indicator2 || ' ', subs]
      end
    end

    {
      'type'    => 'marc-hash',
      'version' => [1, 0],
      'leader'  => leader,
      'fields'  => fields
    }
  end

  # Turn it into a marc-in-json hashmap. Note that this won't really work
  # like a ruby hash; you need to know what you're getting, since stuff
  # like #each won't work.
  #
  # Better to just use to_marc_in_json if you want a json string
  def to_hash
    Java::org.marc4j.MarcInJSON.record_to_hash(self)
  end

  # Turn it into a marc-in-json JSON string using Jackson
  def to_marc_in_json
    Java::org.marc4j.MarcInJSON.record_to_marc_in_json(self)
  end
end
223
-
224
-
225
-
226
- # Give a marc record in a string, turn it into an object
227
- # @param String str The record as a MARC binary string
228
- # @return MARC4J4R::Record The first record encoded in the string
229
- #
230
- # Note that the normal way of defining this class (self.from_string)
231
- # didn't work; I assume it has something to do with the fact that
232
- # it's actually just aliased to the Java class
233
# Parse a binary MARC string and hand back the first record it contains.
# The string's bytes are wrapped in a Java input stream and read with a
# :strictmarc reader.
# @param [String] str The record as a MARC binary string
# @param [Symbol, String] encoding Optional encoding passed on to the reader
# @return [MARC4J4R::Record] The first record encoded in the string
def Record.from_string(str, encoding = nil)
  byte_stream = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
  MARC4J4R::Reader.new(byte_stream, :strictmarc, encoding).first
end
238
-
239
-
240
- # Give a marc-xml record in a string, turn it into an object
241
- # @param String str The record as a MARC-XML string
242
- # @return MARC4J4R::Record The first record encoded in the string
243
# Parse a MARC-XML string and hand back the first record it contains.
# The string is wrapped in a StringIO and read with a :marcxml reader.
# @param [String] str The record as a MARC-XML string
# @return [MARC4J4R::Record] The first record encoded in the string
def Record.from_xml_string(str)
  xml_io = StringIO.new(str)
  MARC4J4R::Reader.new(xml_io, :marcxml).first
end
246
-
247
# Build a record from a hash by delegating to MarcInJSON.new_from_hash.
# @param hash The hash to convert (presumably a marc-in-json structure -- confirm)
# @return Whatever MarcInJSON.new_from_hash returns
def Record.new_from_hash(hash)
  Java::org.marc4j.MarcInJSON.new_from_hash(hash)
end
250
-
251
# Build a record from a JSON string by delegating to
# MarcInJSON.new_from_marc_in_json.
# @param [String] jsonstring The JSON text to convert
# @return Whatever MarcInJSON.new_from_marc_in_json returns
def Record.new_from_marc_in_json(jsonstring)
  Java::org.marc4j.MarcInJSON.new_from_marc_in_json(jsonstring)
end
254
-
255
-
256
-
257
-
258
- end
259
-