marc4j4r 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,192 +0,0 @@
1
module MARC4J4R
  DataField = Java::org.marc4j.marc.impl::DataFieldImpl
  SubField = Java::org.marc4j.marc.impl::SubfieldImpl

  # Ruby conveniences added to marc4j's DataFieldImpl: construction from
  # one-character strings, equality, pretty-printing, and subfield lookup.
  class DataField
    include Enumerable

    alias_method :<<, :addSubfield
    alias_method :add, :addSubfield

    # Override the initialize to allow creation with just a tag (marc4j only
    # allows either no args or the tag and both indicators).
    #
    # @param [String] tag The field tag
    # @param [String] ind1 First indicator; only its first character is used
    # @param [String] ind2 Second indicator; only its first character is used
    alias_method :oldinit, :initialize
    def initialize(tag = nil, ind1 = ' ', ind2 = ' ')
      # The underlying Java constructor wants char values, hence the .ord
      self.oldinit(tag, ind1[0].ord, ind2[0].ord)
    end

    # @return [Boolean] always false; a DataField is never a control field
    def controlField?
      false
    end

    # Two data fields are equal when tag, both indicators, and every subfield
    # (compared pairwise, in order) are equal.
    #
    # Anything that does not look like a data field (nil, a control field, an
    # arbitrary object) compares unequal instead of raising.
    #
    # @param [Object] other The object to compare against
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:tag) &&
                          other.respond_to?(:indicator1) &&
                          other.respond_to?(:indicator2)
      return false unless self.tag == other.tag &&
                          self.indicator1 == other.indicator1 &&
                          self.indicator2 == other.indicator2

      mine = self.to_a
      theirs = other.to_a
      # Check the counts first so a shorter list is never padded with nil
      return false unless mine.size == theirs.size

      mine.zip(theirs).all? { |ssf, osf| ssf == osf }
    end

    # Pretty-print
    # @param [String] joiner What string to use to join the subfields
    # @return [String] The pretty string
    def to_s(joiner = ' ')
      parts = ["#{self.tag} #{self.indicator1}#{self.indicator2}"]
      self.each { |s| parts.push(s.to_s) }
      parts.join(joiner)
    end

    # Get the value of the first subfield of this field with the given code
    # @param [String] code 1-character string of the subfield code
    # @return [String, nil] The value of the first matched subfield, or nil if none
    # @raise [ArgumentError] if code is not a one-character String
    def [](code)
      unless code.is_a?(String) && code.size == 1
        raise ArgumentError, "Code must be a one-character string, not #{code}"
      end

      # need to send a char value that the underlying java can deal with
      sub = self.getSubfield(code[0].ord)
      sub ? sub.getData : nil
    end

    # Also call it "sub" for symmetry with "sub_values" and "subs"
    # and "first" because it makes sense.
    # Note that this definition of #first takes precedence over
    # Enumerable#first for this class, so #first requires a code argument.
    alias_method :sub, :[]
    alias_method :first, :[]

    # Get all subfields, optionally restricting to those with a given code
    # @param [String, Array<String>] code A 1-character string (or array of
    #   them); the code(s) to collect. Default is all
    # @return [Array<MARC4J4R::SubField>] The matching subfields, in field
    #   order, or an empty array
    def subs(code = false)
      return self.to_a unless code

      # Is it a singleton?
      wanted = code.is_a?(Array) ? code : [code]
      self.select { |s| wanted.include?(s.code) }
    end

    # Get all values from the subfields for the given code or array of codes
    # @param [String, Array<String>] code (Array of?) 1-character string(s) of the subfield code
    # @return [Array<String>] A possibly-empty array of Strings made up of the
    #   values in the subfields whose code is included in the given codes (or
    #   all subfields if code is nil). Values come back in field order, not in
    #   the order the codes were given.
    #
    # @example Quick examples:
    #   # 260 $a New York, $b Van Nostrand Reinhold Co. $c 1969
    #   rec['260'].sub_values('a') #=> ["New York,"]
    #   rec['260'].sub_values(['a', 'c']) #=> ["New York,", "1969"]
    #   rec['260'].sub_values(['c', 'a']) #=> ["New York,", "1969"]
    def sub_values(code = nil)
      self.subs(code).map { |s| s.value }
    end

    # Get first indicator as a one-character string
    def indicator1
      self.getIndicator1.chr
    end

    # Get second indicator as a one-character string
    def indicator2
      self.getIndicator2.chr
    end

    # Set the first indicator from the first character of the given string
    def indicator1=(char)
      self.setIndicator1(char[0].ord)
    end

    # Set the second indicator from the first character of the given string
    def indicator2=(char)
      self.setIndicator2(char[0].ord)
    end

    alias_method :ind1, :indicator1
    alias_method :"ind1=", :"indicator1="
    alias_method :ind2, :indicator2
    alias_method :"ind2=", :"indicator2="

    # Iterate over the subfields
    # @yield [SubField] each subfield, in field order
    # @return [Enumerator] when called without a block
    def each
      return enum_for(:each) unless block_given?

      self.getSubfields.each { |s| yield s }
    end

    # Get the concatenated values of the subfields in the order they appear in the field
    # @param [String] joiner The string used to join the subfield values
    # @return [String]
    def value(joiner = ' ')
      self.getSubfields.map { |s| s.data }.join(joiner)
    end
  end

  # Ruby conveniences added to marc4j's SubfieldImpl: construction from
  # one-character strings, equality, and string-based code accessors.
  class SubField
    # Allow construction with no arguments, just a code, or a code and data.
    # @param [String] code Subfield code; only its first character is used
    # @param [String] data Subfield data
    alias_method :oldinit, :initialize
    def initialize(code = nil, data = nil)
      if code
        # The underlying Java constructor wants a char value
        code = code[0].ord
        if data
          self.oldinit(code, data)
        else
          self.oldinit(code)
        end
      else
        self.oldinit
      end
    end

    # Subfields are equal when both code and data match. Objects without
    # #code and #data (including nil) compare unequal instead of raising.
    # @param [Object] other
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:code) && other.respond_to?(:data)

      self.code == other.code && self.data == other.data
    end

    # @return [String] the subfield data
    def value
      self.data
    end

    # Set the subfield data
    def value=(str)
      self.data = str
    end

    # @return [String] the subfield code as a one-character string
    def code
      self.getCode.chr
    end

    # Set the code from the first character of the given string
    def code=(str)
      self.setCode(str[0].ord)
    end

    # @return [String] "$<code> <data>"; interpolation keeps this safe when
    #   data is nil
    def to_s
      "$#{self.code} #{self.data}"
    end
  end
end
@@ -1,125 +0,0 @@
1
- import 'org.marc4j.ErrorHandler'
2
- require 'jlogger'
3
-
4
module MarcReader
  # Mixin for readers that collect errors while reading: fetches the next
  # record, logs any accumulated reader errors, and optionally hashifies
  # the record.
  module LoggingNextRecord
    # Get the next record from the reader.
    #
    # @param [Boolean] hashify Whether to call #hashify on the record before
    #   returning it
    # @return The record returned by #next
    # @raise [org.marc4j.MarcException] re-raised, after printing it, when the
    #   underlying reader fails
    def nextRecord(hashify = true)
      begin
        r = self.next
      rescue org.marc4j.MarcException => e
        puts "#{e}"
        raise
      end
      # respond_to? works whether #methods returns Strings or Symbols; the
      # old string membership test never matched on Ruby 1.9+.
      self.logErrors if self.respond_to?(:errors)
      r.hashify if hashify
      r
    end
  end
end
20
-
21
# Syntactic sugar for the marc4j MarcReader interface: Enumerable iteration,
# error logging, and a nextRecord that can hashify the record it returns.
module Java::OrgMarc4j::MarcReader
  include Enumerable
  include JLogger::Simple

  # Log every error the reader has collected, at a level matching its
  # severity. A FATAL error is logged and then terminates the process with a
  # non-zero exit status.
  def logErrors
    errs = self.errors.getErrors
    return unless errs

    errs.each do |err|
      case err.severity
      when ErrorHandler::MAJOR_ERROR
        log.error err.toString
      when ErrorHandler::ERROR_TYPO, ErrorHandler::MINOR_ERROR
        log.warn err.toString
      when ErrorHandler::INFO
        log.info err.toString
      when ErrorHandler::FATAL
        log.error err.toString
        # Exit with a failure status; a bare Process.exit reports success.
        Process.exit(1)
      end
    end
  end

  # Get the next record from the reader.
  # @param [Boolean] hashify Whether to call #hashify on the record first
  # @return The record returned by #next
  def nextRecord(hashify = true)
    r = self.next
    r.hashify if hashify
    r
  end

  # Yield each remaining record, calling #hashify on it first unless told
  # otherwise.
  # @param [Boolean] hashify Whether to hashify each record before yielding
  # @return [Enumerator] when called without a block
  def each(hashify = true)
    return enum_for(:each, hashify) unless block_given?

    yield self.nextRecord(hashify) while self.hasNext
  end
end
57
-
58
-
59
module MARC4J4R

  # Factory for marc4j readers. Reader.new never returns a Reader instance;
  # it returns one of the marc4j reader objects, with Ruby mixins included.
  class Reader

    ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8']
    ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'}

    # @attr_reader [File] handle The handle of the File (or IO) object being read from
    # NOTE(review): self.new assigns @handle on the Reader class object and
    # returns a Java reader, so this instance-level reader looks unreachable
    # -- confirm before relying on it.
    attr_reader :handle

    # Get a marc reader of the appropriate type
    # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
    # @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential] type The type of MARC reader you want.
    # @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] encoding An explicit encoding
    # @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each)
    # @raise [ArgumentError] if the encoding or the reader type is not recognised
    #
    # @example Get a strict binary MARC reader for the file 'test.mrc'
    #   reader = MARC4J4R::Reader.new('test.mrc')
    #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default
    #
    # @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8
    #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8)
    #
    # @example Get a permissive binary MARC reader
    #   reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
    #
    # @example Get a reader for an xml file
    #   reader = MARC4J4R::Reader.new('test.xml', :marcxml)
    #
    # @example Get a reader based on an existing IO object
    #   require 'open-uri'
    #   infile = open('http://my.machine.com/test.mrc')
    #   reader = MARC4J4R::Reader.new(infile)
    def self.new(input, type = :strictmarc, encoding = nil)
      if encoding
        # Translate a symbolic alias to its canonical name, then validate
        encoding = ENCODING_ALIASES[encoding] || encoding
        unless ENCODINGS.include?(encoding)
          legal = ENCODINGS.map { |x| '"' + x + '"' }.join(', ')
          raise ArgumentError, "Encoding must be in [#{legal}], not \"#{encoding}\""
        end
      end

      @handle = IOConvert.byteinstream(input)

      case type
      when :strictmarc
        klass = Java::org.marc4j.MarcStreamReader
        klass.send(:include, Enumerable)
        return klass.new(@handle, encoding)
      when :permissivemarc
        # With no explicit encoding, the permissive reader is given 'BESTGUESS'
        encoding ||= 'BESTGUESS'
        klass = Java::org.marc4j.MarcPermissiveStreamReader
        [Enumerable, JLogger::Simple, MarcReader::LoggingNextRecord].each do |mixin|
          klass.send(:include, mixin)
        end
        return klass.new(@handle, true, true, encoding)
      when :marcxml
        klass = Java::org.marc4j.MarcXmlReader
        [Enumerable, JLogger::Simple].each do |mixin|
          klass.send(:include, mixin)
        end
        return klass.new(@handle)
      when :alephsequential
        klass = Java::org.marc4j.MarcAlephSequentialReader
        [Enumerable, JLogger::Simple, MarcReader::LoggingNextRecord].each do |mixin|
          klass.send(:include, mixin)
        end
        return klass.new(@handle)
      else
        raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, or :alephsequential"
      end
    end
  end
end
@@ -1,259 +0,0 @@
1
- require 'stringio'
2
module MARC4J4R
  Record = Java::org.marc4j.marc.impl::RecordImpl

  # Ruby conveniences added to marc4j's RecordImpl: equality, tag lookup
  # through an optional per-record tag index, and serialization helpers.
  class Record
    include Enumerable

    alias_method :<<, :addVariableField
    alias_method :append, :addVariableField
    alias_method :fields, :getVariableFields

    # Show equality: two records are equal when their leaders match and their
    # fields are pairwise equal, in order.
    #
    # Objects that are not record-like, and records with a different number
    # of fields, compare unequal instead of raising. Nothing is printed.
    #
    # @param [Object] other
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:leader) && other.respond_to?(:to_a)
      return false unless self.leader == other.leader

      mine = self.to_a
      theirs = other.to_a
      # Check the counts first so zip never pairs a field with nil
      return false unless mine.size == theirs.size

      mine.zip(theirs).all? { |a, b| a == b }
    end

    # Create a local hash by tag number; makes some stuff faster
    # Called automatically if you use reader.each
    #
    # The index is built once; fields added afterwards are not reflected
    # until #rehash is called.
    def hashify
      return if @hashedtags # don't do it more than once

      @hashedtags = {}
      self.getVariableFields.each do |f|
        (@hashedtags[f.tag] ||= []).push(f)
      end
    end

    # Force a re-hash
    def rehash
      @hashedtags = nil
      hashify
    end

    # Create a nice string of the record
    # @return [String] the leader line followed by one line per field
    def to_s
      lines = ["LEADER #{self.leader}"]
      self.each { |f| lines.push(f.to_s) }
      lines.join("\n")
    end

    # Get the leader as a string (marc4j would otherwise return Leader object)
    def leader
      self.get_leader.toString
    end

    # Set the leader
    # @param [String] str The new leader
    # @raise [RuntimeError] if leader is illegal
    def leader=(str)
      self.set_leader Java::org.marc4j.marc.impl.LeaderImpl.new(str)
    rescue Java::java.lang.StringIndexOutOfBoundsException => e
      raise RuntimeError, "'#{str}' not a legal leader: #{e.message}"
    end

    # Cycle through the fields in the order they appear in the record
    # @return [Enumerator] when called without a block
    def each
      return enum_for(:each) unless block_given?

      self.getVariableFields.each { |f| yield f }
    end

    # Get the first field associated with a tag
    # @param [String] tag The tag
    # @return [Field] The first matching field, or nil if none. Note that
    #   to mirror ruby-marc, this returns a single field
    def [](tag)
      # Without a tag index, fall back to marc4j's own lookup
      return self.getVariableField(tag) unless defined?(@hashedtags) && @hashedtags

      matches = @hashedtags[tag]
      matches ? matches[0] : nil
    end

    # Get a (possibly empty) list of fields with the given tag(s)
    #
    # @param [String, Array<String>] tags A string (or Array of strings) with the tags you're interested in
    # @param [Boolean] originalorder Whether or not results should be presented in the original order within the
    #   record or with a two-column sort of (a) Order of the tag in the list of tags sent, (b) order within that tag
    #   in the record
    # @return [Array<Field>] Either an empty list or a list of one or more matched fields will be returned.
    #
    # originalorder == false will use an internal hash and be faster in many cases (see #hashify)
    #
    # @example originalorder == false
    #   # Given a record that looks like
    #   # 010    $a 68027371
    #   # 035    $a (RLIN)MIUG0001728-B
    #   # 035    $a (CaOTULAS)159818044
    #   # 035    $a (OCoLC)ocm00001728
    #
    #   r.find_by_tag(['035', '010']).each {|f| puts f.to_s}
    #   # 035    $a (RLIN)MIUG0001728-B
    #   # 035    $a (CaOTULAS)159818044
    #   # 035    $a (OCoLC)ocm00001728
    #   # 010    $a 68027371
    #
    #   # The results are ordered first by tag as passed in, then by original order within the tag
    #
    # @example Just get all fields for a single tag
    #   ohThirtyFives = r.find_by_tag('035')
    #
    # @example Get a bunch of standard identifiers
    #   standardIDs = r.find_by_tag(['022', '020', '010'])
    #
    # @example originalorder == true
    #   r.find_by_tag(['035', '010'], true).each {|f| puts f.to_s}
    #   # 010    $a 68027371
    #   # 035    $a (RLIN)MIUG0001728-B
    #   # 035    $a (CaOTULAS)159818044
    #   # 035    $a (OCoLC)ocm00001728
    def find_by_tag(tags, originalorder = false)
      # hashify returns immediately if the index already exists, so it is
      # safe to call unconditionally.
      self.hashify

      return @hashedtags[tags] || [] unless tags.is_a?(Array)
      return self.select { |f| tags.include?(f.tag) } if originalorder

      @hashedtags.values_at(*tags).flatten.compact
    end

    # Return the record as valid MARC-XML
    # @return String A MARC-XML representation of the record, including the XML header
    def to_xml
      Java::org.marc4j.MarcXmlWriter.record_to_XML(self)
    end

    # Serialize the record as binary MARC.
    # @param [String] encoding The encoding handed to the marc4j stream writer
    # @return [String] The contents of the output buffer, as a string
    #
    # NOTE(review): the buffer is converted with its no-argument to_string,
    # which presumably decodes using the JVM default charset rather than
    # `encoding` -- confirm for non-ASCII records.
    def to_marc(encoding = 'UTF-8')
      s = Java::java.io.ByteArrayOutputStream.new
      writer = org.marc4j.MarcPermissiveStreamWriter.new(s, encoding)
      writer.write(self)
      s.to_string
    end

    # Export as a MARC-Hash, as described at
    # http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
    # @return [Hash] A marc-hash representation of the record, suitable for calling .to_json on or whatever
    def to_marchash
      fields = self.getVariableFields.map do |f|
        if f.controlField?
          [f.tag, f.value]
        else
          subs = f.map { |subfield| [subfield.code, subfield.value] }
          [f.tag, f.indicator1 || ' ', f.indicator2 || ' ', subs]
        end
      end

      h = {}
      h['type'] = 'marc-hash'
      h['version'] = [1, 0]
      h['leader'] = self.leader
      h['fields'] = fields
      h
    end

    # Turn it into a marc-in-json hashmap. Note that this won't really work
    # like a ruby hash; you need to know what you're getting, since stuff
    # like #each won't work.
    #
    # Better to just use to_marc_in_json if you want a json string
    def to_hash
      Java::org.marc4j.MarcInJSON.record_to_hash(self)
    end

    # Turn it into a marc-in-json JSON string using Jackson
    def to_marc_in_json
      Java::org.marc4j.MarcInJSON.record_to_marc_in_json(self)
    end
  end

  # Given a marc record in a string, turn it into an object
  # @param String str The record as a MARC binary string
  # @param String encoding Optional encoding, passed through to the reader
  # @return MARC4J4R::Record The first record encoded in the string
  #
  # Note that the normal way of defining this class (self.from_string)
  # didn't work; I assume it has something to do with the fact that
  # it's actually just aliased to the Java class
  def Record.from_string(str, encoding = nil)
    s = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
    MARC4J4R::Reader.new(s, :strictmarc, encoding).first
  end

  # Given a marc-xml record in a string, turn it into an object
  # @param String str The record as a MARC-XML string
  # @return MARC4J4R::Record The first record encoded in the string
  def Record.from_xml_string(str)
    MARC4J4R::Reader.new(StringIO.new(str), :marcxml).first
  end

  # Build a record from a marc-in-json style hash
  def Record.new_from_hash(hash)
    Java::org.marc4j.MarcInJSON.new_from_hash(hash)
  end

  # Build a record from a marc-in-json JSON string
  def Record.new_from_marc_in_json(jsonstring)
    Java::org.marc4j.MarcInJSON.new_from_marc_in_json(jsonstring)
  end
end
259
-