marc4j4r 1.4.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ import 'org.marc4j.ErrorHandler'
2
+ require 'jlogger'
3
+
4
module MarcReader
  # Mixin providing a #nextRecord that reports marc4j parse failures and
  # logs any errors the reader has accumulated for the record just read.
  #
  # The including object must supply #next; #logErrors is only invoked when
  # the object also responds to #errors.
  module LoggingNextRecord
    # Fetch the next record from the underlying reader.
    #
    # @param [Boolean] hashify Whether to call #hashify on the record before returning it
    # @return The object returned by #next (nil is passed through untouched)
    # @raise [org.marc4j.MarcException] re-raised, after being echoed to stdout, when #next fails
    def nextRecord(hashify = true)
      begin
        rec = self.next
      rescue org.marc4j.MarcException => e
        puts "#{e}"
        raise e
      end
      # Object#methods returns Symbols on Ruby 1.9+, so comparing against the
      # String 'errors' never matched and errors were silently dropped;
      # respond_to? works regardless of the Ruby compatibility mode.
      logErrors if respond_to?(:errors)
      rec.hashify if hashify && rec
      rec
    end
  end
end
20
+
21
# Sugar added to the marc4j MarcReader interface: Enumerable iteration,
# a logger (via JLogger::Simple) and helpers for logging reader errors.
module Java::OrgMarc4j::MarcReader
  include Enumerable
  include JLogger::Simple

  # Send every error reported by the reader's error handler to the logger,
  # choosing the log level from the error's severity.
  #
  # Does nothing when the error handler has no error list. A FATAL error is
  # logged and then terminates the process.
  def logErrors
    errs = self.errors.getErrors
    return unless errs
    errs.each do |err|
      case err.severity
      when ErrorHandler::MAJOR_ERROR
        log.error err.toString
      when ErrorHandler::ERROR_TYPO, ErrorHandler::MINOR_ERROR
        log.warn err.toString
      when ErrorHandler::INFO
        log.info err.toString
      when ErrorHandler::FATAL
        log.error err.toString
        # Exit with a failure status; a bare Process.exit reports success (0).
        Process.exit(1)
      end
    end
  end

  # Return the next record, after (optionally) calling #hashify on it
  #
  # @param [Boolean] hashify Whether to call #hashify on the record
  # @return The object returned by #next (nil is passed through untouched)
  def nextRecord(hashify = true)
    rec = self.next
    rec.hashify if hashify && rec
    rec
  end

  # Yield each remaining record in turn, for as long as #hasNext is true.
  #
  # @param [Boolean] hashify Passed through to #nextRecord
  # @return [Enumerator] when called without a block
  def each(hashify = true)
    return enum_for(:each, hashify) unless block_given?
    yield nextRecord(hashify) while self.hasNext
  end
end
57
+
58
+
59
module MARC4J4R

  # Factory for marc4j readers. Reader.new never returns a MARC4J4R::Reader
  # instance; it returns one of the underlying marc4j reader objects.
  class Reader

    ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8']
    ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'}

    # Every reader type understood by Reader.new
    TYPES = [:strictmarc, :permissivemarc, :marcxml, :alephsequential, :json]

    # @attr_reader [File] handle The handle of the File (or IO) object being read from
    # NOTE(review): Reader.new is a class method, so the @handle it assigns lives
    # on the Reader class object; this instance-level reader never sees it.
    attr_reader :handle

    # Get a marc reader of the appropriate type
    # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
    # @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential, :json] type The type of MARC reader you want.
    # @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] encoding An explicit encoding
    # @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each)
    # @raise [ArgumentError] if the encoding or the reader type is not recognised
    #
    # @example Get a strict binary MARC reader for the file 'test.mrc'
    #   reader = MARC4J4R::Reader.new('test.mrc')
    #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default
    #
    # @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8
    #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8)
    #
    # @example Get a permissive binary MARC reader
    #   reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
    #
    # @example Get a reader for an xml file
    #   reader = MARC4J4R::Reader.new('test.xml', :marcxml)
    #
    # @example Get a reader based on an existing IO object
    #   require 'open-uri'
    #   infile = open('http://my.machine.com/test.mrc')
    #   reader = MARC4J4R::Reader.new(infile)

    def self.new(input, type = :strictmarc, encoding = nil)
      if encoding
        # Translate a symbolic alias (:utf8 etc.) into its canonical name.
        encoding = ENCODING_ALIASES.fetch(encoding, encoding)
        unless ENCODINGS.include? encoding
          raise ArgumentError, "Encoding must be in [#{ENCODINGS.map {|x| '"' + x + '"'}.join(', ')}], not \"#{encoding}\""
        end
      end

      # Reject an unknown type before the input is opened, so a bad call
      # never leaves a stream behind.
      unless TYPES.include? type
        raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, :alephsequential, or :json"
      end

      @handle = IOConvert.byteinstream(input)
      case type
      when :strictmarc
        Java::org.marc4j.MarcStreamReader.send(:include, Enumerable)
        Java::org.marc4j.MarcStreamReader.new(@handle, encoding)
      when :permissivemarc
        encoding ||= 'BESTGUESS'
        Java::org.marc4j.MarcPermissiveStreamReader.send(:include, Enumerable)
        Java::org.marc4j.MarcPermissiveStreamReader.send(:include, JLogger::Simple)
        Java::org.marc4j.MarcPermissiveStreamReader.send(:include, MarcReader::LoggingNextRecord)
        # NOTE(review): the two `true` flags are presumably marc4j's
        # "permissive" and "convert to UTF-8" switches -- confirm against marc4j.
        Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true, encoding)
      when :marcxml
        Java::org.marc4j.MarcXmlReader.send(:include, Enumerable)
        Java::org.marc4j.MarcXmlReader.send(:include, JLogger::Simple)
        Java::org.marc4j.MarcXmlReader.new(@handle)
      when :alephsequential
        Java::org.marc4j.MarcAlephSequentialReader.send(:include, Enumerable)
        Java::org.marc4j.MarcAlephSequentialReader.send(:include, JLogger::Simple)
        Java::org.marc4j.MarcAlephSequentialReader.send(:include, MarcReader::LoggingNextRecord)
        Java::org.marc4j.MarcAlephSequentialReader.new(@handle)
      when :json
        Java::org.marc4j.MarcJsonReader.send(:include, Enumerable)
        Java::org.marc4j.MarcJsonReader.send(:include, JLogger::Simple)
        Java::org.marc4j.MarcJsonReader.new(@handle)
      end
    end
  end
end
@@ -0,0 +1,257 @@
1
+ require 'stringio'
2
module MARC4J4R
  # MARC4J4R::Record is marc4j's own RecordImpl class; the class body below
  # reopens it to add Ruby-style conveniences.
  Record = Java::org.marc4j.marc.impl::RecordImpl

  class Record
    include Enumerable

    alias_method :<<, :addVariableField
    alias_method :append, :addVariableField
    alias_method :fields, :getVariableFields

    # Show equality
    #
    # Two records are equal when their leaders match and they hold the same
    # number of fields, pairwise equal and in the same order.
    #
    # @param other The object to compare against
    # @return [Boolean] false for anything that does not look like a record
    def ==(other)
      return false unless other.respond_to?(:leader) && other.respond_to?(:to_a)
      return false unless leader == other.leader
      to_a == other.to_a
    end


    # Create a local hash by tag number; makes some stuff faster
    # Called automatically if you use reader.each
    #
    # Fields added after the hash is built are not picked up until #rehash.
    def hashify
      return if @hashedtags # don't do it more than once
      @hashedtags = {}
      self.getVariableFields.each do |f|
        (@hashedtags[f.tag] ||= []).push f
      end
    end

    # Force a re-hash
    def rehash
      @hashedtags = nil
      hashify
    end

    # Create a nice string of the record
    # @return [String] The leader line followed by one line per field
    def to_s
      lines = ['LEADER ' + self.leader]
      self.each { |f| lines.push f.to_s }
      lines.join("\n")
    end

    # Get the leader as a string (marc4j would otherwise return Leader object)
    # @return [String]
    def leader
      self.get_leader.toString
    end

    # Set the leader
    # @param [String] str The new leader
    # @raise [RuntimeError] if leader is illegal
    def leader=(str)
      self.set_leader Java::org.marc4j.marc.impl.LeaderImpl.new(str)
    rescue Java::java.lang.StringIndexOutOfBoundsException => e
      raise RuntimeError, "'#{str}' not a legal leader: #{e.message}"
    end

    # Cycle through the fields in the order they appear in the record
    def each(&blk)
      self.getVariableFields.each(&blk)
    end

    # Get the first field associated with a tag
    # @param [String] tag The tag
    # @return [Field] The first matching field, or nil if none. Note that
    #   to mirror ruby-marc, this returns a single field
    def [](tag)
      # Without a tag hash, fall back to marc4j's own lookup.
      return self.getVariableField(tag) unless @hashedtags
      matches = @hashedtags[tag]
      matches ? matches[0] : nil
    end


    # Get a (possibly empty) list of fields with the given tag(s)
    #
    # @param [String, Array<String>] tags A string (or Array of strings) with the tags you're interested in
    # @param [Boolean] originalorder Whether or not results should be presented in the original order within the
    #   record or with a two-column sort of (a) Order of the tag in the list of tags sent, (b) order within that tag
    #   in the record
    # @return [Array<Field>] Either an empty list or a list of one or more matched fields will be returned.
    #
    # originalorder == false will use an internal hash and be faster in many cases (see #hashify)
    #
    # @example originalorder == false
    #   # Given a record that looks like
    #   # 010    $a 68027371
    #   # 035    $a (RLIN)MIUG0001728-B
    #   # 035    $a (CaOTULAS)159818044
    #   # 035    $a (OCoLC)ocm00001728
    #
    #   r.find_by_tag(['035', '010']).each {|f| puts f.to_s}
    #   # 035    $a (RLIN)MIUG0001728-B
    #   # 035    $a (CaOTULAS)159818044
    #   # 035    $a (OCoLC)ocm00001728
    #   # 010    $a 68027371
    #
    #   # The results are ordered first by tag as passed in, then by original order within the tag
    #
    # @example Just get all fields for a single tag
    #   ohThirtyFives = r.find_by_tag('035')
    #
    # @example Get a bunch of standard identifiers
    #   standardIDs = r.find_by_tag(['022', '020', '010'])
    #
    # @example originalorder == true
    #   r.find_by_tag(['035', '010'], true).each {|f| puts f.to_s}
    #   # 010    $a 68027371
    #   # 035    $a (RLIN)MIUG0001728-B
    #   # 035    $a (CaOTULAS)159818044
    #   # 035    $a (OCoLC)ocm00001728
    def find_by_tag(tags, originalorder = false)
      # hashify returns immediately when the tag hash already exists, so it
      # is safe to call unconditionally.
      hashify
      return @hashedtags[tags] || [] unless tags.is_a? Array

      if originalorder
        self.find_all { |f| tags.include? f.tag }
      else
        @hashedtags.values_at(*tags).flatten.compact
      end
    end


    # Return the record as valid MARC-XML
    # @return String A MARC-XML representation of the record, including the XML header
    def to_xml
      Java::org.marc4j.MarcXmlWriter.record_to_XML(self)
    end


    # Serialize the record with marc4j's MarcPermissiveStreamWriter
    # @param String encoding The encoding handed to the writer
    # @return String The contents of the writer's output buffer
    #
    # NOTE(review): ByteArrayOutputStream#toString decodes using the platform
    # default charset, which may not match +encoding+ -- confirm this is intended.
    def to_marc(encoding = 'UTF-8')
      buffer = Java::java.io.ByteArrayOutputStream.new
      writer = org.marc4j.MarcPermissiveStreamWriter.new(buffer, encoding)
      writer.write(self)
      buffer.to_string
    end

    # Export as a MARC-Hash, as described at
    # http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
    # @return A marc-hash representation of the record, suitable for calling .to_json on or whatever
    def to_marchash
      h = {}
      h['type'] = 'marc-hash'
      h['version'] = [1,0]
      h['leader'] = self.leader

      fields = []
      self.getVariableFields.each do |f|
        if f.controlField?
          fields << [f.tag, f.value]
        else
          # Data field: tag, both indicators (blank when missing), then
          # the list of [code, value] subfield pairs.
          subs = []
          f.each { |subfield| subs << [subfield.code, subfield.value] }
          fields << [f.tag, f.indicator1 || ' ', f.indicator2 || ' ', subs]
        end
      end
      h['fields'] = fields
      h
    end

    # Turn it into a marc-in-json hashmap. Note that this won't really work
    # like a ruby hash; you need to know what you're getting, since stuff
    # like #each won't work.
    #
    # Better to just use to_marc_in_json if you want a json string
    def to_hash
      Java::org.marc4j.MarcInJSON.record_to_hash(self)
    end


    # Turn it into a marc-in-json JSON string using Jackson
    def to_marc_in_json
      Java::org.marc4j.MarcInJSON.record_to_marc_in_json(self)
    end

  end


  # Give a marc record in a string, turn it into an object
  # @param String str The record as a MARC binary string
  # @param encoding Optional encoding, passed through to MARC4J4R::Reader.new
  # @return MARC4J4R::Record The first record encoded in the string
  #
  # Note that the normal way of defining this class (self.from_string)
  # didn't work; I assume it has something to do with the fact that
  # it's actually just aliased to the Java class
  def Record.from_string(str, encoding = nil)
    stream = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
    MARC4J4R::Reader.new(stream, :strictmarc, encoding).first
  end


  # Give a marc-xml record in a string, turn it into an object
  # @param String str The record as a MARC-XML string
  # @return MARC4J4R::Record The first record encoded in the string
  def Record.from_xml_string(str)
    MARC4J4R::Reader.new(StringIO.new(str), :marcxml).first
  end

  # Build a record from a marc-in-json hash (delegates to marc4j's MarcInJSON)
  # @param hash The marc-in-json structure
  def Record.new_from_hash(hash)
    Java::org.marc4j.MarcInJSON.new_from_hash(hash)
  end

  # Build a record from a marc-in-json JSON string (delegates to marc4j's MarcInJSON)
  # @param String jsonstring The marc-in-json document
  def Record.new_from_marc_in_json(jsonstring)
    Java::org.marc4j.MarcInJSON.new_from_marc_in_json(jsonstring)
  end

end
257
+
@@ -0,0 +1,4 @@
1
module MARC4J4R
  # Release number of the marc4j4r gem (read by the gemspec)
  VERSION = '1.4.3'
end
@@ -0,0 +1,34 @@
1
module MARC4J4R
  # Add some sugar to the creation of marc4j writers
  #
  # Writer.new accepts whatever IOConvert.byteoutstream can turn into an
  # output stream and returns the marc4j writer matching the requested type.
  # It never returns a MARC4J4R::Writer instance.
  #
  # @author Bill Dueber
  #

  class Writer

    # A simple factory to return the correct type of writer
    #
    # @param output The destination, handed to IOConvert.byteoutstream
    # @param [:strictmarc, :marcxml, :json] type The kind of writer wanted
    # @return The marc4j writer for +type+
    # @raise [ArgumentError] if +type+ is not one of the supported types
    def self.new(output, type = :strictmarc)
      # Validate first so an illegal type never causes the output to be opened.
      unless [:strictmarc, :marcxml, :json].include?(type)
        raise ArgumentError, "#{type} must be :strictmarc, :marcxml, or :json"
      end

      @handle = IOConvert.byteoutstream(output)
      case type
      when :strictmarc
        Java::org.marc4j.MarcStreamWriter.new(@handle)
      when :marcxml
        writer = Java::org.marc4j.MarcXmlWriter.new(@handle)
        writer.setUnicodeNormalization(true)
        writer
      when :json
        Java::org.marc4j.MarcJsonWriter.new(@handle)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.expand_path('../lib/marc4j4r/version', __FILE__)
4
+
5
# Package definition for the marc4j4r gem (JRuby only).
Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'marc4j4r'
  spec.version  = MARC4J4R::VERSION
  spec.platform = 'java'
  spec.license  = 'MIT'

  # Descriptive metadata
  spec.summary     = "A minimal jruby wrapper around marc4j (http://marc4j.tigris.com)"
  spec.description = "Syntactic sugar and some extra methods to deal with MARC data using a fork of the excellent java library marc4j"
  spec.authors     = ['Bill Dueber']
  spec.email       = 'bill@dueber.com'
  spec.homepage    = 'https://github.com/billdueber/marc4j4r#readme'

  # Contents: everything tracked by git, with executables and tests picked
  # out of that list by directory.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']
end