marc4j4r 1.4.3-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,129 @@
1
+ import 'org.marc4j.ErrorHandler'
2
+ require 'jlogger'
3
+
4
module MarcReader
  # Mixin for readers that keep a list of parse errors: wraps #next so that
  # any accumulated errors are logged each time a record is fetched.
  module LoggingNextRecord
    # Fetch the next record, log accumulated reader errors, and optionally
    # build the record's tag index.
    #
    # @param [Boolean] hashify Whether to call #hashify on the record before returning it
    # @return The record returned by #next
    # @raise [org.marc4j.MarcException] re-raised after being echoed to stdout
    def nextRecord(hashify = true)
      begin
        r = self.next
      rescue org.marc4j.MarcException => e
        puts e.to_s
        raise e
      end
      # Object#methods returns Symbols on Ruby 1.9+, so testing it against the
      # String 'errors' never matches; respond_to? works on every version.
      logErrors if respond_to?(:errors)
      r.hashify if hashify
      r
    end
  end
end
20
+
21
+ module Java::OrgMarc4j::MarcReader
22
+ include Enumerable
23
+ include JLogger::Simple
24
+
25
+
26
+ def logErrors
27
+ return unless self.errors.getErrors
28
+ self.errors.getErrors.each do |err|
29
+ case err.severity
30
+ when ErrorHandler::MAJOR_ERROR
31
+ log.error err.toString
32
+ when ErrorHandler::ERROR_TYPO, ErrorHandler::MINOR_ERROR
33
+ self.log.warn err.toString
34
+ when ErrorHandler::INFO
35
+ log.info err.toString
36
+ when ErrorHandler::FATAL
37
+ log.error err.toString
38
+ Process.exit
39
+ end
40
+ end
41
+ end
42
+
43
+ def nextRecord(hashify = true)
44
+ r = self.next
45
+ r.hashify if hashify
46
+ return r
47
+ end
48
+
49
+ # Return the next record, after calling #hashify on it
50
+ def each(hashify=true)
51
+ while self.hasNext
52
+ r = self.nextRecord(hashify)
53
+ yield r
54
+ end
55
+ end
56
+ end
57
+
58
+
59
+ module MARC4J4R
60
+
61
+ class Reader
62
+
63
+ ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8']
64
+ ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'}
65
+
66
+ # @attr_reader [File] handle The handle of the File (or IO) object being read from
67
+ attr_reader :handle
68
+
69
+ # Get a marc reader of the appropriate type
70
+ # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
71
+ # @param [:strictmarc, :permissivemarc, :marcxml] The type of MARC reader you want.
72
+ # @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] An explicit encoding
73
+ # @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each)
74
+ #
75
+ # @example Get a strict binary MARC reader for the file 'test.mrc'
76
+ # reader = MARC4J4R::Reader.new('test.mrc')
77
+ # reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default
78
+ #
79
+ # @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8
80
+ # reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8)
81
+ #
82
+ # @example Get a permissive binary MARC reader
83
+ # reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
84
+ #
85
+ # @example Get a reader for an xml file
86
+ # reader = MARC4J4R::Reader.new('test.xml', :marcxml)
87
+ #
88
+ # @example Get a reader based on an existing IO object
89
+ # require 'open-uri'
90
+ # infile = open('http://my.machine.com/test.mrc')
91
+ # reader = MARC4J4R::Reader.new(infile)
92
+
93
+ def self.new(input, type = :strictmarc, encoding = nil)
94
+ if encoding
95
+ encoding = ENCODING_ALIASES[encoding] if ENCODING_ALIASES[encoding]
96
+ unless ENCODINGS.include? encoding
97
+ raise ArgumentError, "Encoding must be in [#{ENCODINGS.map {|x| '"' + x + '"'}.join(', ')}], not \"#{encoding}\""
98
+ end
99
+ end
100
+ @handle = IOConvert.byteinstream(input)
101
+ case type
102
+ when :strictmarc then
103
+ Java::org.marc4j.MarcStreamReader.send(:include, Enumerable)
104
+ return Java::org.marc4j.MarcStreamReader.new(@handle, encoding)
105
+ when :permissivemarc then
106
+ encoding ||= 'BESTGUESS'
107
+ Java::org.marc4j.MarcPermissiveStreamReader.send(:include, Enumerable)
108
+ Java::org.marc4j.MarcPermissiveStreamReader.send(:include, JLogger::Simple)
109
+ Java::org.marc4j.MarcPermissiveStreamReader.send(:include, MarcReader::LoggingNextRecord)
110
+ return Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true, encoding)
111
+ when :marcxml then
112
+ Java::org.marc4j.MarcXmlReader.send(:include, Enumerable)
113
+ Java::org.marc4j.MarcXmlReader.send(:include, JLogger::Simple)
114
+ return Java::org.marc4j.MarcXmlReader.new(@handle)
115
+ when :alephsequential then
116
+ Java::org.marc4j.MarcAlephSequentialReader.send(:include, Enumerable)
117
+ Java::org.marc4j.MarcAlephSequentialReader.send(:include, JLogger::Simple)
118
+ Java::org.marc4j.MarcAlephSequentialReader.send(:include, MarcReader::LoggingNextRecord)
119
+ return Java::org.marc4j.MarcAlephSequentialReader.new(@handle)
120
+ when :json then
121
+ Java::org.marc4j.MarcJsonReader.send(:include, Enumerable)
122
+ Java::org.marc4j.MarcJsonReader.send(:include, JLogger::Simple)
123
+ return Java::org.marc4j.MarcJsonReader.new(@handle)
124
+ else
125
+ raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, or :alephsequential"
126
+ end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,257 @@
1
+ require 'stringio'
2
+ module MARC4J4R
3
+ Record = Java::org.marc4j.marc.impl::RecordImpl
4
+
5
+ class Record
6
+ include Enumerable
7
+
8
+ alias_method :<<, :addVariableField
9
+ alias_method :append, :addVariableField
10
+ alias_method :fields, :getVariableFields
11
+
12
+     # (This comment documents #to_marchash, defined later in this class.) Export as a MARC-Hash, as described at
13
+ # http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
14
+ # @return A marc-hash representation of the record, suitable for calling .to_json on or whatever
15
+
16
+ # Show equality
17
+
18
+ def == other
19
+ return false unless (self.leader == other.leader)
20
+ self.zip(other) do |so|
21
+ unless so[0] == so[1]
22
+ puts "self <> other\n#{so[0]}\n#{so[1]}"
23
+ return false;
24
+ end
25
+ end
26
+ other.zip(self) do |so|
27
+ unless so[0] == so[1]
28
+ puts "#{so[0]}\n#{so[1]}"
29
+ return false;
30
+ end
31
+ end
32
+ return true
33
+ end
34
+
35
+
36
+ # Create a local hash by tag number; makes some stuff faster
37
+ # Called automatically if you use reader.each
38
+
39
+ def hashify
40
+ return if @hashedtags # don't do it more than once
41
+ @hashedtags = {}
42
+ self.getVariableFields.each do |f|
43
+ @hashedtags[f.tag] ||= []
44
+ @hashedtags[f.tag].push f
45
+ end
46
+ end
47
+
48
+ # Force a re-hash
49
+ def rehash
50
+ @hashedtags = nil
51
+ hashify
52
+ end
53
+
54
+ # Create a nice string of the record
55
+ def to_s
56
+ arr = ['LEADER ' + self.leader]
57
+ self.each do |f|
58
+ arr.push f.to_s
59
+ end
60
+ return arr.join("\n")
61
+ end
62
+
63
+ # Get the leader as a string (marc4j would otherwise return Leader object)
64
+ def leader
65
+ self.get_leader.toString
66
+ end
67
+
68
+ # Set the leader
69
+ # @throw RuntimeError if leader is illegal
70
+ def leader= str
71
+ begin
72
+ self.set_leader Java::org.marc4j.marc.impl.LeaderImpl.new(str)
73
+ rescue Java::java.lang.StringIndexOutOfBoundsException => e
74
+ raise RuntimeError.new("'#{str}' not a legal leader: #{e.message}")
75
+ end
76
+ end
77
+
78
+ # Cycle through the fields in the order they appear in the record
79
+ def each(&blk)
80
+ self.getVariableFields.each(&blk)
81
+ end
82
+
83
+ # Get the first field associated with a tag
84
+ # @param [String] tag The tag
85
+ # @return [Field] The first matching field, or nil if none. Note that
86
+ # to mirror ruby-marc, this returns a single field
87
+
88
+ def [] tag
89
+ if defined? @hashedtags
90
+ if @hashedtags[tag]
91
+ return @hashedtags[tag][0]
92
+ else
93
+ return nil
94
+ end
95
+ else
96
+ return self.getVariableField(tag)
97
+ end
98
+ end
99
+
100
+
101
+ # Get a (possibly empty) list of fields with the given tag(s)
102
+ #
103
+ # @param [String, Array<String>] tags A string (or Array of strings) with the tags you're interested in
104
+ # @param [Boolean] originalorder Whether or not results should be presented in the original order within the
105
+ # record or with a two-column sort of (a) Order of the tag in the list of tags sent, (b) order within that tag
106
+ # in the record
107
+ # @return [Array<Field>] Either an empty list or a list of one or more matched fields will be returned.
108
+ #
109
+ # originalorder == false will use an internal hash and be faster in many cases (see #hashify)
110
+ #
111
+ # @example originalorder == false
112
+ # # Given a record that looks like
113
+ # # 010 $a 68027371
114
+ # # 035 $a (RLIN)MIUG0001728-B
115
+ # # 035 $a (CaOTULAS)159818044
116
+ # # 035 $a (OCoLC)ocm00001728
117
+ #
118
+ # r.find_by_tag(['035', '010']).each {|f| puts f.to_s}
119
+ # # 035 $a (RLIN)MIUG0001728-B
120
+ # # 035 $a (CaOTULAS)159818044
121
+ # # 035 $a (OCoLC)ocm00001728
122
+ # # 010 $a 68027371
123
+ #
124
+ # # The results are ordered first by tag as passed in, then by original order within the tag
125
+ #
126
+ # @example Just get all fields for a single tag
127
+ # ohThirtyFives = r.find_by_tag('035')
128
+ #
129
+ # @example Get a bunch of standard identifiers
130
+ # standardIDs = r.find_by_tag(['022', '020', '010'])
131
+ #
132
+ # @example originalorder == true
133
+ # r.find_by_tag(['035', '010'], true).each {|f| puts f.to_s}
134
+ # # 010 $a 68027371
135
+ # # 035 $a (RLIN)MIUG0001728-B
136
+ # # 035 $a (CaOTULAS)159818044
137
+ # # 035 $a (OCoLC)ocm00001728
138
+
139
+ def find_by_tag(tags, originalorder = false)
140
+ self.hashify unless @hashedtags and !originalorder
141
+ if !tags.is_a? Array
142
+ return @hashedtags[tags] || []
143
+ end
144
+ if originalorder
145
+ return self.find_all {|f| tags.include? f.tag}
146
+ else
147
+ # puts "Tags is #{tags}: got #{@hashedtags.values_at(*tags)}"
148
+ return @hashedtags.values_at(*tags).flatten.compact
149
+ end
150
+ end
151
+
152
+
153
+
154
+ # Return the record as valid MARC-XML
155
+ # @param String encoding The encoding to use
156
+ # @return String A MARC-XML representation of the record, including the XML header
157
+
158
+ def to_xml
159
+ return Java::org.marc4j.MarcXmlWriter.record_to_XML(self)
160
+ end
161
+
162
+
163
+ def to_marc encoding='UTF-8'
164
+ # begin
165
+ s = Java::java.io.ByteArrayOutputStream.new
166
+ writer = org.marc4j.MarcPermissiveStreamWriter.new(s, encoding)
167
+ writer.write(self)
168
+ return s.to_string
169
+ # writer.close
170
+ # @marcbinary = s.to_string
171
+ # return @marcbinary
172
+ # rescue
173
+ # # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
174
+ # "Whoops! Failed: #{$!}"
175
+ # end
176
+ end
177
+
178
+ def to_marchash
179
+ h = {}
180
+ h['type'] = 'marc-hash'
181
+ h['version'] = [1,0]
182
+ h['leader'] = self.leader
183
+
184
+ fields = []
185
+
186
+ self.getVariableFields.each do |f|
187
+ if f.controlField?
188
+ fields << [f.tag, f.value]
189
+ else
190
+ farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
191
+ subs = []
192
+ f.each do |subfield|
193
+ subs << [subfield.code, subfield.value]
194
+ end
195
+ farray.push subs
196
+ fields << farray
197
+ end
198
+ end
199
+ h['fields'] = fields
200
+ return h
201
+ end
202
+
203
+ # Turn it into a marc-in-json hashmap. Note that this won't really work
204
+ # like a ruby hash; you need to know what you're getting, since stuff
205
+ # like #each won't work.
206
+ #
207
+ # Better to just use to_marc_in_json if you want a json string
208
+
209
+ def to_hash
210
+ return Java::org.marc4j.MarcInJSON.record_to_hash(self)
211
+ end
212
+
213
+
214
+ # Turn it into a marc-in-json JSON string using Jackson
215
+ def to_marc_in_json
216
+ return Java::org.marc4j.MarcInJSON.record_to_marc_in_json(self)
217
+ end
218
+
219
+
220
+ end
221
+
222
+
223
+
224
+ # Give a marc record in a string, turn it into an object
225
+ # @param String str The record as a MARC binary string
226
+ # @return MARC4J4R::Record The first record encoded in the string
227
+ #
228
+ # Note that the normal way of defining this class (self.from_string)
229
+ # didn't work; I assume it has something to do with the fact that
230
+ # it's actually jrst aliased to the Java class
231
+ def Record.from_string str, encoding=nil
232
+ s = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
233
+ # return MARC4J4R::Reader.new(StringIO.new(str), :strictmarc, encoding).first
234
+ return MARC4J4R::Reader.new(s, :strictmarc, encoding).first
235
+ end
236
+
237
+
238
+ # Give a marc-xml record in a string, turn it into an object
239
+ # @param String str The record as a MARC-XML string
240
+ # @return MARC4J4R::Record The first record encoded in the string
241
+ def Record.from_xml_string str
242
+ return MARC4J4R::Reader.new(StringIO.new(str), :marcxml).first
243
+ end
244
+
245
+ def Record.new_from_hash hash
246
+ return Java::org.marc4j.MarcInJSON.new_from_hash(hash)
247
+ end
248
+
249
+ def Record.new_from_marc_in_json jsonstring
250
+ return Java::org.marc4j.MarcInJSON.new_from_marc_in_json(jsonstring)
251
+ end
252
+
253
+
254
+
255
+
256
+ end
257
+
@@ -0,0 +1,4 @@
1
module MARC4J4R
  # Release number of the marc4j4r gem
  VERSION = '1.4.3'
end
@@ -0,0 +1,34 @@
1
module MARC4J4R
  # Add some sugar to the MarcWriter interface
  #
  # Writer.new is a factory: it converts the given output to a byte stream
  # with IOConvert.byteoutstream and returns the marc4j writer that matches
  # the requested type, rather than an instance of this class.
  #
  # @author Bill Dueber
  #
  class Writer
    WRITER_TYPES = [:strictmarc, :marcxml, :json].freeze

    # A simple factory to return the correct type of writer
    #
    # @param output Whatever IOConvert.byteoutstream accepts as an output target
    # @param [:strictmarc, :marcxml, :json] type The kind of writer wanted
    # @return The marc4j writer for +type+ (the XML writer has unicode normalization switched on)
    # @raise [ArgumentError] if +type+ is not one of the supported types
    def self.new(output, type = :strictmarc)
      # Validate first so a bad type never opens the output handle.
      unless WRITER_TYPES.include?(type)
        raise ArgumentError, "#{type} must be :strictmarc, :marcxml, or :json"
      end

      @handle = IOConvert.byteoutstream(output)
      case type
      when :strictmarc
        Java::org.marc4j.MarcStreamWriter.new(@handle)
      when :marcxml
        writer = Java::org.marc4j.MarcXmlWriter.new(@handle)
        writer.setUnicodeNormalization(true)
        writer
      when :json
        Java::org.marc4j.MarcJsonWriter.new(@handle)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.expand_path('../lib/marc4j4r/version', __FILE__)
4
+
5
# Gem metadata for marc4j4r (java platform only).
Gem::Specification.new do |spec|
  spec.name          = "marc4j4r"
  spec.version       = MARC4J4R::VERSION
  spec.platform      = 'java'
  spec.license       = "MIT"
  spec.authors       = ["Bill Dueber"]
  spec.email         = "bill@dueber.com"
  spec.homepage      = "https://github.com/billdueber/marc4j4r#readme"
  spec.summary       = %q{A minimal jruby wrapper around marc4j (http://marc4j.tigris.com)}
  spec.description   = %q{Syntactic sugar and some extra methods to deal with MARC data using a fork of the excellent java library marc4j}

  # Package whatever git tracks; executables and tests are picked out by path.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']
end