marc4j4r 1.4.3-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +3 -0
- data/.gitignore +4 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +40 -0
- data/Gemfile +16 -0
- data/LICENSE.txt +20 -0
- data/README.md +164 -0
- data/Rakefile +34 -0
- data/jars/jackson-all-1.6.0.jar +0 -0
- data/jars/javamarc.jar +0 -0
- data/jars/marc4j-extra-readers-writers.jar +0 -0
- data/lib/marc4j4r.rb +73 -0
- data/lib/marc4j4r/controlfield.rb +34 -0
- data/lib/marc4j4r/datafield.rb +195 -0
- data/lib/marc4j4r/reader.rb +129 -0
- data/lib/marc4j4r/record.rb +257 -0
- data/lib/marc4j4r/version.rb +4 -0
- data/lib/marc4j4r/writer.rb +34 -0
- data/marc4j4r.gemspec +21 -0
- data/spec/alephsequentialreader_spec.rb +111 -0
- data/spec/bad.dat +1 -0
- data/spec/badbatch.dat +1 -0
- data/spec/badbatch.xml +13 -0
- data/spec/batch.dat +1 -0
- data/spec/batch.txt +193 -0
- data/spec/batch.xml +13 -0
- data/spec/chinese_utf8.dat +1 -0
- data/spec/controlfield_spec.rb +42 -0
- data/spec/datafield_spec.rb +115 -0
- data/spec/errors.seq +118 -0
- data/spec/helper.rb +21 -0
- data/spec/one.dat +1 -0
- data/spec/one.txt +17 -0
- data/spec/one.xml +4 -0
- data/spec/reader_spec.rb +112 -0
- data/spec/record_spec.rb +146 -0
- data/spec/test_marc4j4r.rb +12 -0
- data/spec/three.seq +118 -0
- metadata +116 -0
@@ -0,0 +1,129 @@
|
|
1
|
+
import 'org.marc4j.ErrorHandler'
|
2
|
+
require 'jlogger'
|
3
|
+
|
4
|
+
module MarcReader
  # Mixin for readers that collect parse errors: wraps the reader's own #next
  # so that a failing read is echoed before being re-raised, and any errors
  # the reader exposes are logged before the record is handed back.
  module LoggingNextRecord
    # Fetch the next record from the underlying reader.
    #
    # @param [Boolean] hashify Whether to call #hashify on the record before returning it
    # @return The record produced by #next
    # @raise [org.marc4j.MarcException] re-raised (after being printed) when #next fails
    def nextRecord(hashify=true)
      begin
        r = self.next
      rescue org.marc4j.MarcException => e
        puts "#{e}"
        raise e
      end
      # respond_to? instead of `methods.include? 'errors'`: #methods returns
      # Symbols on Ruby >= 1.9, so the String lookup never matched and
      # logErrors was silently skipped.
      self.logErrors if self.respond_to?(:errors)
      r.hashify if hashify
      return r
    end
  end
end
|
20
|
+
|
21
|
+
# Ruby-side additions to the marc4j MarcReader interface: iteration via
# Enumerable, a logger, and helpers that hashify records as they are read.
module Java::OrgMarc4j::MarcReader
  include Enumerable
  include JLogger::Simple

  # Send every error reported by self.errors.getErrors to the logger, at a
  # level chosen by the error's severity. Does nothing when getErrors
  # returns nil/false.
  def logErrors
    errs = self.errors.getErrors
    return unless errs
    errs.each do |err|
      case err.severity
      when ErrorHandler::MAJOR_ERROR
        log.error err.toString
      when ErrorHandler::ERROR_TYPO, ErrorHandler::MINOR_ERROR
        log.warn err.toString
      when ErrorHandler::INFO
        log.info err.toString
      when ErrorHandler::FATAL
        log.error err.toString
        # NOTE(review): Process.exit is called without a status argument, so
        # this presumably exits with a success status -- confirm that is
        # what is wanted after a fatal error.
        Process.exit
      end
    end
  end

  # Return the next record, after (optionally) calling #hashify on it
  #
  # @param [Boolean] hashify Whether to call #hashify on the record
  # @return The record produced by #next
  def nextRecord(hashify = true)
    r = self.next
    r.hashify if hashify
    return r
  end

  # Yield each remaining record in turn.
  #
  # @param [Boolean] hashify Passed through to #nextRecord
  # @return [Enumerator, nil] An Enumerator when called without a block
  def each(hashify=true)
    # Without a block the bare `yield` below would raise LocalJumpError.
    return enum_for(:each, hashify) unless block_given?
    while self.hasNext
      yield self.nextRecord(hashify)
    end
  end
end
|
57
|
+
|
58
|
+
|
59
|
+
module MARC4J4R

  # Factory for marc4j readers. Reader.new never returns a Reader instance;
  # it returns one of the underlying Java reader objects.
  class Reader

    # Encoding names accepted by Reader.new
    ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8']
    # Symbol shortcuts that Reader.new expands to an entry in ENCODINGS
    ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'}

    # @attr_reader [File] handle The handle of the File (or IO) object being read from
    attr_reader :handle

    # Get a marc reader of the appropriate type
    # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
    # @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential, :json] type The type of MARC reader you want.
    # @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] encoding An explicit encoding
    # @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each)
    # @raise [ArgumentError] if the encoding or the reader type is not one of the supported values
    #
    # @example Get a strict binary MARC reader for the file 'test.mrc'
    #   reader = MARC4J4R::Reader.new('test.mrc')
    #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default
    #
    # @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8
    #   reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8)
    #
    # @example Get a permissive binary MARC reader
    #   reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
    #
    # @example Get a reader for an xml file
    #   reader = MARC4J4R::Reader.new('test.xml', :marcxml)
    #
    # @example Get a reader based on an existing IO object
    #   require 'open-uri'
    #   infile = open('http://my.machine.com/test.mrc')
    #   reader = MARC4J4R::Reader.new(infile)

    def self.new(input, type = :strictmarc, encoding = nil)
      if encoding
        # Expand a symbol alias (e.g. :utf8) to its canonical name, then validate
        encoding = ENCODING_ALIASES[encoding] if ENCODING_ALIASES[encoding]
        unless ENCODINGS.include? encoding
          raise ArgumentError, "Encoding must be in [#{ENCODINGS.map {|x| '"' + x + '"'}.join(', ')}], not \"#{encoding}\""
        end
      end
      @handle = IOConvert.byteinstream(input)
      case type
      when :strictmarc
        Java::org.marc4j.MarcStreamReader.send(:include, Enumerable)
        return Java::org.marc4j.MarcStreamReader.new(@handle, encoding)
      when :permissivemarc
        # With no explicit encoding, ask the permissive reader to guess
        encoding ||= 'BESTGUESS'
        Java::org.marc4j.MarcPermissiveStreamReader.send(:include, Enumerable)
        Java::org.marc4j.MarcPermissiveStreamReader.send(:include, JLogger::Simple)
        Java::org.marc4j.MarcPermissiveStreamReader.send(:include, MarcReader::LoggingNextRecord)
        return Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true, encoding)
      when :marcxml
        Java::org.marc4j.MarcXmlReader.send(:include, Enumerable)
        Java::org.marc4j.MarcXmlReader.send(:include, JLogger::Simple)
        return Java::org.marc4j.MarcXmlReader.new(@handle)
      when :alephsequential
        Java::org.marc4j.MarcAlephSequentialReader.send(:include, Enumerable)
        Java::org.marc4j.MarcAlephSequentialReader.send(:include, JLogger::Simple)
        Java::org.marc4j.MarcAlephSequentialReader.send(:include, MarcReader::LoggingNextRecord)
        return Java::org.marc4j.MarcAlephSequentialReader.new(@handle)
      when :json
        Java::org.marc4j.MarcJsonReader.send(:include, Enumerable)
        Java::org.marc4j.MarcJsonReader.send(:include, JLogger::Simple)
        return Java::org.marc4j.MarcJsonReader.new(@handle)
      else
        # The message lists every type handled above, including :json
        raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, :alephsequential, or :json"
      end
    end
  end
end
|
@@ -0,0 +1,257 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
module MARC4J4R
|
3
|
+
Record = Java::org.marc4j.marc.impl::RecordImpl
|
4
|
+
|
5
|
+
class Record
|
6
|
+
include Enumerable
|
7
|
+
|
8
|
+
alias_method :<<, :addVariableField
|
9
|
+
alias_method :append, :addVariableField
|
10
|
+
alias_method :fields, :getVariableFields
|
11
|
+
|
12
|
+
# Export as a MARC-Hash, as described at
|
13
|
+
# http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
|
14
|
+
# @return A marc-hash representation of the record, suitable for calling .to_json on or whatever
|
15
|
+
|
16
|
+
# Show equality
|
17
|
+
|
18
|
+
# Two records are equal when their leaders match and they hold the same
# number of fields, each pair of which compares equal in both directions.
#
# @param other The object to compare against
# @return [Boolean]
def == other
  # Anything that cannot report a leader or list its fields is not a record
  return false unless other.respond_to?(:leader) && other.respond_to?(:to_a)
  return false unless (self.leader == other.leader)
  mine = self.to_a
  theirs = other.to_a
  return false unless mine.size == theirs.size
  # Check both directions, since field equality is defined elsewhere and may
  # not be symmetric. No output is written: equality is a pure query.
  mine.zip(theirs).all? { |a, b| a == b && b == a }
end
|
34
|
+
|
35
|
+
|
36
|
+
# Create a local hash by tag number; makes some stuff faster
|
37
|
+
# Called automatically if you use reader.each
|
38
|
+
|
39
|
+
def hashify
  # Build the tag => [fields] index only once; an existing index is kept as-is
  return if @hashedtags
  @hashedtags = {}
  self.getVariableFields.each do |field|
    (@hashedtags[field.tag] ||= []) << field
  end
end
|
47
|
+
|
48
|
+
# Force a re-hash
|
49
|
+
def rehash
  # Discard the existing tag index so that hashify builds a new one
  @hashedtags = nil
  self.hashify
end
|
53
|
+
|
54
|
+
# Create a nice string of the record
|
55
|
+
def to_s
  # First line is the leader; every field then contributes its own #to_s
  header = 'LEADER ' + self.leader
  field_lines = self.map { |field| field.to_s }
  ([header] + field_lines).join("\n")
end
|
62
|
+
|
63
|
+
# Get the leader as a string (marc4j would otherwise return Leader object)
|
64
|
+
def leader
  # Convert whatever get_leader returns into its string form
  leader_obj = self.get_leader
  leader_obj.toString
end
|
67
|
+
|
68
|
+
# Set the leader
|
69
|
+
# @raise [RuntimeError] if the leader string is illegal
|
70
|
+
def leader= str
  # Build the Java leader first; a StringIndexOutOfBoundsException from the
  # constructor or the setter is re-raised as a Ruby RuntimeError.
  new_leader = Java::org.marc4j.marc.impl.LeaderImpl.new(str)
  self.set_leader new_leader
rescue Java::java.lang.StringIndexOutOfBoundsException => e
  raise RuntimeError, "'#{str}' not a legal leader: #{e.message}"
end
|
77
|
+
|
78
|
+
# Cycle through the fields in the order they appear in the record
|
79
|
+
def each(&blk)
  # Hand iteration to the list returned by getVariableFields
  field_list = self.getVariableFields
  field_list.each(&blk)
end
|
82
|
+
|
83
|
+
# Get the first field associated with a tag
|
84
|
+
# @param [String] tag The tag
|
85
|
+
# @return [Field] The first matching field, or nil if none. Note that
|
86
|
+
# to mirror ruby-marc, this returns a single field
|
87
|
+
|
88
|
+
def [] tag
  # No tag index built yet: ask the underlying record directly
  return self.getVariableField(tag) unless defined? @hashedtags
  # Otherwise answer from the index; an unknown tag yields nil
  matches = @hashedtags[tag]
  matches ? matches[0] : nil
end
|
99
|
+
|
100
|
+
|
101
|
+
# Get a (possibly empty) list of fields with the given tag(s)
|
102
|
+
#
|
103
|
+
# @param [String, Array<String>] tags A string (or Array of strings) with the tags you're interested in
|
104
|
+
# @param [Boolean] originalorder Whether or not results should be presented in the original order within the
|
105
|
+
# record or with a two-column sort of (a) Order of the tag in the list of tags sent, (b) order within that tag
|
106
|
+
# in the record
|
107
|
+
# @return [Array<Field>] Either an empty list or a list of one or more matched fields will be returned.
|
108
|
+
#
|
109
|
+
# originalorder == false will use an internal hash and be faster in many cases (see #hashify)
|
110
|
+
#
|
111
|
+
# @example originalorder == false
|
112
|
+
# # Given a record that looks like
|
113
|
+
# # 010 $a 68027371
|
114
|
+
# # 035 $a (RLIN)MIUG0001728-B
|
115
|
+
# # 035 $a (CaOTULAS)159818044
|
116
|
+
# # 035 $a (OCoLC)ocm00001728
|
117
|
+
#
|
118
|
+
# r.find_by_tag(['035', '010']).each {|f| puts f.to_s}
|
119
|
+
# # 035 $a (RLIN)MIUG0001728-B
|
120
|
+
# # 035 $a (CaOTULAS)159818044
|
121
|
+
# # 035 $a (OCoLC)ocm00001728
|
122
|
+
# # 010 $a 68027371
|
123
|
+
#
|
124
|
+
# # The results are ordered first by tag as passed in, then by original order within the tag
|
125
|
+
#
|
126
|
+
# @example Just get all fields for a single tag
|
127
|
+
# ohThirtyFives = r.find_by_tag('035')
|
128
|
+
#
|
129
|
+
# @example Get a bunch of standard identifiers
|
130
|
+
# standardIDs = r.find_by_tag(['022', '020', '010'])
|
131
|
+
#
|
132
|
+
# @example originalorder == true
|
133
|
+
# r.find_by_tag(['035', '010'], true).each {|f| puts f.to_s}
|
134
|
+
# # 010 $a 68027371
|
135
|
+
# # 035 $a (RLIN)MIUG0001728-B
|
136
|
+
# # 035 $a (CaOTULAS)159818044
|
137
|
+
# # 035 $a (OCoLC)ocm00001728
|
138
|
+
|
139
|
+
def find_by_tag(tags, originalorder = false)
  # Make sure the tag index exists (hashify itself is a no-op once built)
  self.hashify if !@hashedtags || originalorder

  # A single tag: answer straight from the index
  return (@hashedtags[tags] || []) unless tags.is_a? Array

  # Record order: walk the fields and keep those whose tag was asked for
  return self.find_all { |field| tags.include? field.tag } if originalorder

  # Tag order: concatenate the index entries in the order the tags were given
  @hashedtags.values_at(*tags).flatten.compact
end
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
# Return the record as valid MARC-XML
|
155
|
+
# @param String encoding The encoding to use
|
156
|
+
# @return String A MARC-XML representation of the record, including the XML header
|
157
|
+
|
158
|
+
def to_xml
  # Serialization is delegated entirely to MarcXmlWriter.record_to_XML
  xml_writer_class = Java::org.marc4j.MarcXmlWriter
  xml_writer_class.record_to_XML(self)
end
|
161
|
+
|
162
|
+
|
163
|
+
# Write this record through a MarcPermissiveStreamWriter into an in-memory
# buffer and return the buffer's contents as a string.
#
# @param [String] encoding Encoding name handed to the writer
# @return [String] Whatever the buffer's to_string produces
def to_marc encoding='UTF-8'
  buffer = Java::java.io.ByteArrayOutputStream.new
  # The writer is deliberately not closed here, matching prior behavior
  org.marc4j.MarcPermissiveStreamWriter.new(buffer, encoding).write(self)
  # NOTE(review): to_string is called without a charset argument -- confirm
  # the resulting string is right for every supported encoding.
  buffer.to_string
end
|
177
|
+
|
178
|
+
# Build a marc-hash representation of the record: a Hash with the keys
# 'type', 'version', 'leader' and 'fields'.
#
# @return [Hash] Control fields appear as [tag, value]; other fields as
#   [tag, ind1, ind2, [[code, value], ...]] with a nil/false indicator
#   replaced by a single space
def to_marchash
  leader_string = self.leader

  field_entries = self.getVariableFields.map do |f|
    if f.controlField?
      [f.tag, f.value]
    else
      subfield_pairs = []
      f.each { |subfield| subfield_pairs << [subfield.code, subfield.value] }
      [f.tag, f.indicator1 || ' ', f.indicator2 || ' ', subfield_pairs]
    end
  end

  {
    'type'    => 'marc-hash',
    'version' => [1,0],
    'leader'  => leader_string,
    'fields'  => field_entries
  }
end
|
202
|
+
|
203
|
+
# Turn it into a marc-in-json hashmap. Note that this won't really work
|
204
|
+
# like a ruby hash; you need to know what you're getting, since stuff
|
205
|
+
# like #each won't work.
|
206
|
+
#
|
207
|
+
# Better to just use to_marc_in_json if you want a json string
|
208
|
+
|
209
|
+
def to_hash
  # The conversion is performed on the Java side by MarcInJSON.record_to_hash
  converter = Java::org.marc4j.MarcInJSON
  converter.record_to_hash(self)
end
|
212
|
+
|
213
|
+
|
214
|
+
# Turn it into a marc-in-json JSON string using Jackson
|
215
|
+
def to_marc_in_json
  # Delegates to MarcInJSON.record_to_marc_in_json on the Java side
  converter = Java::org.marc4j.MarcInJSON
  converter.record_to_marc_in_json(self)
end
|
218
|
+
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
|
223
|
+
|
224
|
+
# Given a MARC record in a string, turn it into an object
|
225
|
+
# @param String str The record as a MARC binary string
|
226
|
+
# @return MARC4J4R::Record The first record encoded in the string
|
227
|
+
#
|
228
|
+
# Note that the normal way of defining this class method (self.from_string)
|
229
|
+
# didn't work; I assume it has something to do with the fact that
|
230
|
+
# it's actually just aliased to the Java class
|
231
|
+
def Record.from_string str, encoding=nil
  # Wrap the string's bytes in a Java input stream and read the first record
  # from it with a strict reader
  byte_stream = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
  strict_reader = MARC4J4R::Reader.new(byte_stream, :strictmarc, encoding)
  strict_reader.first
end
|
236
|
+
|
237
|
+
|
238
|
+
# Given a MARC-XML record in a string, turn it into an object
|
239
|
+
# @param String str The record as a MARC-XML string
|
240
|
+
# @return MARC4J4R::Record The first record encoded in the string
|
241
|
+
def Record.from_xml_string str
  # Present the string as an IO object and read the first record from it
  # with a MARC-XML reader
  xml_io = StringIO.new(str)
  MARC4J4R::Reader.new(xml_io, :marcxml).first
end
|
244
|
+
|
245
|
+
# Build a record from a hash by delegating to MarcInJSON.new_from_hash
#
# @param hash The hash to convert
# @return Whatever MarcInJSON.new_from_hash returns
def Record.new_from_hash hash
  converter = Java::org.marc4j.MarcInJSON
  converter.new_from_hash(hash)
end
|
248
|
+
|
249
|
+
# Build a record from a JSON string by delegating to
# MarcInJSON.new_from_marc_in_json
#
# @param jsonstring The JSON text to convert
# @return Whatever MarcInJSON.new_from_marc_in_json returns
def Record.new_from_marc_in_json jsonstring
  converter = Java::org.marc4j.MarcInJSON
  converter.new_from_marc_in_json(jsonstring)
end
|
252
|
+
|
253
|
+
|
254
|
+
|
255
|
+
|
256
|
+
end
|
257
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module MARC4J4R
  # Factory for marc4j writers.
  #
  # Writer.new never returns a Writer instance: it converts the requested
  # output with IOConvert.byteoutstream and returns the marc4j writer object
  # for the requested type.
  #
  # @author Bill Dueber
  #

  class Writer

    # A simple factory to return the correct type of writer
    #
    # @param output The output destination, passed to IOConvert.byteoutstream
    # @param [:strictmarc, :marcxml, :json] type The kind of writer wanted
    # @return The marc4j writer object for the given type
    # @raise [ArgumentError] if type is not one of the supported values
    def self.new output, type = :strictmarc
      @handle = IOConvert.byteoutstream(output)
      case type
      when :strictmarc
        Java::org.marc4j.MarcStreamWriter.new(@handle)
      when :marcxml
        # The XML writer is the only one configured after construction
        xml_writer = Java::org.marc4j.MarcXmlWriter.new(@handle)
        xml_writer.setUnicodeNormalization(true)
        xml_writer
      when :json
        Java::org.marc4j.MarcJsonWriter.new(@handle)
      else
        raise ArgumentError, "#{type} must be :strictmarc, :marcxml, or :json"
      end
    end
  end
end
|
data/marc4j4r.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path('../lib/marc4j4r/version', __FILE__)
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
  # Identity
  spec.name          = "marc4j4r"
  spec.platform      = 'java'
  spec.version       = MARC4J4R::VERSION

  # Descriptive metadata
  spec.summary       = %q{A minimal jruby wrapper around marc4j (http://marc4j.tigris.com)}
  spec.description   = %q{Syntactic sugar and some extra methods to deal with MARC data using a fork of the excellent java library marc4j}
  spec.license       = "MIT"
  spec.authors       = ["Bill Dueber"]
  spec.email         = "bill@dueber.com"
  spec.homepage      = "https://github.com/billdueber/marc4j4r#readme"

  # Packaged files come from `git ls-files`; executables and test files are
  # picked out of that list by path prefix
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

end
|