marc4j4r 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +5 -0
- data/Rakefile +4 -1
- data/VERSION +1 -1
- data/benchj.rb +73 -0
- data/jars/jackson-all-1.6.0.jar +0 -0
- data/jars/marc4j-extra-readers-writers.jar +0 -0
- data/lib/marc4j4r/record.rb +76 -35
- data/lib/marc4j4r.rb +13 -3
- data/spec/record_spec.rb +26 -5
- metadata +7 -5
- data/jars/marc4j_serializations.jar +0 -0
data/CHANGES
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
1.2.0
|
|
2
|
+
Fixed encoding problem with to_marc and from_string roundtrip
|
|
3
|
+
Added to_hash/to_marc_in_json and from_hash/from_marc_in_json (see
|
|
4
|
+
http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/)
|
|
5
|
+
|
|
1
6
|
1.1
|
|
2
7
|
Added native java method to turn a record into XML (20% speedup or so)
|
|
3
8
|
1.0
|
data/Rakefile
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
require 'rubygems'
|
|
2
2
|
require 'rake'
|
|
3
|
-
require 'ftools'
|
|
3
|
+
# require 'ftools'
|
|
4
4
|
|
|
5
5
|
begin
|
|
6
6
|
require 'jeweler'
|
|
@@ -14,6 +14,8 @@ begin
|
|
|
14
14
|
gem.add_development_dependency "bacon", ">= 0"
|
|
15
15
|
gem.add_development_dependency "yard", ">= 0"
|
|
16
16
|
|
|
17
|
+
gem.files.include 'jars/*.jar'
|
|
18
|
+
|
|
17
19
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
18
20
|
end
|
|
19
21
|
Jeweler::GemcutterTasks.new
|
|
@@ -27,6 +29,7 @@ LOCALJAR = 'jars/javamarc.jar'
|
|
|
27
29
|
JAVAJAR = '../../javamarc.jar'
|
|
28
30
|
|
|
29
31
|
|
|
32
|
+
|
|
30
33
|
file LOCALJAR => JAVAJAR do |t|
|
|
31
34
|
File.copy(JAVAJAR, LOCALJAR)
|
|
32
35
|
end
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.1.0
|
|
1
|
+
1.2.0
|
data/benchj.rb
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
$:.unshift 'lib'
|
|
2
|
+
require 'marc4j4r'
|
|
3
|
+
|
|
4
|
+
require 'benchmark'
|
|
5
|
+
|
|
6
|
+
# require 'yajl'
|
|
7
|
+
# require 'json/pure'
|
|
8
|
+
# require 'msgpack'
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
jsonsize = 0.0
|
|
12
|
+
marcsize = 0.0
|
|
13
|
+
mpsize = 0.0
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Use Benchmark.measure
|
|
17
|
+
# sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON Pure")
|
|
18
|
+
smtime = Benchmark::Tms.new(0,0,0,0,0, "MARC")
|
|
19
|
+
smptime = Benchmark::Tms.new(0,0,0,0,0, "Msgpack")
|
|
20
|
+
sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON")
|
|
21
|
+
|
|
22
|
+
djptime = Benchmark::Tms.new(0,0,0,0,0, "JSON")
|
|
23
|
+
dmtime = Benchmark::Tms.new(0,0,0,0,0, "MARC")
|
|
24
|
+
dmptime = Benchmark::Tms.new(0,0,0,0,0, "Msgpack")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
i = 0
|
|
29
|
+
iterations = 1
|
|
30
|
+
|
|
31
|
+
iterations.times do
|
|
32
|
+
reader = MARC4J4R::Reader.new('topics.xml', :marcxml)
|
|
33
|
+
|
|
34
|
+
reader.each_with_index do |r, i|
|
|
35
|
+
marc = nil
|
|
36
|
+
json = nil
|
|
37
|
+
mp = nil
|
|
38
|
+
copy = nil
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
smtime += Benchmark.measure {marc = r.to_marc}
|
|
42
|
+
dmtime += Benchmark.measure {copy = MARC4J4R::Record.from_string(marc)}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
sjptime += Benchmark.measure {json = r.to_marc_in_json}
|
|
46
|
+
djptime += Benchmark.measure {copy = MARC4J4R::Record.new_from_marc_in_json(json)}
|
|
47
|
+
|
|
48
|
+
# break if i > 1000
|
|
49
|
+
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
puts "Total of #{i} records run #{iterations} times"
|
|
54
|
+
|
|
55
|
+
puts "\nSERIALIZING"
|
|
56
|
+
|
|
57
|
+
base = smtime.total
|
|
58
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', smtime.total, smtime.total / base * 100]
|
|
59
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', sjptime.total, sjptime.total / base * 100]
|
|
60
|
+
# puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', smptime.total, smptime.total / base * 100]
|
|
61
|
+
|
|
62
|
+
base = dmtime.total
|
|
63
|
+
puts "\nDESERIALIZING"
|
|
64
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', dmtime.total, dmtime.total / base * 100]
|
|
65
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', djptime.total, djptime.total / base * 100]
|
|
66
|
+
# puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', dmptime.total, dmptime.total / base * 100]
|
|
67
|
+
|
|
68
|
+
base = dmtime.total + smtime.total
|
|
69
|
+
puts "\nSERIALIZE + DESERIALIZE"
|
|
70
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', dmtime.total + smtime.total, (dmtime.total + smtime.total) / base * 100]
|
|
71
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', djptime.total + sjptime.total, (djptime.total + sjptime.total) / base * 100]
|
|
72
|
+
# puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', dmptime.total + smptime.total, (dmptime.total + smptime.total) / base * 100]
|
|
73
|
+
|
|
Binary file
|
|
Binary file
|
data/lib/marc4j4r/record.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
require 'stringio'
|
|
1
2
|
module MARC4J4R
|
|
2
3
|
Record = Java::org.marc4j.marc.impl::RecordImpl
|
|
3
4
|
|
|
@@ -17,10 +18,16 @@ module MARC4J4R
|
|
|
17
18
|
def == other
|
|
18
19
|
return false unless (self.leader == other.leader)
|
|
19
20
|
self.zip(other) do |so|
|
|
20
|
-
|
|
21
|
+
unless so[0] == so[1]
|
|
22
|
+
puts "self <> other\n#{so[0]}\n#{so[1]}"
|
|
23
|
+
return false;
|
|
24
|
+
end
|
|
21
25
|
end
|
|
22
26
|
other.zip(self) do |so|
|
|
23
|
-
|
|
27
|
+
unless so[0] == so[1]
|
|
28
|
+
puts "#{so[0]}\n#{so[1]}"
|
|
29
|
+
return false;
|
|
30
|
+
end
|
|
24
31
|
end
|
|
25
32
|
return true
|
|
26
33
|
end
|
|
@@ -141,6 +148,7 @@ module MARC4J4R
|
|
|
141
148
|
|
|
142
149
|
|
|
143
150
|
# Return the record as valid MARC-XML
|
|
151
|
+
# @param String encoding The encoding to use
|
|
144
152
|
# @return String A MARC-XML representation of the record, including the XML header
|
|
145
153
|
|
|
146
154
|
def to_xml
|
|
@@ -148,45 +156,66 @@ module MARC4J4R
|
|
|
148
156
|
end
|
|
149
157
|
|
|
150
158
|
|
|
151
|
-
def to_marc
|
|
152
|
-
begin
|
|
159
|
+
def to_marc encoding='UTF-8'
|
|
160
|
+
# begin
|
|
153
161
|
s = Java::java.io.ByteArrayOutputStream.new
|
|
154
|
-
writer = org.marc4j.MarcPermissiveStreamWriter.new(s)
|
|
162
|
+
writer = org.marc4j.MarcPermissiveStreamWriter.new(s, encoding)
|
|
155
163
|
writer.write(self)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
#
|
|
160
|
-
|
|
161
|
-
|
|
164
|
+
return s.to_string
|
|
165
|
+
# writer.close
|
|
166
|
+
# @marcbinary = s.to_string
|
|
167
|
+
# return @marcbinary
|
|
168
|
+
# rescue
|
|
169
|
+
# # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
|
|
170
|
+
# "Whoops! Failed: #{$!}"
|
|
171
|
+
# end
|
|
162
172
|
end
|
|
163
|
-
end
|
|
164
|
-
|
|
165
173
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
174
|
+
def to_marchash
|
|
175
|
+
h = {}
|
|
176
|
+
h['type'] = 'marc-hash'
|
|
177
|
+
h['version'] = [1,0]
|
|
178
|
+
h['leader'] = self.leader
|
|
179
|
+
|
|
180
|
+
fields = []
|
|
181
|
+
|
|
182
|
+
self.getVariableFields.each do |f|
|
|
183
|
+
if f.controlField?
|
|
184
|
+
fields << [f.tag, f.value]
|
|
185
|
+
else
|
|
186
|
+
farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
|
|
187
|
+
subs = []
|
|
188
|
+
f.each do |subfield|
|
|
189
|
+
subs << [subfield.code, subfield.value]
|
|
190
|
+
end
|
|
191
|
+
farray.push subs
|
|
192
|
+
fields << farray
|
|
182
193
|
end
|
|
183
|
-
farray.push subs
|
|
184
|
-
fields << farray
|
|
185
194
|
end
|
|
195
|
+
h['fields'] = fields
|
|
196
|
+
return h
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
# Turn it into a marc-in-json hashmap. Note that this won't really work
|
|
200
|
+
# like a ruby hash; you need to know what you're getting, since stuff
|
|
201
|
+
# like #each won't work.
|
|
202
|
+
#
|
|
203
|
+
# Better to just use to_marc_in_json if you want a json string
|
|
204
|
+
|
|
205
|
+
def to_hash
|
|
206
|
+
return Java::org.marc4j.MarcInJSON.record_to_hash(self)
|
|
186
207
|
end
|
|
187
|
-
|
|
188
|
-
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Turn it into a marc-in-json JSON string using Jackson
|
|
211
|
+
def to_marc_in_json
|
|
212
|
+
return Java::org.marc4j.MarcInJSON.record_to_marc_in_json(self)
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
|
|
189
216
|
end
|
|
217
|
+
|
|
218
|
+
|
|
190
219
|
|
|
191
220
|
# Give a marc record in a string, turn it into an object
|
|
192
221
|
# @param String str The record as a MARC binary string
|
|
@@ -195,8 +224,10 @@ module MARC4J4R
|
|
|
195
224
|
# Note that the normal way of defining this class (self.from_string)
|
|
196
225
|
# didn't work; I assume it has something to do with the fact that
|
|
197
226
|
# it's actually just aliased to the Java class
|
|
198
|
-
def Record.from_string str
|
|
199
|
-
|
|
227
|
+
def Record.from_string str, encoding=nil
|
|
228
|
+
s = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
|
|
229
|
+
# return MARC4J4R::Reader.new(StringIO.new(str), :strictmarc, encoding).first
|
|
230
|
+
return MARC4J4R::Reader.new(s, :strictmarc, encoding).first
|
|
200
231
|
end
|
|
201
232
|
|
|
202
233
|
|
|
@@ -207,6 +238,16 @@ module MARC4J4R
|
|
|
207
238
|
return MARC4J4R::Reader.new(StringIO.new(str), :marcxml).first
|
|
208
239
|
end
|
|
209
240
|
|
|
241
|
+
def Record.new_from_hash hash
|
|
242
|
+
return Java::org.marc4j.MarcInJSON.new_from_hash(hash)
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
def Record.new_from_marc_in_json jsonstring
|
|
246
|
+
return Java::org.marc4j.MarcInJSON.new_from_marc_in_json(jsonstring)
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
|
|
210
251
|
|
|
211
252
|
end
|
|
212
253
|
|
data/lib/marc4j4r.rb
CHANGED
|
@@ -5,20 +5,30 @@ end
|
|
|
5
5
|
require 'logger'
|
|
6
6
|
$LOG ||= Logger.new(STDERR)
|
|
7
7
|
|
|
8
|
+
jardir = File.join(File.dirname(__FILE__), '..', 'jars')
|
|
9
|
+
|
|
10
|
+
# For each jar, check for a representative class in each
|
|
11
|
+
# and include the jar if it's not defined
|
|
12
|
+
|
|
8
13
|
begin
|
|
9
14
|
include_class Java::org.marc4j.marc.impl.RecordImpl
|
|
10
15
|
rescue NameError => e
|
|
11
|
-
jardir = File.join(File.dirname(__FILE__), '..', 'jars')
|
|
12
16
|
require "#{jardir}/javamarc.jar"
|
|
13
17
|
end
|
|
14
18
|
|
|
15
19
|
begin
|
|
16
20
|
include_class Java::org.marc4j.MarcAlephSequentialReader
|
|
17
21
|
rescue
|
|
18
|
-
jardir
|
|
19
|
-
require "#{jardir}/marc4j_serializations.jar"
|
|
22
|
+
require "#{jardir}/marc4j-extra-readers-writers.jar"
|
|
20
23
|
end
|
|
21
24
|
|
|
25
|
+
begin
|
|
26
|
+
include_class Java::org.codehaus.jackson.map.ObjectMapper
|
|
27
|
+
rescue
|
|
28
|
+
require "#{jardir}/jackson-all-1.6.0.jar"
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
|
|
22
32
|
# Define a method that will take a string (filename), IO object, or StringIO object,
|
|
23
33
|
# and return an inputstream/outputstream
|
|
24
34
|
|
data/spec/record_spec.rb
CHANGED
|
@@ -48,7 +48,7 @@ describe "MARC4J4R_basic_retrieval_stuff" do
|
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
it "can find all the fields" do
|
|
51
|
-
fields = @one.collect
|
|
51
|
+
fields = @one.collect {|a| a}
|
|
52
52
|
fields.size.should.equal 16
|
|
53
53
|
end
|
|
54
54
|
|
|
@@ -70,6 +70,8 @@ describe "MARC4J4R::Record #find_by_tag" do
|
|
|
70
70
|
|
|
71
71
|
@cf = MARC4J4R::ControlField.new('005', '20071104155141.9')
|
|
72
72
|
|
|
73
|
+
@batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
|
|
74
|
+
@utf8 = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat").first
|
|
73
75
|
end
|
|
74
76
|
|
|
75
77
|
it "gets an empty array for non-existant tag(s)" do
|
|
@@ -112,14 +114,33 @@ describe "MARC4J4R::Record #find_by_tag" do
|
|
|
112
114
|
first = @one['700']
|
|
113
115
|
@one.find_by_tag('700')[0].should.equal first
|
|
114
116
|
end
|
|
117
|
+
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
describe "Format checks" do
|
|
121
|
+
before do
|
|
122
|
+
@one = MARC4J4R::Reader.new("#{DIR}/one.dat").first
|
|
123
|
+
@batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect {|a| a}
|
|
124
|
+
@utf8 = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat").first
|
|
125
|
+
end
|
|
115
126
|
|
|
116
|
-
it "round trips binary
|
|
127
|
+
it "round trips binary" do
|
|
117
128
|
MARC4J4R::Record.from_string(@one.to_marc).should.equal @one
|
|
118
|
-
|
|
129
|
+
@batch.each do |r|
|
|
130
|
+
MARC4J4R::Record.from_string(r.to_marc).should.equal r
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
it "round trips utf8 record as binary" do
|
|
135
|
+
MARC4J4R::Record.from_string(@utf8.to_marc, :utf8).should.equal @utf8
|
|
119
136
|
end
|
|
120
137
|
|
|
121
|
-
it "round trips
|
|
122
|
-
MARC4J4R::Record.from_xml_string(@one.
|
|
138
|
+
it "round trips XML" do
|
|
139
|
+
MARC4J4R::Record.from_xml_string(@one.to_xml).should.equal @one
|
|
123
140
|
end
|
|
124
141
|
|
|
142
|
+
it "round trips marc-in-json" do
|
|
143
|
+
copy = @one.to_marc_in_json
|
|
144
|
+
MARC4J4R::Record.new_from_marc_in_json(copy).should.equal @one
|
|
145
|
+
end
|
|
125
146
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: marc4j4r
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 31
|
|
5
5
|
prerelease: false
|
|
6
6
|
segments:
|
|
7
7
|
- 1
|
|
8
|
-
- 1
|
|
8
|
+
- 2
|
|
9
9
|
- 0
|
|
10
|
-
version: 1.1.0
|
|
10
|
+
version: 1.2.0
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- BillDueber
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2010-
|
|
18
|
+
date: 2010-10-05 00:00:00 -04:00
|
|
19
19
|
default_executable:
|
|
20
20
|
dependencies:
|
|
21
21
|
- !ruby/object:Gem::Dependency
|
|
@@ -60,8 +60,10 @@ files:
|
|
|
60
60
|
- README.rdoc
|
|
61
61
|
- Rakefile
|
|
62
62
|
- VERSION
|
|
63
|
+
- benchj.rb
|
|
64
|
+
- jars/jackson-all-1.6.0.jar
|
|
63
65
|
- jars/javamarc.jar
|
|
64
|
-
- jars/marc4j_serializations.jar
|
|
66
|
+
- jars/marc4j-extra-readers-writers.jar
|
|
65
67
|
- lib/marc4j4r.rb
|
|
66
68
|
- lib/marc4j4r/controlfield.rb
|
|
67
69
|
- lib/marc4j4r/datafield.rb
|
|
Binary file
|