marc4j4r 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +5 -0
- data/Rakefile +4 -1
- data/VERSION +1 -1
- data/benchj.rb +73 -0
- data/jars/jackson-all-1.6.0.jar +0 -0
- data/jars/marc4j-extra-readers-writers.jar +0 -0
- data/lib/marc4j4r/record.rb +76 -35
- data/lib/marc4j4r.rb +13 -3
- data/spec/record_spec.rb +26 -5
- metadata +7 -5
- data/jars/marc4j_serializations.jar +0 -0
data/CHANGES
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
1.2.0
|
2
|
+
Fixed encoding problem with to_marc and from_string roundtrip
|
3
|
+
Added to_hash/to_marc_in_json and new_from_hash/new_from_marc_in_json (see
|
4
|
+
http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/)
|
5
|
+
|
1
6
|
1.1
|
2
7
|
Added native java method to turn a record into XML (20% speedup or so)
|
3
8
|
1.0
|
data/Rakefile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'rake'
|
3
|
-
require 'ftools'
|
3
|
+
# require 'ftools'
|
4
4
|
|
5
5
|
begin
|
6
6
|
require 'jeweler'
|
@@ -14,6 +14,8 @@ begin
|
|
14
14
|
gem.add_development_dependency "bacon", ">= 0"
|
15
15
|
gem.add_development_dependency "yard", ">= 0"
|
16
16
|
|
17
|
+
gem.files.include 'jars/*.jar'
|
18
|
+
|
17
19
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
18
20
|
end
|
19
21
|
Jeweler::GemcutterTasks.new
|
@@ -27,6 +29,7 @@ LOCALJAR = 'jars/javamarc.jar'
|
|
27
29
|
JAVAJAR = '../../javamarc.jar'
|
28
30
|
|
29
31
|
|
32
|
+
|
30
33
|
file LOCALJAR => JAVAJAR do |t|
|
31
34
|
File.copy(JAVAJAR, LOCALJAR)
|
32
35
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.2.0
|
data/benchj.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
$:.unshift 'lib'
|
2
|
+
require 'marc4j4r'
|
3
|
+
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
# require 'yajl'
|
7
|
+
# require 'json/pure'
|
8
|
+
# require 'msgpack'
|
9
|
+
|
10
|
+
|
11
|
+
jsonsize = 0.0
|
12
|
+
marcsize = 0.0
|
13
|
+
mpsize = 0.0
|
14
|
+
|
15
|
+
|
16
|
+
# Use Benchmark.measure
|
17
|
+
# sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON Pure")
|
18
|
+
smtime = Benchmark::Tms.new(0,0,0,0,0, "MARC")
|
19
|
+
smptime = Benchmark::Tms.new(0,0,0,0,0, "Msgpack")
|
20
|
+
sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON")
|
21
|
+
|
22
|
+
djptime = Benchmark::Tms.new(0,0,0,0,0, "JSON")
|
23
|
+
dmtime = Benchmark::Tms.new(0,0,0,0,0, "MARC")
|
24
|
+
dmptime = Benchmark::Tms.new(0,0,0,0,0, "Msgpack")
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
i = 0
|
29
|
+
iterations = 1
|
30
|
+
|
31
|
+
iterations.times do
|
32
|
+
reader = MARC4J4R::Reader.new('topics.xml', :marcxml)
|
33
|
+
|
34
|
+
reader.each_with_index do |r, i|
|
35
|
+
marc = nil
|
36
|
+
json = nil
|
37
|
+
mp = nil
|
38
|
+
copy = nil
|
39
|
+
|
40
|
+
|
41
|
+
smtime += Benchmark.measure {marc = r.to_marc}
|
42
|
+
dmtime += Benchmark.measure {copy = MARC4J4R::Record.from_string(marc)}
|
43
|
+
|
44
|
+
|
45
|
+
sjptime += Benchmark.measure {json = r.to_marc_in_json}
|
46
|
+
djptime += Benchmark.measure {copy = MARC4J4R::Record.new_from_marc_in_json(json)}
|
47
|
+
|
48
|
+
# break if i > 1000
|
49
|
+
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
puts "Total of #{i} records run #{iterations} times"
|
54
|
+
|
55
|
+
puts "\nSERIALIZING"
|
56
|
+
|
57
|
+
base = smtime.total
|
58
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', smtime.total, smtime.total / base * 100]
|
59
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', sjptime.total, sjptime.total / base * 100]
|
60
|
+
# puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', smptime.total, smptime.total / base * 100]
|
61
|
+
|
62
|
+
base = dmtime.total
|
63
|
+
puts "\nDESERIALIZING"
|
64
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', dmtime.total, dmtime.total / base * 100]
|
65
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', djptime.total, djptime.total / base * 100]
|
66
|
+
# puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', dmptime.total, dmptime.total / base * 100]
|
67
|
+
|
68
|
+
base = dmtime.total + smtime.total
|
69
|
+
puts "\nSERIALIZE + DESERIALIZE"
|
70
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', dmtime.total + smtime.total, (dmtime.total + smtime.total) / base * 100]
|
71
|
+
puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', djptime.total + sjptime.total, (djptime.total + sjptime.total) / base * 100]
|
72
|
+
# puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', dmptime.total + smptime.total, (dmptime.total + smptime.total) / base * 100]
|
73
|
+
|
Binary file
|
Binary file
|
data/lib/marc4j4r/record.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'stringio'
|
1
2
|
module MARC4J4R
|
2
3
|
Record = Java::org.marc4j.marc.impl::RecordImpl
|
3
4
|
|
@@ -17,10 +18,16 @@ module MARC4J4R
|
|
17
18
|
def == other
|
18
19
|
return false unless (self.leader == other.leader)
|
19
20
|
self.zip(other) do |so|
|
20
|
-
|
21
|
+
unless so[0] == so[1]
|
22
|
+
puts "self <> other\n#{so[0]}\n#{so[1]}"
|
23
|
+
return false;
|
24
|
+
end
|
21
25
|
end
|
22
26
|
other.zip(self) do |so|
|
23
|
-
|
27
|
+
unless so[0] == so[1]
|
28
|
+
puts "#{so[0]}\n#{so[1]}"
|
29
|
+
return false;
|
30
|
+
end
|
24
31
|
end
|
25
32
|
return true
|
26
33
|
end
|
@@ -141,6 +148,7 @@ module MARC4J4R
|
|
141
148
|
|
142
149
|
|
143
150
|
# Return the record as valid MARC-XML
|
151
|
+
# @param String encoding The encoding to use
|
144
152
|
# @return String A MARC-XML representation of the record, including the XML header
|
145
153
|
|
146
154
|
def to_xml
|
@@ -148,45 +156,66 @@ module MARC4J4R
|
|
148
156
|
end
|
149
157
|
|
150
158
|
|
151
|
-
def to_marc
|
152
|
-
begin
|
159
|
+
def to_marc encoding='UTF-8'
|
160
|
+
# begin
|
153
161
|
s = Java::java.io.ByteArrayOutputStream.new
|
154
|
-
writer = org.marc4j.MarcPermissiveStreamWriter.new(s)
|
162
|
+
writer = org.marc4j.MarcPermissiveStreamWriter.new(s, encoding)
|
155
163
|
writer.write(self)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
#
|
160
|
-
|
161
|
-
|
164
|
+
return s.to_string
|
165
|
+
# writer.close
|
166
|
+
# @marcbinary = s.to_string
|
167
|
+
# return @marcbinary
|
168
|
+
# rescue
|
169
|
+
# # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
|
170
|
+
# "Whoops! Failed: #{$!}"
|
171
|
+
# end
|
162
172
|
end
|
163
|
-
end
|
164
|
-
|
165
173
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
174
|
+
def to_marchash
|
175
|
+
h = {}
|
176
|
+
h['type'] = 'marc-hash'
|
177
|
+
h['version'] = [1,0]
|
178
|
+
h['leader'] = self.leader
|
179
|
+
|
180
|
+
fields = []
|
181
|
+
|
182
|
+
self.getVariableFields.each do |f|
|
183
|
+
if f.controlField?
|
184
|
+
fields << [f.tag, f.value]
|
185
|
+
else
|
186
|
+
farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
|
187
|
+
subs = []
|
188
|
+
f.each do |subfield|
|
189
|
+
subs << [subfield.code, subfield.value]
|
190
|
+
end
|
191
|
+
farray.push subs
|
192
|
+
fields << farray
|
182
193
|
end
|
183
|
-
farray.push subs
|
184
|
-
fields << farray
|
185
194
|
end
|
195
|
+
h['fields'] = fields
|
196
|
+
return h
|
197
|
+
end
|
198
|
+
|
199
|
+
# Turn it into a marc-in-json hashmap. Note that this won't really work
|
200
|
+
# like a ruby hash; you need to know what you're getting, since stuff
|
201
|
+
# like #each won't work.
|
202
|
+
#
|
203
|
+
# Better to just use to_marc_in_json if you want a json string
|
204
|
+
|
205
|
+
def to_hash
|
206
|
+
return Java::org.marc4j.MarcInJSON.record_to_hash(self)
|
186
207
|
end
|
187
|
-
|
188
|
-
|
208
|
+
|
209
|
+
|
210
|
+
# Turn it into a marc-in-json JSON string using Jackson
|
211
|
+
def to_marc_in_json
|
212
|
+
return Java::org.marc4j.MarcInJSON.record_to_marc_in_json(self)
|
213
|
+
end
|
214
|
+
|
215
|
+
|
189
216
|
end
|
217
|
+
|
218
|
+
|
190
219
|
|
191
220
|
# Give a marc record in a string, turn it into an object
|
192
221
|
# @param String str The record as a MARC binary string
|
@@ -195,8 +224,10 @@ module MARC4J4R
|
|
195
224
|
# Note that the normal way of defining this class (self.from_string)
|
196
225
|
# didn't work; I assume it has something to do with the fact that
|
197
226
|
# it's actually just aliased to the Java class
|
198
|
-
def Record.from_string str
|
199
|
-
|
227
|
+
def Record.from_string str, encoding=nil
|
228
|
+
s = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
|
229
|
+
# return MARC4J4R::Reader.new(StringIO.new(str), :strictmarc, encoding).first
|
230
|
+
return MARC4J4R::Reader.new(s, :strictmarc, encoding).first
|
200
231
|
end
|
201
232
|
|
202
233
|
|
@@ -207,6 +238,16 @@ module MARC4J4R
|
|
207
238
|
return MARC4J4R::Reader.new(StringIO.new(str), :marcxml).first
|
208
239
|
end
|
209
240
|
|
241
|
+
def Record.new_from_hash hash
|
242
|
+
return Java::org.marc4j.MarcInJSON.new_from_hash(hash)
|
243
|
+
end
|
244
|
+
|
245
|
+
def Record.new_from_marc_in_json jsonstring
|
246
|
+
return Java::org.marc4j.MarcInJSON.new_from_marc_in_json(jsonstring)
|
247
|
+
end
|
248
|
+
|
249
|
+
|
250
|
+
|
210
251
|
|
211
252
|
end
|
212
253
|
|
data/lib/marc4j4r.rb
CHANGED
@@ -5,20 +5,30 @@ end
|
|
5
5
|
require 'logger'
|
6
6
|
$LOG ||= Logger.new(STDERR)
|
7
7
|
|
8
|
+
jardir = File.join(File.dirname(__FILE__), '..', 'jars')
|
9
|
+
|
10
|
+
# For each jar, check for a representative class in each
|
11
|
+
# and include the jar if it's not defined
|
12
|
+
|
8
13
|
begin
|
9
14
|
include_class Java::org.marc4j.marc.impl.RecordImpl
|
10
15
|
rescue NameError => e
|
11
|
-
jardir = File.join(File.dirname(__FILE__), '..', 'jars')
|
12
16
|
require "#{jardir}/javamarc.jar"
|
13
17
|
end
|
14
18
|
|
15
19
|
begin
|
16
20
|
include_class Java::org.marc4j.MarcAlephSequentialReader
|
17
21
|
rescue
|
18
|
-
jardir
|
19
|
-
require "#{jardir}/marc4j_serializations.jar"
|
22
|
+
require "#{jardir}/marc4j-extra-readers-writers.jar"
|
20
23
|
end
|
21
24
|
|
25
|
+
begin
|
26
|
+
include_class Java::org.codehaus.jackson.map.ObjectMapper
|
27
|
+
rescue
|
28
|
+
require "#{jardir}/jackson-all-1.6.0.jar"
|
29
|
+
end
|
30
|
+
|
31
|
+
|
22
32
|
# Define a method that will take a string (filename), IO object, or StringIO object,
|
23
33
|
# and return an inputstream/outputstream
|
24
34
|
|
data/spec/record_spec.rb
CHANGED
@@ -48,7 +48,7 @@ describe "MARC4J4R_basic_retrieval_stuff" do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
it "can find all the fields" do
|
51
|
-
fields = @one.collect
|
51
|
+
fields = @one.collect {|a| a}
|
52
52
|
fields.size.should.equal 16
|
53
53
|
end
|
54
54
|
|
@@ -70,6 +70,8 @@ describe "MARC4J4R::Record #find_by_tag" do
|
|
70
70
|
|
71
71
|
@cf = MARC4J4R::ControlField.new('005', '20071104155141.9')
|
72
72
|
|
73
|
+
@batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
|
74
|
+
@utf8 = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat").first
|
73
75
|
end
|
74
76
|
|
75
77
|
it "gets an empty array for non-existant tag(s)" do
|
@@ -112,14 +114,33 @@ describe "MARC4J4R::Record #find_by_tag" do
|
|
112
114
|
first = @one['700']
|
113
115
|
@one.find_by_tag('700')[0].should.equal first
|
114
116
|
end
|
117
|
+
|
118
|
+
end
|
119
|
+
|
120
|
+
describe "Format checks" do
|
121
|
+
before do
|
122
|
+
@one = MARC4J4R::Reader.new("#{DIR}/one.dat").first
|
123
|
+
@batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect {|a| a}
|
124
|
+
@utf8 = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat").first
|
125
|
+
end
|
115
126
|
|
116
|
-
it "round trips binary
|
127
|
+
it "round trips binary" do
|
117
128
|
MARC4J4R::Record.from_string(@one.to_marc).should.equal @one
|
118
|
-
|
129
|
+
@batch.each do |r|
|
130
|
+
MARC4J4R::Record.from_string(r.to_marc).should.equal r
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
it "round trips utf8 record as binary" do
|
135
|
+
MARC4J4R::Record.from_string(@utf8.to_marc, :utf8).should.equal @utf8
|
119
136
|
end
|
120
137
|
|
121
|
-
it "round trips
|
122
|
-
MARC4J4R::Record.from_xml_string(@one.
|
138
|
+
it "round trips XML" do
|
139
|
+
MARC4J4R::Record.from_xml_string(@one.to_xml).should.equal @one
|
123
140
|
end
|
124
141
|
|
142
|
+
it "round trips marc-in-json" do
|
143
|
+
copy = @one.to_marc_in_json
|
144
|
+
MARC4J4R::Record.new_from_marc_in_json(copy).should.equal @one
|
145
|
+
end
|
125
146
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc4j4r
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 1.
|
10
|
+
version: 1.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- BillDueber
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-10-05 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -60,8 +60,10 @@ files:
|
|
60
60
|
- README.rdoc
|
61
61
|
- Rakefile
|
62
62
|
- VERSION
|
63
|
+
- benchj.rb
|
64
|
+
- jars/jackson-all-1.6.0.jar
|
63
65
|
- jars/javamarc.jar
|
64
|
-
- jars/
|
66
|
+
- jars/marc4j-extra-readers-writers.jar
|
65
67
|
- lib/marc4j4r.rb
|
66
68
|
- lib/marc4j4r/controlfield.rb
|
67
69
|
- lib/marc4j4r/datafield.rb
|
Binary file
|