marc4j4r 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGES CHANGED
@@ -1,3 +1,8 @@
1
+ 1.2.0
2
+ Fixed encoding problem with to_marc and from_string roundtrip
3
+ Added to_hash/to_marc_in_json and new_from_hash/new_from_marc_in_json (see
4
+ http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/)
5
+
1
6
  1.1
2
7
  Added native java method to turn a record into XML (20% speedup or so)
3
8
  1.0
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
- require 'ftools'
3
+ # require 'ftools'
4
4
 
5
5
  begin
6
6
  require 'jeweler'
@@ -14,6 +14,8 @@ begin
14
14
  gem.add_development_dependency "bacon", ">= 0"
15
15
  gem.add_development_dependency "yard", ">= 0"
16
16
 
17
+ gem.files.include 'jars/*.jar'
18
+
17
19
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
20
  end
19
21
  Jeweler::GemcutterTasks.new
@@ -27,6 +29,7 @@ LOCALJAR = 'jars/javamarc.jar'
27
29
  JAVAJAR = '../../javamarc.jar'
28
30
 
29
31
 
32
+
30
33
  file LOCALJAR => JAVAJAR do |t|
31
34
  File.copy(JAVAJAR, LOCALJAR)
32
35
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.0
1
+ 1.2.0
data/benchj.rb ADDED
@@ -0,0 +1,73 @@
1
+ $:.unshift 'lib'
2
+ require 'marc4j4r'
3
+
4
+ require 'benchmark'
5
+
6
+ # require 'yajl'
7
+ # require 'json/pure'
8
+ # require 'msgpack'
9
+
10
+
11
+ jsonsize = 0.0
12
+ marcsize = 0.0
13
+ mpsize = 0.0
14
+
15
+
16
+ # Use Benchmark.measure
17
+ # sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON Pure")
18
+ smtime = Benchmark::Tms.new(0,0,0,0,0, "MARC")
19
+ smptime = Benchmark::Tms.new(0,0,0,0,0, "Msgpack")
20
+ sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON")
21
+
22
+ djptime = Benchmark::Tms.new(0,0,0,0,0, "JSON")
23
+ dmtime = Benchmark::Tms.new(0,0,0,0,0, "MARC")
24
+ dmptime = Benchmark::Tms.new(0,0,0,0,0, "Msgpack")
25
+
26
+
27
+
28
+ i = 0
29
+ iterations = 1
30
+
31
+ iterations.times do
32
+ reader = MARC4J4R::Reader.new('topics.xml', :marcxml)
33
+
34
+ reader.each_with_index do |r, i|
35
+ marc = nil
36
+ json = nil
37
+ mp = nil
38
+ copy = nil
39
+
40
+
41
+ smtime += Benchmark.measure {marc = r.to_marc}
42
+ dmtime += Benchmark.measure {copy = MARC4J4R::Record.from_string(marc)}
43
+
44
+
45
+ sjptime += Benchmark.measure {json = r.to_marc_in_json}
46
+ djptime += Benchmark.measure {copy = MARC4J4R::Record.new_from_marc_in_json(json)}
47
+
48
+ # break if i > 1000
49
+
50
+ end
51
+ end
52
+
53
+ puts "Total of #{i} records run #{iterations} times"
54
+
55
+ puts "\nSERIALIZING"
56
+
57
+ base = smtime.total
58
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', smtime.total, smtime.total / base * 100]
59
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', sjptime.total, sjptime.total / base * 100]
60
+ # puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', smptime.total, smptime.total / base * 100]
61
+
62
+ base = dmtime.total
63
+ puts "\nDESERIALIZING"
64
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', dmtime.total, dmtime.total / base * 100]
65
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', djptime.total, djptime.total / base * 100]
66
+ # puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', dmptime.total, dmptime.total / base * 100]
67
+
68
+ base = dmtime.total + smtime.total
69
+ puts "\nSERIALIZE + DESERIALIZE"
70
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', dmtime.total + smtime.total, (dmtime.total + smtime.total) / base * 100]
71
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', djptime.total + sjptime.total, (djptime.total + sjptime.total) / base * 100]
72
+ # puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', dmptime.total + smptime.total, (dmptime.total + smptime.total) / base * 100]
73
+
Binary file
@@ -1,3 +1,4 @@
1
+ require 'stringio'
1
2
  module MARC4J4R
2
3
  Record = Java::org.marc4j.marc.impl::RecordImpl
3
4
 
@@ -17,10 +18,16 @@ module MARC4J4R
17
18
  def == other
18
19
  return false unless (self.leader == other.leader)
19
20
  self.zip(other) do |so|
20
- return false unless so[0] == so[1]
21
+ unless so[0] == so[1]
22
+ puts "self <> other\n#{so[0]}\n#{so[1]}"
23
+ return false;
24
+ end
21
25
  end
22
26
  other.zip(self) do |so|
23
- return false unless so[0] == so[1]
27
+ unless so[0] == so[1]
28
+ puts "#{so[0]}\n#{so[1]}"
29
+ return false;
30
+ end
24
31
  end
25
32
  return true
26
33
  end
@@ -141,6 +148,7 @@ module MARC4J4R
141
148
 
142
149
 
143
150
  # Return the record as valid MARC-XML
151
+ # @param String encoding The encoding to use
144
152
  # @return String A MARC-XML representation of the record, including the XML header
145
153
 
146
154
  def to_xml
@@ -148,45 +156,66 @@ module MARC4J4R
148
156
  end
149
157
 
150
158
 
151
- def to_marc
152
- begin
159
+ def to_marc encoding='UTF-8'
160
+ # begin
153
161
  s = Java::java.io.ByteArrayOutputStream.new
154
- writer = org.marc4j.MarcPermissiveStreamWriter.new(s)
162
+ writer = org.marc4j.MarcPermissiveStreamWriter.new(s, encoding)
155
163
  writer.write(self)
156
- @marcbinary = s.to_string
157
- return @marcbinary
158
- rescue
159
- # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
160
- "Whoops! Failed: #{$!}"
161
- end
164
+ return s.to_string
165
+ # writer.close
166
+ # @marcbinary = s.to_string
167
+ # return @marcbinary
168
+ # rescue
169
+ # # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
170
+ # "Whoops! Failed: #{$!}"
171
+ # end
162
172
  end
163
- end
164
-
165
173
 
166
- def to_marchash
167
- h = {}
168
- h['type'] = 'marc-hash'
169
- h['version'] = [1,0]
170
- h['leader'] = self.leader
171
-
172
- fields = []
173
-
174
- self.getVariableFields.each do |f|
175
- if f.controlField?
176
- fields << [f.tag, f.value]
177
- else
178
- farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
179
- subs = []
180
- f.each do |subfield|
181
- subs << [subfield.code, subfield.value]
174
+ def to_marchash
175
+ h = {}
176
+ h['type'] = 'marc-hash'
177
+ h['version'] = [1,0]
178
+ h['leader'] = self.leader
179
+
180
+ fields = []
181
+
182
+ self.getVariableFields.each do |f|
183
+ if f.controlField?
184
+ fields << [f.tag, f.value]
185
+ else
186
+ farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
187
+ subs = []
188
+ f.each do |subfield|
189
+ subs << [subfield.code, subfield.value]
190
+ end
191
+ farray.push subs
192
+ fields << farray
182
193
  end
183
- farray.push subs
184
- fields << farray
185
194
  end
195
+ h['fields'] = fields
196
+ return h
197
+ end
198
+
199
+ # Turn it into a marc-in-json hashmap. Note that this won't really work
200
+ # like a ruby hash; you need to know what you're getting, since stuff
201
+ # like #each won't work.
202
+ #
203
+ # Better to just use to_marc_in_json if you want a json string
204
+
205
+ def to_hash
206
+ return Java::org.marc4j.MarcInJSON.record_to_hash(self)
186
207
  end
187
- h['fields'] = fields
188
- return h
208
+
209
+
210
+ # Turn it into a marc-in-json JSON string using Jackson
211
+ def to_marc_in_json
212
+ return Java::org.marc4j.MarcInJSON.record_to_marc_in_json(self)
213
+ end
214
+
215
+
189
216
  end
217
+
218
+
190
219
 
191
220
  # Given a MARC record in a string, turn it into an object
192
221
  # @param String str The record as a MARC binary string
@@ -195,8 +224,10 @@ module MARC4J4R
195
224
  # Note that the normal way of defining this class (self.from_string)
196
225
  # didn't work; I assume it has something to do with the fact that
197
226
  # it's actually jrst aliased to the Java class
198
- def Record.from_string str
199
- return MARC4J4R::Reader.new(StringIO.new(str)).first
227
+ def Record.from_string str, encoding=nil
228
+ s = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
229
+ # return MARC4J4R::Reader.new(StringIO.new(str), :strictmarc, encoding).first
230
+ return MARC4J4R::Reader.new(s, :strictmarc, encoding).first
200
231
  end
201
232
 
202
233
 
@@ -207,6 +238,16 @@ module MARC4J4R
207
238
  return MARC4J4R::Reader.new(StringIO.new(str), :marcxml).first
208
239
  end
209
240
 
241
+ def Record.new_from_hash hash
242
+ return Java::org.marc4j.MarcInJSON.new_from_hash(hash)
243
+ end
244
+
245
+ def Record.new_from_marc_in_json jsonstring
246
+ return Java::org.marc4j.MarcInJSON.new_from_marc_in_json(jsonstring)
247
+ end
248
+
249
+
250
+
210
251
 
211
252
  end
212
253
 
data/lib/marc4j4r.rb CHANGED
@@ -5,20 +5,30 @@ end
5
5
  require 'logger'
6
6
  $LOG ||= Logger.new(STDERR)
7
7
 
8
+ jardir = File.join(File.dirname(__FILE__), '..', 'jars')
9
+
10
+ # For each jar, try to reference a representative class it provides,
11
+ # and require the jar only if that class is not already defined
12
+
8
13
  begin
9
14
  include_class Java::org.marc4j.marc.impl.RecordImpl
10
15
  rescue NameError => e
11
- jardir = File.join(File.dirname(__FILE__), '..', 'jars')
12
16
  require "#{jardir}/javamarc.jar"
13
17
  end
14
18
 
15
19
  begin
16
20
  include_class Java::org.marc4j.MarcAlephSequentialReader
17
21
  rescue
18
- jardir = File.join(File.dirname(__FILE__), '..', 'jars')
19
- require "#{jardir}/marc4j_serializations.jar"
22
+ require "#{jardir}/marc4j-extra-readers-writers.jar"
20
23
  end
21
24
 
25
+ begin
26
+ include_class Java::org.codehaus.jackson.map.ObjectMapper
27
+ rescue
28
+ require "#{jardir}/jackson-all-1.6.0.jar"
29
+ end
30
+
31
+
22
32
  # Define a method that will take a string (filename), IO object, or StringIO object,
23
33
  # and return an inputstream/outputstream
24
34
 
data/spec/record_spec.rb CHANGED
@@ -48,7 +48,7 @@ describe "MARC4J4R_basic_retrieval_stuff" do
48
48
  end
49
49
 
50
50
  it "can find all the fields" do
51
- fields = @one.collect
51
+ fields = @one.collect {|a| a}
52
52
  fields.size.should.equal 16
53
53
  end
54
54
 
@@ -70,6 +70,8 @@ describe "MARC4J4R::Record #find_by_tag" do
70
70
 
71
71
  @cf = MARC4J4R::ControlField.new('005', '20071104155141.9')
72
72
 
73
+ @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
74
+ @utf8 = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat").first
73
75
  end
74
76
 
75
77
  it "gets an empty array for non-existant tag(s)" do
@@ -112,14 +114,33 @@ describe "MARC4J4R::Record #find_by_tag" do
112
114
  first = @one['700']
113
115
  @one.find_by_tag('700')[0].should.equal first
114
116
  end
117
+
118
+ end
119
+
120
+ describe "Format checks" do
121
+ before do
122
+ @one = MARC4J4R::Reader.new("#{DIR}/one.dat").first
123
+ @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect {|a| a}
124
+ @utf8 = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat").first
125
+ end
115
126
 
116
- it "round trips binary and xml" do
127
+ it "round trips binary" do
117
128
  MARC4J4R::Record.from_string(@one.to_marc).should.equal @one
118
- MARC4J4R::Record.from_xml_string(@one.to_xml).should.equal @one
129
+ @batch.each do |r|
130
+ MARC4J4R::Record.from_string(r.to_marc).should.equal r
131
+ end
132
+ end
133
+
134
+ it "round trips utf8 record as binary" do
135
+ MARC4J4R::Record.from_string(@utf8.to_marc, :utf8).should.equal @utf8
119
136
  end
120
137
 
121
- it "round trips xml with the native_to_xml" do
122
- MARC4J4R::Record.from_xml_string(@one.native_to_xml).should.equal @one
138
+ it "round trips XML" do
139
+ MARC4J4R::Record.from_xml_string(@one.to_xml).should.equal @one
123
140
  end
124
141
 
142
+ it "round trips marc-in-json" do
143
+ copy = @one.to_marc_in_json
144
+ MARC4J4R::Record.new_from_marc_in_json(copy).should.equal @one
145
+ end
125
146
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc4j4r
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 31
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
- - 1
8
+ - 2
9
9
  - 0
10
- version: 1.1.0
10
+ version: 1.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - BillDueber
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-16 00:00:00 -04:00
18
+ date: 2010-10-05 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -60,8 +60,10 @@ files:
60
60
  - README.rdoc
61
61
  - Rakefile
62
62
  - VERSION
63
+ - benchj.rb
64
+ - jars/jackson-all-1.6.0.jar
63
65
  - jars/javamarc.jar
64
- - jars/marc4j_serializations.jar
66
+ - jars/marc4j-extra-readers-writers.jar
65
67
  - lib/marc4j4r.rb
66
68
  - lib/marc4j4r/controlfield.rb
67
69
  - lib/marc4j4r/datafield.rb
Binary file