marc4j4r 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGES CHANGED
@@ -1,3 +1,8 @@
1
+ 1.2.0
2
+ Fixed encoding problem with to_marc and from_string roundtrip
3
+ Added to_hash/to_marc_in_json and from_hash/from_marc_in_json (see
4
+ http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/)
5
+
1
6
  1.1
2
7
  Added native java method to turn a record into XML (20% speedup or so)
3
8
  1.0
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
- require 'ftools'
3
+ # require 'ftools'
4
4
 
5
5
  begin
6
6
  require 'jeweler'
@@ -14,6 +14,8 @@ begin
14
14
  gem.add_development_dependency "bacon", ">= 0"
15
15
  gem.add_development_dependency "yard", ">= 0"
16
16
 
17
+ gem.files.include 'jars/*.jar'
18
+
17
19
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
20
  end
19
21
  Jeweler::GemcutterTasks.new
@@ -27,6 +29,7 @@ LOCALJAR = 'jars/javamarc.jar'
27
29
  JAVAJAR = '../../javamarc.jar'
28
30
 
29
31
 
32
+
30
33
  file LOCALJAR => JAVAJAR do |t|
31
34
  File.copy(JAVAJAR, LOCALJAR)
32
35
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.0
1
+ 1.2.0
data/benchj.rb ADDED
@@ -0,0 +1,73 @@
1
+ $:.unshift 'lib'
2
+ require 'marc4j4r'
3
+
4
+ require 'benchmark'
5
+
6
+ # require 'yajl'
7
+ # require 'json/pure'
8
+ # require 'msgpack'
9
+
10
+
11
+ jsonsize = 0.0
12
+ marcsize = 0.0
13
+ mpsize = 0.0
14
+
15
+
16
+ # Use Benchmark.measure
17
+ # sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON Pure")
18
+ smtime = Benchmark::Tms.new(0,0,0,0,0, "MARC")
19
+ smptime = Benchmark::Tms.new(0,0,0,0,0, "Msgpack")
20
+ sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON")
21
+
22
+ djptime = Benchmark::Tms.new(0,0,0,0,0, "JSON")
23
+ dmtime = Benchmark::Tms.new(0,0,0,0,0, "MARC")
24
+ dmptime = Benchmark::Tms.new(0,0,0,0,0, "Msgpack")
25
+
26
+
27
+
28
+ i = 0
29
+ iterations = 1
30
+
31
+ iterations.times do
32
+ reader = MARC4J4R::Reader.new('topics.xml', :marcxml)
33
+
34
+ reader.each_with_index do |r, i|
35
+ marc = nil
36
+ json = nil
37
+ mp = nil
38
+ copy = nil
39
+
40
+
41
+ smtime += Benchmark.measure {marc = r.to_marc}
42
+ dmtime += Benchmark.measure {copy = MARC4J4R::Record.from_string(marc)}
43
+
44
+
45
+ sjptime += Benchmark.measure {json = r.to_marc_in_json}
46
+ djptime += Benchmark.measure {copy = MARC4J4R::Record.new_from_marc_in_json(json)}
47
+
48
+ # break if i > 1000
49
+
50
+ end
51
+ end
52
+
53
+ puts "Total of #{i} records run #{iterations} times"
54
+
55
+ puts "\nSERIALIZING"
56
+
57
+ base = smtime.total
58
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', smtime.total, smtime.total / base * 100]
59
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', sjptime.total, sjptime.total / base * 100]
60
+ # puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', smptime.total, smptime.total / base * 100]
61
+
62
+ base = dmtime.total
63
+ puts "\nDESERIALIZING"
64
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', dmtime.total, dmtime.total / base * 100]
65
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', djptime.total, djptime.total / base * 100]
66
+ # puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', dmptime.total, dmptime.total / base * 100]
67
+
68
+ base = dmtime.total + smtime.total
69
+ puts "\nSERIALIZE + DESERIALIZE"
70
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['MARC', dmtime.total + smtime.total, (dmtime.total + smtime.total) / base * 100]
71
+ puts ' %-15s %8.2f s (%3.0f%%)' % ['Json', djptime.total + sjptime.total, (djptime.total + sjptime.total) / base * 100]
72
+ # puts ' %-15s %8.2f s (%3.0f%%)' % ['Msgpack', dmptime.total + smptime.total, (dmptime.total + smptime.total) / base * 100]
73
+
Binary file
@@ -1,3 +1,4 @@
1
+ require 'stringio'
1
2
  module MARC4J4R
2
3
  Record = Java::org.marc4j.marc.impl::RecordImpl
3
4
 
@@ -17,10 +18,16 @@ module MARC4J4R
17
18
  def == other
18
19
  return false unless (self.leader == other.leader)
19
20
  self.zip(other) do |so|
20
- return false unless so[0] == so[1]
21
+ unless so[0] == so[1]
22
+ puts "self <> other\n#{so[0]}\n#{so[1]}"
23
+ return false;
24
+ end
21
25
  end
22
26
  other.zip(self) do |so|
23
- return false unless so[0] == so[1]
27
+ unless so[0] == so[1]
28
+ puts "#{so[0]}\n#{so[1]}"
29
+ return false;
30
+ end
24
31
  end
25
32
  return true
26
33
  end
@@ -141,6 +148,7 @@ module MARC4J4R
141
148
 
142
149
 
143
150
  # Return the record as valid MARC-XML
151
+ # @param String encoding The encoding to use
144
152
  # @return String A MARC-XML representation of the record, including the XML header
145
153
 
146
154
  def to_xml
@@ -148,45 +156,66 @@ module MARC4J4R
148
156
  end
149
157
 
150
158
 
151
- def to_marc
152
- begin
159
+ def to_marc encoding='UTF-8'
160
+ # begin
153
161
  s = Java::java.io.ByteArrayOutputStream.new
154
- writer = org.marc4j.MarcPermissiveStreamWriter.new(s)
162
+ writer = org.marc4j.MarcPermissiveStreamWriter.new(s, encoding)
155
163
  writer.write(self)
156
- @marcbinary = s.to_string
157
- return @marcbinary
158
- rescue
159
- # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
160
- "Whoops! Failed: #{$!}"
161
- end
164
+ return s.to_string
165
+ # writer.close
166
+ # @marcbinary = s.to_string
167
+ # return @marcbinary
168
+ # rescue
169
+ # # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
170
+ # "Whoops! Failed: #{$!}"
171
+ # end
162
172
  end
163
- end
164
-
165
173
 
166
- def to_marchash
167
- h = {}
168
- h['type'] = 'marc-hash'
169
- h['version'] = [1,0]
170
- h['leader'] = self.leader
171
-
172
- fields = []
173
-
174
- self.getVariableFields.each do |f|
175
- if f.controlField?
176
- fields << [f.tag, f.value]
177
- else
178
- farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
179
- subs = []
180
- f.each do |subfield|
181
- subs << [subfield.code, subfield.value]
174
+ def to_marchash
175
+ h = {}
176
+ h['type'] = 'marc-hash'
177
+ h['version'] = [1,0]
178
+ h['leader'] = self.leader
179
+
180
+ fields = []
181
+
182
+ self.getVariableFields.each do |f|
183
+ if f.controlField?
184
+ fields << [f.tag, f.value]
185
+ else
186
+ farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
187
+ subs = []
188
+ f.each do |subfield|
189
+ subs << [subfield.code, subfield.value]
190
+ end
191
+ farray.push subs
192
+ fields << farray
182
193
  end
183
- farray.push subs
184
- fields << farray
185
194
  end
195
+ h['fields'] = fields
196
+ return h
197
+ end
198
+
199
+ # Turn it into a marc-in-json hashmap. Note that this won't really work
200
+ # like a ruby hash; you need to know what you're getting, since stuff
201
+ # like #each won't work.
202
+ #
203
+ # Better to just use to_marc_in_json if you want a json string
204
+
205
+ def to_hash
206
+ return Java::org.marc4j.MarcInJSON.record_to_hash(self)
186
207
  end
187
- h['fields'] = fields
188
- return h
208
+
209
+
210
+ # Turn it into a marc-in-json JSON string using Jackson
211
+ def to_marc_in_json
212
+ return Java::org.marc4j.MarcInJSON.record_to_marc_in_json(self)
213
+ end
214
+
215
+
189
216
  end
217
+
218
+
190
219
 
191
220
  # Give a marc record in a string, turn it into an object
192
221
  # @param String str The record as a MARC binary string
@@ -195,8 +224,10 @@ module MARC4J4R
195
224
  # Note that the normal way of defining this class (self.from_string)
196
225
  # didn't work; I assume it has something to do with the fact that
197
226
  # it's actually just aliased to the Java class
198
- def Record.from_string str
199
- return MARC4J4R::Reader.new(StringIO.new(str)).first
227
+ def Record.from_string str, encoding=nil
228
+ s = Java::java.io.ByteArrayInputStream.new(str.to_java_bytes)
229
+ # return MARC4J4R::Reader.new(StringIO.new(str), :strictmarc, encoding).first
230
+ return MARC4J4R::Reader.new(s, :strictmarc, encoding).first
200
231
  end
201
232
 
202
233
 
@@ -207,6 +238,16 @@ module MARC4J4R
207
238
  return MARC4J4R::Reader.new(StringIO.new(str), :marcxml).first
208
239
  end
209
240
 
241
+ def Record.new_from_hash hash
242
+ return Java::org.marc4j.MarcInJSON.new_from_hash(hash)
243
+ end
244
+
245
+ def Record.new_from_marc_in_json jsonstring
246
+ return Java::org.marc4j.MarcInJSON.new_from_marc_in_json(jsonstring)
247
+ end
248
+
249
+
250
+
210
251
 
211
252
  end
212
253
 
data/lib/marc4j4r.rb CHANGED
@@ -5,20 +5,30 @@ end
5
5
  require 'logger'
6
6
  $LOG ||= Logger.new(STDERR)
7
7
 
8
+ jardir = File.join(File.dirname(__FILE__), '..', 'jars')
9
+
10
+ # For each jar, check for a representative class in each
11
+ # and include the jar if it's not defined
12
+
8
13
  begin
9
14
  include_class Java::org.marc4j.marc.impl.RecordImpl
10
15
  rescue NameError => e
11
- jardir = File.join(File.dirname(__FILE__), '..', 'jars')
12
16
  require "#{jardir}/javamarc.jar"
13
17
  end
14
18
 
15
19
  begin
16
20
  include_class Java::org.marc4j.MarcAlephSequentialReader
17
21
  rescue
18
- jardir = File.join(File.dirname(__FILE__), '..', 'jars')
19
- require "#{jardir}/marc4j_serializations.jar"
22
+ require "#{jardir}/marc4j-extra-readers-writers.jar"
20
23
  end
21
24
 
25
+ begin
26
+ include_class Java::org.codehaus.jackson.map.ObjectMapper
27
+ rescue
28
+ require "#{jardir}/jackson-all-1.6.0.jar"
29
+ end
30
+
31
+
22
32
  # Define a method that will take a string (filename), IO object, or StringIO object,
23
33
  # and return an inputstream/outputstream
24
34
 
data/spec/record_spec.rb CHANGED
@@ -48,7 +48,7 @@ describe "MARC4J4R_basic_retrieval_stuff" do
48
48
  end
49
49
 
50
50
  it "can find all the fields" do
51
- fields = @one.collect
51
+ fields = @one.collect {|a| a}
52
52
  fields.size.should.equal 16
53
53
  end
54
54
 
@@ -70,6 +70,8 @@ describe "MARC4J4R::Record #find_by_tag" do
70
70
 
71
71
  @cf = MARC4J4R::ControlField.new('005', '20071104155141.9')
72
72
 
73
+ @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
74
+ @utf8 = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat").first
73
75
  end
74
76
 
75
77
  it "gets an empty array for non-existant tag(s)" do
@@ -112,14 +114,33 @@ describe "MARC4J4R::Record #find_by_tag" do
112
114
  first = @one['700']
113
115
  @one.find_by_tag('700')[0].should.equal first
114
116
  end
117
+
118
+ end
119
+
120
+ describe "Format checks" do
121
+ before do
122
+ @one = MARC4J4R::Reader.new("#{DIR}/one.dat").first
123
+ @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect {|a| a}
124
+ @utf8 = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat").first
125
+ end
115
126
 
116
- it "round trips binary and xml" do
127
+ it "round trips binary" do
117
128
  MARC4J4R::Record.from_string(@one.to_marc).should.equal @one
118
- MARC4J4R::Record.from_xml_string(@one.to_xml).should.equal @one
129
+ @batch.each do |r|
130
+ MARC4J4R::Record.from_string(r.to_marc).should.equal r
131
+ end
132
+ end
133
+
134
+ it "round trips utf8 record as binary" do
135
+ MARC4J4R::Record.from_string(@utf8.to_marc, :utf8).should.equal @utf8
119
136
  end
120
137
 
121
- it "round trips xml with the native_to_xml" do
122
- MARC4J4R::Record.from_xml_string(@one.native_to_xml).should.equal @one
138
+ it "round trips XML" do
139
+ MARC4J4R::Record.from_xml_string(@one.to_xml).should.equal @one
123
140
  end
124
141
 
142
+ it "round trips marc-in-json" do
143
+ copy = @one.to_marc_in_json
144
+ MARC4J4R::Record.new_from_marc_in_json(copy).should.equal @one
145
+ end
125
146
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc4j4r
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 31
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
- - 1
8
+ - 2
9
9
  - 0
10
- version: 1.1.0
10
+ version: 1.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - BillDueber
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-16 00:00:00 -04:00
18
+ date: 2010-10-05 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -60,8 +60,10 @@ files:
60
60
  - README.rdoc
61
61
  - Rakefile
62
62
  - VERSION
63
+ - benchj.rb
64
+ - jars/jackson-all-1.6.0.jar
63
65
  - jars/javamarc.jar
64
- - jars/marc4j_serializations.jar
66
+ - jars/marc4j-extra-readers-writers.jar
65
67
  - lib/marc4j4r.rb
66
68
  - lib/marc4j4r/controlfield.rb
67
69
  - lib/marc4j4r/datafield.rb
Binary file