marc4j4r 0.1.3 → 0.1.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/test/bench.rb ADDED
@@ -0,0 +1,63 @@
1
+ require 'rubygems'
2
+ require '../lib/marc4j4r'
3
+ require 'benchmark'
4
+
5
+ filename = 'test.mrc'
6
+ tags = %w(
7
+ 245
8
+ 035
9
+ 100
10
+ 010
11
+ 001
12
+ 050
13
+ 300
14
+ 600
15
+ 856
16
+ 260
17
+ )
18
+
19
+ Benchmark.bmbm do |x|
20
+
21
+ x.report("0 tags; nohash") do
22
+ reader = MARC4J4R.reader(filename)
23
+ reader.each(false) do |r|
24
+ tags.each do |t|
25
+ # fields = r.find_by_tag(t, true)
26
+ end
27
+ end
28
+ end
29
+ x.report("0 tags; hash") do
30
+ reader = MARC4J4R.reader(filename)
31
+ reader.each do |r|
32
+ tags.each do |t|
33
+ # fields = r.find_by_tag(t, true)
34
+ end
35
+ end
36
+ end
37
+
38
+
39
+ (1..10).each do |i|
40
+ x.report("#{tags.size * i} tags; nohash") do
41
+ reader = MARC4J4R.reader(filename)
42
+ reader.each(false) do |r|
43
+ i.times do
44
+ tags.each do |t|
45
+ fields = r.find_by_tag(t, true)
46
+ end
47
+ end
48
+ end
49
+ end
50
+ x.report("#{tags.size * i} tags; hash") do
51
+ reader = MARC4J4R.reader(filename)
52
+ reader.each do |r|
53
+ i.times do
54
+ tags.each do |t|
55
+ fields = r.find_by_tag(t)
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
62
+
63
+
data/test/one.seq ADDED
@@ -0,0 +1,30 @@
1
+ 006988435 LDR L 00000nam^a2200277Ia^4500
2
+ 006988435 001 L 006988435
3
+ 006988435 006 L m^^^^^^^^d^^^^^^^^
4
+ 006988435 007 L cr^cn|||||||||
5
+ 006988435 008 L 070103s2007^^^^nyu^^^^^sb^^^^001^0^eng^d
6
+ 006988435 010 L $$z2007060391
7
+ 006988435 020 L $$z9781403977564
8
+ 006988435 035 L $$a(CaPaEBR)ebrtrial10194097
9
+ 006988435 040 L $$aCaPaEBR$$cCaPaEBR$$dMiU
10
+ 006988435 05014 L $$aPL207$$b.C39 2007eb
11
+ 006988435 1001 L $$aÇayır, Kenan.
12
+ 006988435 24510 L $$aIslamic literature in contemporary Turkey$$h[electronic resource]$$bfrom epic to novel /$$cKenan Çayır.
13
+ 006988435 250 L $$a1st ed.
14
+ 006988435 260 L $$aNew York :$$bPalgrave Macmillan,$$c2007.
15
+ 006988435 504 L $$aIncludes bibliographical references (p. 189-198) and index.
16
+ 006988435 650 0 L $$aIslamic literature, Turkish$$xHistory and criticism.
17
+ 006988435 650 0 L $$aTurkish literature$$xHistory and criticism.
18
+ 006988435 7102 L $$aebrary, Inc.
19
+ 006988435 85640 L $$3Ann Arbor campus:$$zAccess to the ebrary online version restricted; authentication may be required:$$uhttp://site.ebrary.com/lib/umich/Doc?id=10194097
20
+ 006988435 85640 L $$3Flint campus:$$zAccess to the ebrary online version restricted; authentication may be required:$$uhttp://libproxy.umflint.edu:2048/login?url=http://site.ebrary.com/lib/umich/Doc?id=10194097
21
+ 006988435 8526 L $$aMiU$$bELEC$$hSee URL for access
22
+ 006988435 8526 L $$aMiFliC$$bFLINT$$cELEC$$hSee URL for access
23
+ 006988435 970 L $$aBK$$bBook
24
+ 006988435 970 L $$aCE$$bElectronic Resource
25
+ 006988435 971 L $$aMiU
26
+ 006988435 971 L $$aMiFliC
27
+ 006988435 972 L $$c20091022
28
+ 006988435 972 L $$c20091022
29
+ 006988435 973 L $$aAO$$bavail_online
30
+ 006988435 998 L $$cebrary-dld ERLoad20091022
data/test/readtest.rb ADDED
@@ -0,0 +1,120 @@
1
+ require 'benchmark'
2
+ require '../lib/marc4j4r.rb'
3
+ curdir = File.dirname(__FILE__)
4
+
5
+ Dir.glob("#{curdir}/../../jruby_marc_to_solr/jars/*.jar") do |x|
6
+ require x
7
+ end
8
+
9
+ include_class Java::org.solrmarc.marc.MarcAlephSequentialReader
10
+
11
+
12
+ Benchmark.bmbm do |x|
13
+
14
+ x.report("Java AS") do
15
+ count = 0
16
+ 3.times do
17
+ reader = MarcAlephSequentialReader.new(java.io.FileInputStream.new('test.seq'.to_java_string))
18
+ reader.each do |r|
19
+ count += 1
20
+ end
21
+ end
22
+ # puts "AS read #{count} records"
23
+ end
24
+
25
+ x.report("strict") do
26
+ count = 0
27
+ 3.times do
28
+ reader = MARC4J4R.reader('test.mrc', :strictmarc)
29
+ reader.each do |r|
30
+ count += 1
31
+ end
32
+ end
33
+ # puts "Strict binary read #{count} records"
34
+ end
35
+
36
+ x.report("xml") do
37
+ count = 0
38
+ 3.times do
39
+ reader = MARC4J4R.reader('test.xml', :marcxml)
40
+ reader.each do |r|
41
+ count += 1
42
+ end
43
+ end
44
+ # puts "XML read #{count} records"
45
+ end
46
+
47
+
48
+ x.report("alephsequential") do
49
+ count = 0
50
+ 3.times do
51
+ reader = MARC4J4R.reader('test.seq', :alephsequential)
52
+ reader.each do |r|
53
+ count += 1
54
+ end
55
+ end
56
+ # puts "AS read #{count} records"
57
+ end
58
+
59
+
60
+
61
+
62
+ end
63
+
64
+ __END__
65
+ class AlephSequentialReader
66
+ include Enumerable
67
+ def initialize(fromwhere)
68
+ stream = nil
69
+ if fromwhere.is_a? Java::JavaIO::InputStream
70
+ stream = fromwhere.to_io
71
+ elsif fromwhere.is_a? IO
72
+ stream = fromwhere
73
+ else
74
+ stream = File.new(fromwhere)
75
+ end
76
+
77
+ @handle = stream
78
+ end
79
+
80
+ def each
81
+ record = nil
82
+ currentID = nil
83
+
84
+ @handle.each_line do |l|
85
+ l.chomp!
86
+ next unless l =~ /\S/
87
+ vals = l.unpack('a9 a a3 c c a3 a*')
88
+ id, tag, ind1, ind2, data = vals[0], vals[2], vals[3], vals[4], vals[6]
89
+ # id, tag, ind1, ind2, junk, data = *(l.unpack('A10 a3 c c a3 A*'))
90
+ if id != currentID
91
+ if record
92
+ yield record
93
+ end
94
+ record = RecordImpl.new
95
+ currentID = id
96
+ end
97
+ if tag == 'LDR'
98
+ record.setLeader(Java::org.marc4j.marc.impl.LeaderImpl.new(data))
99
+ else
100
+ record << buildField(tag,ind1,ind2,data)
101
+ end
102
+ end
103
+ yield record
104
+ end
105
+
106
+
107
+ SUBREGEXP = /\$\$(.)/
108
+ def buildField (tag, ind1, ind2, data)
109
+ if Java::org.marc4j.marc.impl.Verifier.isControlField tag
110
+ return Java::org.marc4j.marc.impl.ControlFieldImpl.new(tag, data)
111
+ else
112
+ f = Java::org.marc4j.marc.impl.DataFieldImpl.new(tag, ind1, ind2)
113
+ data.split(SUBREGEXP)[1..-1].each_slice(2) do |code, value|
114
+ f.addSubfield Java::org.marc4j.marc.impl.SubfieldImpl.new(code[0].ord, value)
115
+ end
116
+ return f
117
+ end
118
+ end
119
+
120
+ end # End of class AlephSequentialReader
data/test/t.rb ADDED
@@ -0,0 +1,6 @@
1
+ File.open('test.seq') do |f|
2
+ f.each_line do |x|
3
+ next unless x[10..12] == 'LDR'
4
+ puts x if x.size == 43
5
+ end
6
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc4j4r
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - BillDueber
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-02-15 00:00:00 -05:00
12
+ date: 2010-04-07 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -69,12 +69,15 @@ files:
69
69
  - doc/js/jquery.js
70
70
  - doc/method_list.html
71
71
  - doc/top-level-namespace.html
72
+ - jars/MarcImporter.jar
72
73
  - jars/marc4j.jar
73
74
  - lib/marc4j4r.rb
75
+ - test/batch.seq
76
+ - test/bench.rb
74
77
  - test/helper.rb
75
78
  - test/one.dat
79
+ - test/one.seq
76
80
  - test/one.xml
77
- - test/test_marc4j4r.rb
78
81
  has_rdoc: true
79
82
  homepage: http://github.com/billdueber/marc4j4r
80
83
  licenses: []
@@ -104,5 +107,7 @@ signing_key:
104
107
  specification_version: 3
105
108
  summary: Use marc4j java library in JRuby in a more ruby-ish way
106
109
  test_files:
110
+ - test/bench.rb
107
111
  - test/helper.rb
108
- - test/test_marc4j4r.rb
112
+ - test/readtest.rb
113
+ - test/t.rb
@@ -1,76 +0,0 @@
1
- require 'helper'
2
-
3
- # one.xml
4
- # LEADER 00000njm a2200000uu 4500
5
- # 001 afc99990058366
6
- # 003 DLC
7
- # 005 20071104155141.9
8
- # 007 sd ummunniauub
9
- # 008 071103s1939 xxufmnne||||||||| u eng||
10
- # 010 $a afc99990058366
11
- # 040 $a DLC $c DLC
12
- # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
13
- # 260 $a Medina, Texas, $c 1939.
14
- # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
15
- # 651 0 $a Medina $z Texas $z United States of America.
16
- # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
17
- # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
18
- # 700 1 $a Taylor, Beale D. $e Singer.
19
- # 852 $a American Folklife Center, Library of Congress
20
- # 852 $a DLC
21
-
22
- class TestMarc4j4r < Test::Unit::TestCase
23
-
24
- def setup
25
- reader = MARC4J4R.reader(File.dirname(__FILE__) + '/one.xml', :marcxml)
26
- @r = reader.next
27
- end
28
-
29
- should "get the leader as a string" do
30
- assert_equal '00000njm a2200000uu 4500', @r.leader
31
- end
32
-
33
- should "get all fields with the given tag" do
34
- assert_equal 3, @r.find_by_tag('700').size
35
- end
36
-
37
- should "get all fields with any of the given tags" do
38
- assert_equal 6, @r.find_by_tag(['010','700', '852']).size
39
- end
40
-
41
- should "get an empty array trying to find a non-existent tag" do
42
- assert_equal [], @r.find_by_tag('002')
43
- end
44
-
45
- should "not return anything for a non-existent tag" do
46
- assert_equal 1, @r.find_by_tag(['010', '002']).size
47
- end
48
-
49
- should "get the value of a control tag" do
50
- assert_equal 'DLC', @r['003'].value
51
- end
52
-
53
- should "get a subfield value via field[]" do
54
- assert_equal 'Sung by Beale D. Taylor.', @r['245']['c']
55
- end
56
-
57
- should "joing all values of a field with a space" do
58
- assert_equal "DLC DLC", @r['040'].value
59
- end
60
-
61
- should "Get the first field with a given tag via []" do
62
- assert_equal '700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.', @r['700'].to_s
63
- end
64
-
65
- should "get the subfield values in order of the original record" do
66
- assert_equal ['Medina, Texas,', '1939.'], @r['260'].sub_values(['a', 'c'])
67
- assert_equal ['Medina, Texas,', '1939.'], @r['260'].sub_values(['c', 'a'])
68
- end
69
-
70
- should "get the subfield values in order of the codes I pass" do
71
- assert_equal [ '1939.', 'Medina, Texas,'], @r['260'].sub_values(['c', 'a'], true)
72
- end
73
-
74
-
75
-
76
- end