marc4j4r 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/doc/ControlFieldImpl.html +153 -58
- data/doc/DataFieldImpl.html +223 -133
- data/doc/Java/OrgMarc4j/MarcReader.html +13 -13
- data/doc/MARC4J4R.html +34 -22
- data/doc/RecordImpl.html +415 -72
- data/doc/SubfieldImpl.html +63 -14
- data/doc/_index.html +76 -1
- data/doc/class_list.html +1 -1
- data/doc/file.README.html +7 -4
- data/doc/index.html +7 -4
- data/doc/method_list.html +168 -24
- data/doc/top-level-namespace.html +3 -3
- data/jars/MarcImporter.jar +0 -0
- data/jars/marc4j.jar +0 -0
- data/lib/marc4j4r.rb +190 -37
- data/test/batch.seq +118 -0
- data/test/bench.rb +63 -0
- data/test/one.seq +30 -0
- data/test/readtest.rb +120 -0
- data/test/t.rb +6 -0
- metadata +9 -4
- data/test/test_marc4j4r.rb +0 -76
data/test/bench.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require '../lib/marc4j4r'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
filename = 'test.mrc'
|
6
|
+
tags = %w(
|
7
|
+
245
|
8
|
+
035
|
9
|
+
100
|
10
|
+
010
|
11
|
+
001
|
12
|
+
050
|
13
|
+
300
|
14
|
+
600
|
15
|
+
856
|
16
|
+
260
|
17
|
+
)
|
18
|
+
|
19
|
+
Benchmark.bmbm do |x|
|
20
|
+
|
21
|
+
x.report("0 tags; nohash") do
|
22
|
+
reader = MARC4J4R.reader(filename)
|
23
|
+
reader.each(false) do |r|
|
24
|
+
tags.each do |t|
|
25
|
+
# fields = r.find_by_tag(t, true)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
x.report("0 tags; hash") do
|
30
|
+
reader = MARC4J4R.reader(filename)
|
31
|
+
reader.each do |r|
|
32
|
+
tags.each do |t|
|
33
|
+
# fields = r.find_by_tag(t, true)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
(1..10).each do |i|
|
40
|
+
x.report("#{tags.size * i} tags; nohash") do
|
41
|
+
reader = MARC4J4R.reader(filename)
|
42
|
+
reader.each(false) do |r|
|
43
|
+
i.times do
|
44
|
+
tags.each do |t|
|
45
|
+
fields = r.find_by_tag(t, true)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
x.report("#{tags.size * i} tags; hash") do
|
51
|
+
reader = MARC4J4R.reader(filename)
|
52
|
+
reader.each do |r|
|
53
|
+
i.times do
|
54
|
+
tags.each do |t|
|
55
|
+
fields = r.find_by_tag(t)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
|
data/test/one.seq
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
006988435 LDR L 00000nam^a2200277Ia^4500
|
2
|
+
006988435 001 L 006988435
|
3
|
+
006988435 006 L m^^^^^^^^d^^^^^^^^
|
4
|
+
006988435 007 L cr^cn|||||||||
|
5
|
+
006988435 008 L 070103s2007^^^^nyu^^^^^sb^^^^001^0^eng^d
|
6
|
+
006988435 010 L $$z2007060391
|
7
|
+
006988435 020 L $$z9781403977564
|
8
|
+
006988435 035 L $$a(CaPaEBR)ebrtrial10194097
|
9
|
+
006988435 040 L $$aCaPaEBR$$cCaPaEBR$$dMiU
|
10
|
+
006988435 05014 L $$aPL207$$b.C39 2007eb
|
11
|
+
006988435 1001 L $$aÇayır, Kenan.
|
12
|
+
006988435 24510 L $$aIslamic literature in contemporary Turkey$$h[electronic resource]$$bfrom epic to novel /$$cKenan Çayır.
|
13
|
+
006988435 250 L $$a1st ed.
|
14
|
+
006988435 260 L $$aNew York :$$bPalgrave Macmillan,$$c2007.
|
15
|
+
006988435 504 L $$aIncludes bibliographical references (p. 189-198) and index.
|
16
|
+
006988435 650 0 L $$aIslamic literature, Turkish$$xHistory and criticism.
|
17
|
+
006988435 650 0 L $$aTurkish literature$$xHistory and criticism.
|
18
|
+
006988435 7102 L $$aebrary, Inc.
|
19
|
+
006988435 85640 L $$3Ann Arbor campus:$$zAccess to the ebrary online version restricted; authentication may be required:$$uhttp://site.ebrary.com/lib/umich/Doc?id=10194097
|
20
|
+
006988435 85640 L $$3Flint campus:$$zAccess to the ebrary online version restricted; authentication may be required:$$uhttp://libproxy.umflint.edu:2048/login?url=http://site.ebrary.com/lib/umich/Doc?id=10194097
|
21
|
+
006988435 8526 L $$aMiU$$bELEC$$hSee URL for access
|
22
|
+
006988435 8526 L $$aMiFliC$$bFLINT$$cELEC$$hSee URL for access
|
23
|
+
006988435 970 L $$aBK$$bBook
|
24
|
+
006988435 970 L $$aCE$$bElectronic Resource
|
25
|
+
006988435 971 L $$aMiU
|
26
|
+
006988435 971 L $$aMiFliC
|
27
|
+
006988435 972 L $$c20091022
|
28
|
+
006988435 972 L $$c20091022
|
29
|
+
006988435 973 L $$aAO$$bavail_online
|
30
|
+
006988435 998 L $$cebrary-dld ERLoad20091022
|
data/test/readtest.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require '../lib/marc4j4r.rb'
|
3
|
+
curdir = File.dirname(__FILE__)
|
4
|
+
|
5
|
+
Dir.glob("#{curdir}/../../jruby_marc_to_solr/jars/*.jar") do |x|
|
6
|
+
require x
|
7
|
+
end
|
8
|
+
|
9
|
+
include_class Java::org.solrmarc.marc.MarcAlephSequentialReader
|
10
|
+
|
11
|
+
|
12
|
+
Benchmark.bmbm do |x|
|
13
|
+
|
14
|
+
x.report("Java AS") do
|
15
|
+
count = 0
|
16
|
+
3.times do
|
17
|
+
reader = MarcAlephSequentialReader.new(java.io.FileInputStream.new('test.seq'.to_java_string))
|
18
|
+
reader.each do |r|
|
19
|
+
count += 1
|
20
|
+
end
|
21
|
+
end
|
22
|
+
# puts "AS read #{count} records"
|
23
|
+
end
|
24
|
+
|
25
|
+
x.report("strict") do
|
26
|
+
count = 0
|
27
|
+
3.times do
|
28
|
+
reader = MARC4J4R.reader('test.mrc', :strictmarc)
|
29
|
+
reader.each do |r|
|
30
|
+
count += 1
|
31
|
+
end
|
32
|
+
end
|
33
|
+
# puts "Strict binary read #{count} records"
|
34
|
+
end
|
35
|
+
|
36
|
+
x.report("xml") do
|
37
|
+
count = 0
|
38
|
+
3.times do
|
39
|
+
reader = MARC4J4R.reader('test.xml', :marcxml)
|
40
|
+
reader.each do |r|
|
41
|
+
count += 1
|
42
|
+
end
|
43
|
+
end
|
44
|
+
# puts "XML read #{count} records"
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
x.report("alephsequential") do
|
49
|
+
count = 0
|
50
|
+
3.times do
|
51
|
+
reader = MARC4J4R.reader('test.seq', :alephsequential)
|
52
|
+
reader.each do |r|
|
53
|
+
count += 1
|
54
|
+
end
|
55
|
+
end
|
56
|
+
# puts "AS read #{count} records"
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
__END__
|
65
|
+
class AlephSequentialReader
|
66
|
+
include Enumerable
|
67
|
+
def initialize(fromwhere)
|
68
|
+
stream = nil
|
69
|
+
if fromwhere.is_a? Java::JavaIO::InputStream
|
70
|
+
stream = fromwhere.to_io
|
71
|
+
elsif fromwhere.is_a? IO
|
72
|
+
stream = fromwhere
|
73
|
+
else
|
74
|
+
stream = File.new(fromwhere)
|
75
|
+
end
|
76
|
+
|
77
|
+
@handle = stream
|
78
|
+
end
|
79
|
+
|
80
|
+
def each
|
81
|
+
record = nil
|
82
|
+
currentID = nil
|
83
|
+
|
84
|
+
@handle.each_line do |l|
|
85
|
+
l.chomp!
|
86
|
+
next unless l =~ /\S/
|
87
|
+
vals = l.unpack('a9 a a3 c c a3 a*')
|
88
|
+
id, tag, ind1, ind2, data = vals[0], vals[2], vals[3], vals[4], vals[6]
|
89
|
+
# id, tag, ind1, ind2, junk, data = *(l.unpack('A10 a3 c c a3 A*'))
|
90
|
+
if id != currentID
|
91
|
+
if record
|
92
|
+
yield record
|
93
|
+
end
|
94
|
+
record = RecordImpl.new
|
95
|
+
currentID = id
|
96
|
+
end
|
97
|
+
if tag == 'LDR'
|
98
|
+
record.setLeader(Java::org.marc4j.marc.impl.LeaderImpl.new(data))
|
99
|
+
else
|
100
|
+
record << buildField(tag,ind1,ind2,data)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
yield record
|
104
|
+
end
|
105
|
+
|
106
|
+
|
107
|
+
SUBREGEXP = /\$\$(.)/
|
108
|
+
def buildField (tag, ind1, ind2, data)
|
109
|
+
if Java::org.marc4j.marc.impl.Verifier.isControlField tag
|
110
|
+
return Java::org.marc4j.marc.impl.ControlFieldImpl.new(tag, data)
|
111
|
+
else
|
112
|
+
f = Java::org.marc4j.marc.impl.DataFieldImpl.new(tag, ind1, ind2)
|
113
|
+
data.split(SUBREGEXP)[1..-1].each_slice(2) do |code, value|
|
114
|
+
f.addSubfield Java::org.marc4j.marc.impl.SubfieldImpl.new(code[0].ord, value)
|
115
|
+
end
|
116
|
+
return f
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
end # End of class AlephSequentialReader
|
data/test/t.rb
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc4j4r
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- BillDueber
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-04-07 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -69,12 +69,15 @@ files:
|
|
69
69
|
- doc/js/jquery.js
|
70
70
|
- doc/method_list.html
|
71
71
|
- doc/top-level-namespace.html
|
72
|
+
- jars/MarcImporter.jar
|
72
73
|
- jars/marc4j.jar
|
73
74
|
- lib/marc4j4r.rb
|
75
|
+
- test/batch.seq
|
76
|
+
- test/bench.rb
|
74
77
|
- test/helper.rb
|
75
78
|
- test/one.dat
|
79
|
+
- test/one.seq
|
76
80
|
- test/one.xml
|
77
|
-
- test/test_marc4j4r.rb
|
78
81
|
has_rdoc: true
|
79
82
|
homepage: http://github.com/billdueber/marc4j4r
|
80
83
|
licenses: []
|
@@ -104,5 +107,7 @@ signing_key:
|
|
104
107
|
specification_version: 3
|
105
108
|
summary: Use marc4j java library in JRuby in a more ruby-ish way
|
106
109
|
test_files:
|
110
|
+
- test/bench.rb
|
107
111
|
- test/helper.rb
|
108
|
-
- test/test_marc4j4r.rb
|
112
|
+
- test/readtest.rb
|
113
|
+
- test/t.rb
|
data/test/test_marc4j4r.rb
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
# one.xml
|
4
|
-
# LEADER 00000njm a2200000uu 4500
|
5
|
-
# 001 afc99990058366
|
6
|
-
# 003 DLC
|
7
|
-
# 005 20071104155141.9
|
8
|
-
# 007 sd ummunniauub
|
9
|
-
# 008 071103s1939 xxufmnne||||||||| u eng||
|
10
|
-
# 010 $a afc99990058366
|
11
|
-
# 040 $a DLC $c DLC
|
12
|
-
# 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
|
13
|
-
# 260 $a Medina, Texas, $c 1939.
|
14
|
-
# 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
|
15
|
-
# 651 0 $a Medina $z Texas $z United States of America.
|
16
|
-
# 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
|
17
|
-
# 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
|
18
|
-
# 700 1 $a Taylor, Beale D. $e Singer.
|
19
|
-
# 852 $a American Folklife Center, Library of Congress
|
20
|
-
# 852 $a DLC
|
21
|
-
|
22
|
-
class TestMarc4j4r < Test::Unit::TestCase
|
23
|
-
|
24
|
-
def setup
|
25
|
-
reader = MARC4J4R.reader(File.dirname(__FILE__) + '/one.xml', :marcxml)
|
26
|
-
@r = reader.next
|
27
|
-
end
|
28
|
-
|
29
|
-
should "get the leader as a string" do
|
30
|
-
assert_equal '00000njm a2200000uu 4500', @r.leader
|
31
|
-
end
|
32
|
-
|
33
|
-
should "get all fields with the given tag" do
|
34
|
-
assert_equal 3, @r.find_by_tag('700').size
|
35
|
-
end
|
36
|
-
|
37
|
-
should "get all fields with any of the given tags" do
|
38
|
-
assert_equal 6, @r.find_by_tag(['010','700', '852']).size
|
39
|
-
end
|
40
|
-
|
41
|
-
should "get an empty array trying to find a non-existent tag" do
|
42
|
-
assert_equal [], @r.find_by_tag('002')
|
43
|
-
end
|
44
|
-
|
45
|
-
should "not return anything for a non-existent tag" do
|
46
|
-
assert_equal 1, @r.find_by_tag(['010', '002']).size
|
47
|
-
end
|
48
|
-
|
49
|
-
should "get the value of a control tag" do
|
50
|
-
assert_equal 'DLC', @r['003'].value
|
51
|
-
end
|
52
|
-
|
53
|
-
should "get a subfield value via field[]" do
|
54
|
-
assert_equal 'Sung by Beale D. Taylor.', @r['245']['c']
|
55
|
-
end
|
56
|
-
|
57
|
-
should "joing all values of a field with a space" do
|
58
|
-
assert_equal "DLC DLC", @r['040'].value
|
59
|
-
end
|
60
|
-
|
61
|
-
should "Get the first field with a given tag via []" do
|
62
|
-
assert_equal '700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.', @r['700'].to_s
|
63
|
-
end
|
64
|
-
|
65
|
-
should "get the subfield values in order of the original record" do
|
66
|
-
assert_equal ['Medina, Texas,', '1939.'], @r['260'].sub_values(['a', 'c'])
|
67
|
-
assert_equal ['Medina, Texas,', '1939.'], @r['260'].sub_values(['c', 'a'])
|
68
|
-
end
|
69
|
-
|
70
|
-
should "get the subfield values in order of the codes I pass" do
|
71
|
-
assert_equal [ '1939.', 'Medina, Texas,'], @r['260'].sub_values(['c', 'a'], true)
|
72
|
-
end
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
end
|