marc4j4r 0.2.4 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{README.markdown → README.rdoc} +28 -10
- data/VERSION +1 -1
- data/lib/marc4j4r/reader.rb +21 -19
- data/lib/marc4j4r/record.rb +1 -1
- data/spec/chinese_utf8.dat +1 -0
- data/spec/reader_spec.rb +57 -0
- metadata +9 -8
@@ -1,8 +1,19 @@
|
|
1
|
-
|
1
|
+
= marc4j4r
|
2
2
|
|
3
3
|
A ruby wrapper around the marc4j.jar (as forked by javamarc) java library for dealing with library MARC data.
|
4
4
|
|
5
|
-
|
5
|
+
|
6
|
+
*Note*: rdoc.info doesn't do a great job with this; I think it's getting confused by the java stuff. Here's a list of
|
7
|
+
links for all the classes:
|
8
|
+
|
9
|
+
* {MARC4J4R::Reader}
|
10
|
+
* {MARC4J4R::Writer}
|
11
|
+
* {MARC4J4R::Record}
|
12
|
+
* {MARC4J4R::ControlField}
|
13
|
+
* {MARC4J4R::DataField}
|
14
|
+
* {MARC4J4R::SubField}
|
15
|
+
|
16
|
+
== Getting a MARC reader
|
6
17
|
|
7
18
|
marc4j4r provides three readers out of the box: :strictmarc (binary), :permissivemarc (binary), and :marcxml (MARC-XML).
|
8
19
|
You can pass either a filename or an open IO object (either ruby or java.io.inputstream)
|
@@ -26,7 +37,7 @@ You can pass either a filename or an open IO object (either ruby or java.io.inpu
|
|
26
37
|
istream = jurl.openConnection.getInputStream
|
27
38
|
reader = MARC4J4R::Reader.new(istream)
|
28
39
|
|
29
|
-
|
40
|
+
== Using the reader
|
30
41
|
|
31
42
|
A MARC4J4R::Reader is an Enumerable, so you can do:
|
32
43
|
|
@@ -34,15 +45,14 @@ A MARC4J4R::Reader is an Enumerable, so you can do:
|
|
34
45
|
# do stuff with the record
|
35
46
|
end
|
36
47
|
|
37
|
-
Or, if you're using [
|
48
|
+
Or, if you're using threach[http://rdoc.info/projects/billdueber/threach]:
|
38
49
|
|
39
50
|
reader.threach(2) do |record|
|
40
51
|
# do stuff with records in two threads
|
41
52
|
end
|
42
53
|
|
43
|
-
|
54
|
+
== Using the writer
|
44
55
|
|
45
|
-
The writer code has not yet been tested; it *should* work as follows:
|
46
56
|
|
47
57
|
binaryWriter = MARC4J4R::Writer.new(filename, :strictmarc)
|
48
58
|
xmlWriter = MARC4J4R::Writer.new(filename, :marcxml)
|
@@ -52,12 +62,20 @@ The writer code has not yet been tested; it *should* work as follows:
|
|
52
62
|
writer.close
|
53
63
|
|
54
64
|
|
55
|
-
|
65
|
+
== Working with records and fields
|
56
66
|
|
57
67
|
In addition to all the normal marc4j methods, MARC4J4R::Record exposes some additional methods
|
58
68
|
and syntaxes.
|
59
69
|
|
60
|
-
|
70
|
+
See the classes themselves and/or the specs for more examples.
|
71
|
+
|
72
|
+
* {MARC4J4R::Reader}
|
73
|
+
* {MARC4J4R::Writer}
|
74
|
+
* {MARC4J4R::Record}
|
75
|
+
* {MARC4J4R::ControlField}
|
76
|
+
* {MARC4J4R::DataField}
|
77
|
+
* {MARC4J4R::SubField}
|
78
|
+
|
61
79
|
|
62
80
|
leader = record.leader
|
63
81
|
|
@@ -113,7 +131,7 @@ and syntaxes.
|
|
113
131
|
allSubfieldAorBValues = df.sub_values(['a', 'b'])
|
114
132
|
|
115
133
|
|
116
|
-
|
134
|
+
== Note on Patches/Pull Requests
|
117
135
|
|
118
136
|
* Fork the project.
|
119
137
|
* Make your feature addition or bug fix.
|
@@ -123,6 +141,6 @@ and syntaxes.
|
|
123
141
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
124
142
|
* Send me a pull request. Bonus points for topic branches.
|
125
143
|
|
126
|
-
|
144
|
+
== Copyright
|
127
145
|
|
128
146
|
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.9.0
|
data/lib/marc4j4r/reader.rb
CHANGED
@@ -13,32 +13,28 @@ end
|
|
13
13
|
|
14
14
|
|
15
15
|
module MARC4J4R
|
16
|
-
|
17
|
-
#
|
18
|
-
# Adjust the interface so that a #new call to any implementations that
|
19
|
-
# implement it can take a java.io.InputStream, ruby IO object, or String
|
20
|
-
# (that will be interpreted as a filename) without complaining.
|
21
|
-
#
|
22
|
-
# The mechanism -- running module_eval on a string-representation of the
|
23
|
-
# new method in each of the hard-coded implementations of MarcReader
|
24
|
-
# (MarcStreamReader,MarcPermissiveStreamReader,MarcXmlReader) -- is ugly
|
25
|
-
# and deeply unsettling.
|
26
|
-
#
|
27
|
-
# @author Bill Dueber
|
28
|
-
|
16
|
+
|
29
17
|
# First, add Enumerable to the interface
|
30
18
|
Java::org.marc4j.MarcReader.module_eval("include Enumerable")
|
31
19
|
|
32
|
-
|
33
20
|
class Reader
|
34
|
-
|
21
|
+
|
22
|
+
ENCODINGS = ['UTF-8', 'ISO-8859-1', 'MARC-8']
|
23
|
+
ENCODING_ALIASES = {:utf8 => 'UTF-8', :marc8 => 'MARC-8', :iso => 'ISO-8859-1'}
|
24
|
+
attr_reader :handle
|
25
|
+
|
35
26
|
# Get a marc reader of the appropriate type
|
36
27
|
# @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
|
37
28
|
# @param [:strictmarc, :permissivemarc, :marcxml] type The type of MARC reader you want.
|
29
|
+
# @param [:utf8, :iso, :marc8, 'UTF-8', 'ISO-8859-1', 'MARC-8'] encoding An explicit encoding
|
38
30
|
# @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g., each)
|
39
31
|
#
|
40
32
|
# @example Get a strict binary MARC reader for the file 'test.mrc'
|
41
33
|
# reader = MARC4J4R::Reader.new('test.mrc')
|
34
|
+
# reader = MARC4J4R::Reader.new('test.mrc', :strictmarc) # same thing; :strictmarc is the default
|
35
|
+
#
|
36
|
+
# @example Get a strict binary MARC reader for the file 'test.mrc', force input to be treated as utf-8
|
37
|
+
# reader = MARC4J4R::Reader.new('test.mrc', :strictmarc, :utf8)
|
42
38
|
#
|
43
39
|
# @example Get a permissive binary MARC reader
|
44
40
|
# reader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
|
@@ -51,14 +47,20 @@ module MARC4J4R
|
|
51
47
|
# infile = open('http://my.machine.com/test.mrc')
|
52
48
|
# reader = MARC4J4R::Reader.new(infile)
|
53
49
|
|
54
|
-
|
55
|
-
|
50
|
+
def self.new(input, type = :strictmarc, encoding = nil)
|
51
|
+
if encoding
|
52
|
+
encoding = ENCODING_ALIASES[encoding] if ENCODING_ALIASES[encoding]
|
53
|
+
unless ENCODINGS.include? encoding
|
54
|
+
raise ArgumentError, "Encoding must be in [#{ENCODINGS.map {|x| '"' + x + '"'}.join(', ')}], not \"#{encoding}\""
|
55
|
+
end
|
56
|
+
end
|
56
57
|
@handle = IOConvert.byteinstream(input)
|
57
58
|
case type
|
58
59
|
when :strictmarc then
|
59
|
-
return Java::org.marc4j.MarcStreamReader.new(@handle)
|
60
|
+
return Java::org.marc4j.MarcStreamReader.new(@handle, encoding)
|
60
61
|
when :permissivemarc then
|
61
|
-
|
62
|
+
encoding ||= 'BESTGUESS'
|
63
|
+
return Java::org.marc4j.MarcPermissiveStreamReader.new(@handle, true, true, encoding)
|
62
64
|
when :marcxml then
|
63
65
|
return Java::org.marc4j.MarcXmlReader.new(@handle)
|
64
66
|
when :alephsequential then
|
data/lib/marc4j4r/record.rb
CHANGED
@@ -157,7 +157,7 @@ module MARC4J4R
|
|
157
157
|
def to_marc
|
158
158
|
begin
|
159
159
|
s = Java::java.io.ByteArrayOutputStream.new
|
160
|
-
writer = org.marc4j.
|
160
|
+
writer = org.marc4j.MarcPermissiveStreamWriter.new(s)
|
161
161
|
writer.write(self)
|
162
162
|
@marcbinary = s.to_string
|
163
163
|
return @marcbinary
|
@@ -0,0 +1 @@
|
|
1
|
+
02137nam^a22005531a^450000100100000000500170001000600190002700700150004600800410006102000130010203500210011503500230013603500280015904000260018704300130021324501480022624501280037425000180050225000220052026000980054226000870064030000590072750400290078653800300081561000420084561000390088765000200092665000350094665000350098165000370101671000410105371000390109471000430113371000450117671000430122171000420126485200410130685200190134797000130136697000280137997100080140797200130141597300170142897300190144597400370146497400310150199800130153298700380154500078551420071203110617.0m^^^^^^^^d^^^^^^^^cr^bn^---auaua010425s1982^^^^cc^af^^^^b^^^^00000^chi^^ cRMBY0.95 a(RLIN)MIUO83-B61 a(OCoLC)ocm10694745 asdr-nrlfGLAD117991861-B aMiU-AcMiU-AdCStRLIN aa--cc---00601aXiang E Gan Su qu shi gao /cHunan sheng she hui ke xue yuan, Wuhan shi fan xue yuan li shi xi, Yichun Diqu shi liao zheng ji ban bian xie.00601a湘鄂贛苏区史稿 /c湖南省社会科学院, 武汉师范学院历史系, 宜春地区史料征集办编写.9chi 602aDi 1 ban. 602a第1版.9chi 603aChangsha Shi :bHunan ren min chu ban she :bHunan sheng xin hua shu dian fa xing,c1982. 603a长沙市 :b湖南人民出版社 :b湖南省新華書店发行,c1982.9chi a8, 294 p., [1] folded leaf of plates :bill. ;c21 cm. aBibliography: p.286-294. aMode of access: Internet.20aZhongguo gong chan dang604xHistory.24604a中国共产党xHistory.9chi 0aSovietszChina. 0aCommunismzChinazHunan Sheng. 0aCommunismzChinazHubei Sheng. 0aCommunismzChinazJiangxi Sheng.2 605aHunan sheng she hui ke xue yuan.2 605a湖南省社会科学院.9chi2 aWuhan shi fan xue yuan.bLi shi xi6062 606a武汉师范学院.b历史系.9chi2 607aYichun Diqu shi liao zheng ji ban.2 607a宜春地区史料征集办.9chi0 aMiUbBUHRcASIAhJQ1519 .A5C56 H7316 auc1bSDRcNRLF aBKbBook aCEbElectronic Resource aMiU c20040625 aHTbavail_ht aACbavail_circ umdp.39015058511034ricd20091120 uuc1.b3389730ricd20100524 cSYs9114 aPINYINbCStRLINc20010219dce1.1
|
data/spec/reader_spec.rb
CHANGED
@@ -25,7 +25,64 @@ describe "MARC4J4R_reader" do
|
|
25
25
|
recs.size.should.equal 10
|
26
26
|
end
|
27
27
|
|
28
|
+
it "Bails if passed an illegal type" do
|
29
|
+
lambda{reader = MARC4J4R::Reader.new("#{DIR}/batch.xml", :marc)}.should.raise(ArgumentError).message.should.match(/^Reader type/)
|
30
|
+
end
|
31
|
+
it "Bails if passed an illegal encoding" do
|
32
|
+
lambda{reader = MARC4J4R::Reader.new("#{DIR}/batch.xml", :strictmarc, :utf)}.should.raise(ArgumentError).message.should.match(/^Encoding/)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
describe "Reader encodings, strict reader" do
|
37
|
+
it "Works with UTF-8 without explicit encoding" do
|
38
|
+
reader = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat")
|
39
|
+
r = reader.first
|
40
|
+
second_title = r.find_by_tag('245')[1]
|
41
|
+
second_title['a'].should.equal "湘鄂贛苏区史稿 /"
|
42
|
+
end
|
43
|
+
it "Works with UTF-8, explicit encoding" do
|
44
|
+
reader = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat", :strictmarc, :utf8)
|
45
|
+
r = reader.first
|
46
|
+
second_title = r.find_by_tag('245')[1]
|
47
|
+
second_title['a'].should.equal "湘鄂贛苏区史稿 /"
|
48
|
+
end
|
49
|
+
it "Fails with UTF-8, wrong explicit encoding" do
|
50
|
+
reader = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat", :strictmarc, :marc8)
|
51
|
+
r = reader.first
|
52
|
+
second_title = r.find_by_tag('245')[1]
|
53
|
+
second_title['a'].should.not.equal "湘鄂贛苏区史稿 /"
|
54
|
+
end
|
55
|
+
it "Fails with UTF-8, wrong explicit encoding" do
|
56
|
+
reader = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat", :strictmarc, :iso)
|
57
|
+
r = reader.first
|
58
|
+
second_title = r.find_by_tag('245')[1]
|
59
|
+
second_title['a'].should.not.equal "湘鄂贛苏区史稿 /"
|
60
|
+
end
|
28
61
|
end
|
62
|
+
|
63
|
+
describe "Reader encodings, permissive reader" do
|
64
|
+
it "Works with UTF-8 without explicit encoding" do
|
65
|
+
reader = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat", :permissivemarc)
|
66
|
+
r = reader.first
|
67
|
+
second_title = r.find_by_tag('245')[1]
|
68
|
+
second_title['a'].should.equal "湘鄂贛苏区史稿 /"
|
69
|
+
end
|
70
|
+
it "Works with UTF-8, explicit encoding" do
|
71
|
+
reader = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat", :permissivemarc, :utf8)
|
72
|
+
r = reader.first
|
73
|
+
second_title = r.find_by_tag('245')[1]
|
74
|
+
second_title['a'].should.equal "湘鄂贛苏区史稿 /"
|
75
|
+
end
|
76
|
+
it "Fails with UTF-8, wrong explicit encoding" do
|
77
|
+
reader = MARC4J4R::Reader.new("#{DIR}/chinese_utf8.dat", :permissivemarc, :iso)
|
78
|
+
r = reader.first
|
79
|
+
second_title = r.find_by_tag('245')[1]
|
80
|
+
second_title['a'].should.equal "湘鄂贛苏区史稿 /" # equal, because it's permissive and tries to do the right thing
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
|
85
|
+
|
29
86
|
|
30
87
|
describe "Reader round-tripping" do
|
31
88
|
before do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc4j4r
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 59
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 9
|
9
|
+
- 0
|
10
|
+
version: 0.9.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- BillDueber
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-08-10 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -52,11 +52,11 @@ executables: []
|
|
52
52
|
|
53
53
|
extensions: []
|
54
54
|
|
55
|
-
extra_rdoc_files:
|
56
|
-
|
55
|
+
extra_rdoc_files:
|
56
|
+
- README.rdoc
|
57
57
|
files:
|
58
58
|
- LICENSE
|
59
|
-
- README.
|
59
|
+
- README.rdoc
|
60
60
|
- Rakefile
|
61
61
|
- VERSION
|
62
62
|
- jars/marc4j.jar
|
@@ -69,6 +69,7 @@ files:
|
|
69
69
|
- spec/batch.dat
|
70
70
|
- spec/batch.txt
|
71
71
|
- spec/batch.xml
|
72
|
+
- spec/chinese_utf8.dat
|
72
73
|
- spec/controlfield_spec.rb
|
73
74
|
- spec/datafield_spec.rb
|
74
75
|
- spec/one.dat
|