marc4j4r 0.2.2 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +128 -0
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/lib/marc4j4r/datafield.rb +2 -2
- metadata +74 -66
- data/README.rdoc +0 -17
- data/lib/original_monolithic_file.rb +0 -518
data/README.markdown
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# marc4j4r
|
2
|
+
|
3
|
+
A ruby wrapper around the marc4j.jar (as forked by javamarc) java library for dealing with library MARC data.
|
4
|
+
|
5
|
+
## Getting a MARC reader
|
6
|
+
|
7
|
+
marc4j4r provides three readers out of the box: :strictmarc (binary), :permissivemarc (binary), and :marcxml (MARC-XML).
|
8
|
+
You can pass either a filename or an open IO object (either a Ruby IO or a java.io.InputStream).
|
9
|
+
|
10
|
+
require 'marc4j4r'
|
11
|
+
|
12
|
+
binreader = MARC4J4R::Reader.new('test.mrc') # defaults to :strictmarc
|
13
|
+
binreader = MARC4J4R::Reader.new('test.mrc', :strictmarc)
|
14
|
+
|
15
|
+
permissivereader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
|
16
|
+
|
17
|
+
xmlreader = MARC4J4R::Reader.new('test.xml', :marcxml)
|
18
|
+
|
19
|
+
# Or use a file object
|
20
|
+
|
21
|
+
reader = MARC4J4R::Reader.new(File.open('test.mrc'))
|
22
|
+
|
23
|
+
# Or a java.io.InputStream
|
24
|
+
|
25
|
+
jurl = Java::java.net.URL.new('http://my.machine.com/test.mrc')
|
26
|
+
istream = jurl.openConnection.getInputStream
|
27
|
+
reader = MARC4J4R::Reader.new(istream)
|
28
|
+
|
29
|
+
## Using the reader
|
30
|
+
|
31
|
+
A MARC4J4R::Reader is an Enumerable, so you can do:
|
32
|
+
|
33
|
+
reader.each do |record|
|
34
|
+
# do stuff with the record
|
35
|
+
end
|
36
|
+
|
37
|
+
Or, if you're using [threach](http://rdoc.info/projects/billdueber/threach):
|
38
|
+
|
39
|
+
reader.threach(2) do |record|
|
40
|
+
# do stuff with records in two threads
|
41
|
+
end
|
42
|
+
|
43
|
+
## Using the writer
|
44
|
+
|
45
|
+
The writer code has not yet been tested; it *should* work as follows:
|
46
|
+
|
47
|
+
binaryWriter = MARC4J4R::Writer.new(filename, :strictmarc)
|
48
|
+
xmlWriter = MARC4J4R::Writer.new(filename, :marcxml)
|
49
|
+
|
50
|
+
writer.write(record)
|
51
|
+
# repeat
|
52
|
+
writer.close
|
53
|
+
|
54
|
+
|
55
|
+
## Working with records and fields
|
56
|
+
|
57
|
+
In addition to all the normal marc4j methods, MARC4J4R::Record exposes some additional methods
|
58
|
+
and syntaxes.
|
59
|
+
|
60
|
+
**See the classes themselves and/or the specs for more examples.**
|
61
|
+
|
62
|
+
leader = record.leader
|
63
|
+
|
64
|
+
# All fields are available via #each or #fields
|
65
|
+
|
66
|
+
fields = record.fields
|
67
|
+
|
68
|
+
record.each do |field|
|
69
|
+
# do something with each controlfield/datafield; returned in the order they were added
|
70
|
+
end
|
71
|
+
|
72
|
+
# Controlfields have a tag and a value
|
73
|
+
|
74
|
+
idfield = record['001']
|
75
|
+
idfield.tag # => '001'
|
76
|
+
id = idfield.value # or idfield.data, same thing
|
77
|
+
|
78
|
+
# Get the first datafield with a given tag
|
79
|
+
first700 = record['700'] # Note: need to use strings, not integers
|
80
|
+
|
81
|
+
# Stringify a field to get all the subfields joined with spaces
|
82
|
+
|
83
|
+
fullTitle = record['245'].to_s
|
84
|
+
|
85
|
+
all700s = record.find_by_tag '700'
|
86
|
+
all700and856s = record.find_by_tag ['700', '856']
|
87
|
+
|
88
|
+
|
89
|
+
# Construct and add a controlfield
|
90
|
+
record << MARC4J4R::ControlField.new('001', '0000333234')
|
91
|
+
|
92
|
+
# Construct and add a datafield
|
93
|
+
df = MARC4J4R::DataField.new(tag, ind1, ind2)
|
94
|
+
|
95
|
+
ind1 = df.ind1
|
96
|
+
ind2 = df.ind2
|
97
|
+
|
98
|
+
df << MARC4J4R::Subfield.new('a', 'the $a value')
|
99
|
+
df << MARC4J4R::Subfield.new('b', 'the $b value')
|
100
|
+
|
101
|
+
# Add it to a record
|
102
|
+
|
103
|
+
record << df
|
104
|
+
|
105
|
+
# Get subfields or their values
|
106
|
+
|
107
|
+
firstSubfieldAValue = df['a']
|
108
|
+
|
109
|
+
allSubfields = df.subs
|
110
|
+
allSubfieldAs = df.subs('a')
|
111
|
+
allSubfieldAorBs = df.subs(['a', 'b'])
|
112
|
+
|
113
|
+
allSubfieldAorBValues = df.sub_values(['a', 'b'])
|
114
|
+
|
115
|
+
|
116
|
+
## Note on Patches/Pull Requests
|
117
|
+
|
118
|
+
* Fork the project.
|
119
|
+
* Make your feature addition or bug fix.
|
120
|
+
* Add tests for it. This is important so I don't break it in a
|
121
|
+
future version unintentionally.
|
122
|
+
* Commit, do not mess with rakefile, version, or history.
|
123
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
124
|
+
* Send me a pull request. Bonus points for topic branches.
|
125
|
+
|
126
|
+
## Copyright
|
127
|
+
|
128
|
+
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -13,6 +13,7 @@ begin
|
|
13
13
|
gem.authors = ["BillDueber"]
|
14
14
|
gem.add_development_dependency "bacon", ">= 0"
|
15
15
|
gem.add_development_dependency "yard", ">= 0"
|
16
|
+
|
16
17
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
17
18
|
end
|
18
19
|
Jeweler::GemcutterTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.4
|
data/lib/marc4j4r/datafield.rb
CHANGED
@@ -95,7 +95,7 @@ module MARC4J4R
|
|
95
95
|
code = [code]
|
96
96
|
end
|
97
97
|
|
98
|
-
return self.
|
98
|
+
return self.select {|s| code.include? s.code}
|
99
99
|
end
|
100
100
|
|
101
101
|
# Get all values from the subfields for the given code or array of codes
|
@@ -110,7 +110,7 @@ module MARC4J4R
|
|
110
110
|
# rec['260'].sub_values(['a', 'c']) #=> ["New York,", "1969"]
|
111
111
|
# rec['260'].sub_values(['c', 'a']) #=> ["New York,", "1969"]
|
112
112
|
|
113
|
-
def sub_values(code)
|
113
|
+
def sub_values(code=nil)
|
114
114
|
return self.subs(code).collect {|s| s.value}
|
115
115
|
end
|
116
116
|
|
metadata
CHANGED
@@ -1,46 +1,51 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc4j4r
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 31
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
version: 0.2.
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 4
|
10
|
+
version: 0.2.4
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
|
-
|
13
|
+
- BillDueber
|
13
14
|
autorequire:
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2010-07-
|
18
|
+
date: 2010-07-23 00:00:00 -04:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: bacon
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: yard
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
44
49
|
description: Syntactic sugar and some extra methods to deal with MARC data using the java .jar marc4j
|
45
50
|
email: bill@dueber.com
|
46
51
|
executables: []
|
@@ -50,56 +55,59 @@ extensions: []
|
|
50
55
|
extra_rdoc_files: []
|
51
56
|
|
52
57
|
files:
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
- spec/spec_helper.rb
|
58
|
+
- LICENSE
|
59
|
+
- README.markdown
|
60
|
+
- Rakefile
|
61
|
+
- VERSION
|
62
|
+
- jars/marc4j.jar
|
63
|
+
- lib/marc4j4r.rb
|
64
|
+
- lib/marc4j4r/controlfield.rb
|
65
|
+
- lib/marc4j4r/datafield.rb
|
66
|
+
- lib/marc4j4r/reader.rb
|
67
|
+
- lib/marc4j4r/record.rb
|
68
|
+
- lib/marc4j4r/writer.rb
|
69
|
+
- spec/batch.dat
|
70
|
+
- spec/batch.txt
|
71
|
+
- spec/batch.xml
|
72
|
+
- spec/controlfield_spec.rb
|
73
|
+
- spec/datafield_spec.rb
|
74
|
+
- spec/one.dat
|
75
|
+
- spec/one.txt
|
76
|
+
- spec/one.xml
|
77
|
+
- spec/reader_spec.rb
|
78
|
+
- spec/record_spec.rb
|
79
|
+
- spec/spec_helper.rb
|
76
80
|
has_rdoc: true
|
77
81
|
homepage: http://github.com/billdueber/javamarc/tree/master/ruby/marc4j4r/
|
78
82
|
licenses: []
|
79
83
|
|
80
84
|
post_install_message:
|
81
85
|
rdoc_options:
|
82
|
-
|
86
|
+
- --charset=UTF-8
|
83
87
|
require_paths:
|
84
|
-
|
88
|
+
- lib
|
85
89
|
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
86
91
|
requirements:
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
hash: 3
|
95
|
+
segments:
|
96
|
+
- 0
|
97
|
+
version: "0"
|
92
98
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
93
100
|
requirements:
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
hash: 3
|
104
|
+
segments:
|
105
|
+
- 0
|
106
|
+
version: "0"
|
99
107
|
requirements: []
|
100
108
|
|
101
109
|
rubyforge_project:
|
102
|
-
rubygems_version: 1.3.
|
110
|
+
rubygems_version: 1.3.7
|
103
111
|
signing_key:
|
104
112
|
specification_version: 3
|
105
113
|
summary: Use marc4j java library in JRuby in a more ruby-ish way
|
data/README.rdoc
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
= marc4j4r
|
2
|
-
|
3
|
-
Description goes here.
|
4
|
-
|
5
|
-
== Note on Patches/Pull Requests
|
6
|
-
|
7
|
-
* Fork the project.
|
8
|
-
* Make your feature addition or bug fix.
|
9
|
-
* Add tests for it. This is important so I don't break it in a
|
10
|
-
future version unintentionally.
|
11
|
-
* Commit, do not mess with rakefile, version, or history.
|
12
|
-
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
13
|
-
* Send me a pull request. Bonus points for topic branches.
|
14
|
-
|
15
|
-
== Copyright
|
16
|
-
|
17
|
-
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
@@ -1,518 +0,0 @@
|
|
1
|
-
unless defined? JRUBY_VERSION
|
2
|
-
raise "Only works under JRUBY"
|
3
|
-
end
|
4
|
-
|
5
|
-
begin
|
6
|
-
include_class Java::org.marc4j.marc.impl.RecordImpl
|
7
|
-
rescue NameError => e
|
8
|
-
jardir = File.join(File.dirname(__FILE__), '..', 'jars')
|
9
|
-
require "#{jardir}/marc4j.jar"
|
10
|
-
end
|
11
|
-
|
12
|
-
require 'set'
|
13
|
-
|
14
|
-
|
15
|
-
# Re-open the MarcReader interface, define #each and include Enumerable
|
16
|
-
#
|
17
|
-
# We also automatically call #hashify on the records that stream through
|
18
|
-
# #each in order to speed up RecordImpl#[] when (a) doing many operations on a single
|
19
|
-
# record, and (b) we're not worried about interleaved tags (e.g., a 520 followed by a 510 followed
|
20
|
-
# by another 520)
|
21
|
-
|
22
|
-
module Java::OrgMarc4j::MarcReader
|
23
|
-
include Enumerable
|
24
|
-
|
25
|
-
# Return the next record, after calling #hashify on it
|
26
|
-
def each(hashify=true)
|
27
|
-
while self.hasNext
|
28
|
-
r = self.next
|
29
|
-
r.hashify if hashify
|
30
|
-
yield r
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
module MARC4J4R
|
37
|
-
|
38
|
-
# Do some simple substitutions to make things prettier. After this,
|
39
|
-
# we can use MARC4J4R::Record instead of Java::org.marc4j.marc.impl::RecordImpl
|
40
|
-
|
41
|
-
Record = Java::org.marc4j.marc.impl::RecordImpl
|
42
|
-
ControlField = Java::org.marc4j.marc.impl::ControlFieldImpl
|
43
|
-
DataField = Java::org.marc4j.marc.impl::DataFieldImpl
|
44
|
-
SubField = Java::org.marc4j.marc.impl::SubfieldImpl
|
45
|
-
|
46
|
-
|
47
|
-
# Add some sugar to the MarcReader interface
|
48
|
-
#
|
49
|
-
# Adjust the interface so that a #new call to any implementations that
|
50
|
-
# implement it can take a java.io.InputStream, ruby IO object, or String
|
51
|
-
# (that will be interpreted as a filename) without complaining.
|
52
|
-
#
|
53
|
-
# The mechanism -- running module_eval on a string-representation of the
|
54
|
-
# new method in each of the hard-coded implementations of MarcReader
|
55
|
-
# (MarcStreamReader,MarcPermissiveStreamReader,MarcXmlReader) -- is ugly
|
56
|
-
# and deeply unsettling.
|
57
|
-
#
|
58
|
-
# @author Bill Dueber
|
59
|
-
#
|
60
|
-
# A string used to override the initializer for each stream reader
|
61
|
-
# Need to do it this ugly way because of the way java and ruby interact;
|
62
|
-
# can't just add it to the MarcReader interface the way I wanted to.
|
63
|
-
|
64
|
-
NEWINIT = <<-ENDBINDER
|
65
|
-
include Enumerable
|
66
|
-
alias_method :oldinit, :initialize
|
67
|
-
def initialize(fromwhere)
|
68
|
-
stream = nil
|
69
|
-
if fromwhere.is_a? Java::JavaIO::InputStream or fromwhere.is_a? Java::JavaIO::ByteArrayInputStream
|
70
|
-
stream = fromwhere
|
71
|
-
elsif fromwhere.is_a? IO
|
72
|
-
stream = fromwhere.to_inputstream
|
73
|
-
else
|
74
|
-
stream = java.io.FileInputStream.new(fromwhere.to_java_string)
|
75
|
-
end
|
76
|
-
if self.class == Java::org.marc4j.MarcPermissiveStreamReader
|
77
|
-
self.oldinit(stream, true, true)
|
78
|
-
else
|
79
|
-
self.oldinit(stream)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
ENDBINDER
|
83
|
-
|
84
|
-
Java::org.marc4j.MarcStreamReader.module_eval(NEWINIT)
|
85
|
-
Java::org.marc4j.MarcPermissiveStreamReader.module_eval(NEWINIT)
|
86
|
-
Java::org.marc4j.MarcXmlReader.module_eval(NEWINIT)
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
# Get a marc reader of the appropriate type
|
91
|
-
# @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
|
92
|
-
# @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential] type The type of MARC reader you want.
|
93
|
-
# @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each)
|
94
|
-
#
|
95
|
-
# @example Get a strict binary MARC reader for the file 'test.mrc'
|
96
|
-
# reader = MARC4J4R.reader('test.mrc')
|
97
|
-
#
|
98
|
-
# @example Get a permissive binary MARC reader
|
99
|
-
# reader = MARC4J4R.reader('test.mrc', :permissivemarc)
|
100
|
-
#
|
101
|
-
# @example Get a reader for an xml file
|
102
|
-
# reader = MARC4J4R.reader('test.xml', :marcxml)
|
103
|
-
#
|
104
|
-
# @example Get a reader based on an existing IO object
|
105
|
-
# require 'open-uri'
|
106
|
-
# infile = open('http://my.machine.com/test.mrc')
|
107
|
-
# reader = MARC4J4R.reader(infile)
|
108
|
-
|
109
|
-
def reader(input, type = :strictmarc)
|
110
|
-
case type
|
111
|
-
when :strictmarc then
|
112
|
-
return Java::org.marc4j.MarcStreamReader.new(input)
|
113
|
-
when :permissivemarc then
|
114
|
-
return Java::org.marc4j.MarcPermissiveStreamReader.new(input)
|
115
|
-
when :marcxml then
|
116
|
-
return Java::org.marc4j.MarcXmlReader.new(input)
|
117
|
-
when :alephsequential then
|
118
|
-
return MARC4J4R::AlephSequentialReader.new(input)
|
119
|
-
else
|
120
|
-
raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, or :alephsequential"
|
121
|
-
end
|
122
|
-
end
|
123
|
-
module_function :reader
|
124
|
-
|
125
|
-
|
126
|
-
# Implement an AlephSequential reader
|
127
|
-
class AlephSequentialReader
|
128
|
-
include Enumerable
|
129
|
-
def initialize(fromwhere)
|
130
|
-
stream = nil
|
131
|
-
if fromwhere.is_a? Java::JavaIO::InputStream
|
132
|
-
stream = fromwhere.to_io
|
133
|
-
elsif fromwhere.is_a? IO
|
134
|
-
stream = fromwhere
|
135
|
-
else
|
136
|
-
stream = File.new(fromwhere)
|
137
|
-
end
|
138
|
-
|
139
|
-
@handle = stream
|
140
|
-
end
|
141
|
-
|
142
|
-
def each
|
143
|
-
record = nil
|
144
|
-
currentID = nil
|
145
|
-
|
146
|
-
@handle.each_line do |l|
|
147
|
-
l.chomp!
|
148
|
-
next unless l =~ /\S/
|
149
|
-
vals = l.unpack('a9 a a3 c c a3 a*')
|
150
|
-
id, tag, ind1, ind2, data = vals[0], vals[2], vals[3], vals[4], vals[6]
|
151
|
-
# id, tag, ind1, ind2, junk, data = *(l.unpack('A10 a3 c c a3 A*'))
|
152
|
-
if id != currentID
|
153
|
-
if record
|
154
|
-
yield record
|
155
|
-
end
|
156
|
-
record = RecordImpl.new
|
157
|
-
currentID = id
|
158
|
-
end
|
159
|
-
if tag == 'LDR'
|
160
|
-
record.setLeader(Java::org.marc4j.marc.impl.LeaderImpl.new(data))
|
161
|
-
else
|
162
|
-
record << buildField(tag,ind1,ind2,data)
|
163
|
-
end
|
164
|
-
end
|
165
|
-
yield record
|
166
|
-
end
|
167
|
-
|
168
|
-
|
169
|
-
SUBREGEXP = /\$\$(.)/
|
170
|
-
def buildField (tag, ind1, ind2, data)
|
171
|
-
if Java::org.marc4j.marc.impl.Verifier.isControlField tag
|
172
|
-
return Java::org.marc4j.marc.impl.ControlFieldImpl.new(tag, data)
|
173
|
-
else
|
174
|
-
f = Java::org.marc4j.marc.impl.DataFieldImpl.new(tag, ind1, ind2)
|
175
|
-
data.split(SUBREGEXP)[1..-1].each_slice(2) do |code, value|
|
176
|
-
f.addSubfield Java::org.marc4j.marc.impl.SubfieldImpl.new(code[0].ord, value)
|
177
|
-
end
|
178
|
-
return f
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
end # End of class AlephSequentialReader
|
183
|
-
|
184
|
-
end
|
185
|
-
|
186
|
-
|
187
|
-
# Open up RecordImpl to add some sugar, including Enumerable as well
|
188
|
-
# @author Bill Dueber
|
189
|
-
|
190
|
-
class Record
|
191
|
-
include Enumerable
|
192
|
-
|
193
|
-
alias_method :<<, :addVariableField
|
194
|
-
alias_method :append, :addVariableField
|
195
|
-
alias_method :fields, :getVariableFields
|
196
|
-
|
197
|
-
# Export as a MARC-Hash, as described at
|
198
|
-
# http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
|
199
|
-
# @return A marc-hash representation of the record, suitable for calling .to_json on or whatever
|
200
|
-
def to_marchash
|
201
|
-
h = {}
|
202
|
-
h['type'] = 'marc-hash'
|
203
|
-
h['version'] = [1,0]
|
204
|
-
h['leader'] = self.leader
|
205
|
-
|
206
|
-
fields = []
|
207
|
-
|
208
|
-
self.getVariableFields.each do |f|
|
209
|
-
if f.controlField?
|
210
|
-
fields << [f.tag, f.value]
|
211
|
-
else
|
212
|
-
farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
|
213
|
-
subs = []
|
214
|
-
f.each do |subfield|
|
215
|
-
subs << [subfield.code, subfield.value]
|
216
|
-
end
|
217
|
-
farray.push subs
|
218
|
-
fields << farray
|
219
|
-
end
|
220
|
-
end
|
221
|
-
h['fields'] = fields
|
222
|
-
return h
|
223
|
-
end
|
224
|
-
|
225
|
-
# Create a local hash by tag number; makes some stuff faster
|
226
|
-
# Called automatically if you use reader.each
|
227
|
-
|
228
|
-
def hashify
|
229
|
-
return if @hashedtags # don't do it more than once
|
230
|
-
@hashedtags = {}
|
231
|
-
self.getVariableFields.each do |f|
|
232
|
-
@hashedtags[f.tag] ||= []
|
233
|
-
@hashedtags[f.tag].push f
|
234
|
-
end
|
235
|
-
end
|
236
|
-
|
237
|
-
# Create a nice string of the record
|
238
|
-
def to_s
|
239
|
-
arr = ['LEADER ' + self.leader]
|
240
|
-
self.each do |f|
|
241
|
-
arr.push f.to_s
|
242
|
-
end
|
243
|
-
return arr.join("\n")
|
244
|
-
end
|
245
|
-
|
246
|
-
# Get the leader as a string (marc4j would otherwise return Leader object)
|
247
|
-
def leader
|
248
|
-
self.get_leader.toString
|
249
|
-
end
|
250
|
-
|
251
|
-
|
252
|
-
# Cycle through the fields in the order they appear in the record
|
253
|
-
def each
|
254
|
-
self.getVariableFields.each do |f|
|
255
|
-
yield f
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
# Get the first field associated with a tag
|
260
|
-
# @param [String] tag The tag
|
261
|
-
# @return [Field] The first matching field, or nil if none. Note that
|
262
|
-
# to mirror ruby-marc, this returns a single field
|
263
|
-
|
264
|
-
def [] tag
|
265
|
-
if defined? @hashedtags
|
266
|
-
if @hashedtags[tag]
|
267
|
-
return @hashedtags[tag][0]
|
268
|
-
else
|
269
|
-
return nil
|
270
|
-
end
|
271
|
-
else
|
272
|
-
return self.getVariableField(tag)
|
273
|
-
end
|
274
|
-
end
|
275
|
-
|
276
|
-
|
277
|
-
# Get a (possibly empty) list of fields with the given tag(s)
|
278
|
-
#
|
279
|
-
# @param [String, Array<String>] tags A string (or Array of strings) with the tags you're interested in
|
280
|
-
# @param [Boolean] originalorder Whether or not results should be presented in the original order within the
|
281
|
-
# record or with a two-column sort of (a) Order of the tag in the list of tags sent, (b) order within that tag
|
282
|
-
# in the record
|
283
|
-
# @return [Array<Field>] Either an empty list or a list of one or more matched fields will be returned.
|
284
|
-
#
|
285
|
-
# originalorder == false will use an internal hash and be faster in many cases (see #hashify)
|
286
|
-
#
|
287
|
-
# @example originalorder == false
|
288
|
-
# # Given a record that looks like
|
289
|
-
# # 010 $a 68027371
|
290
|
-
# # 035 $a (RLIN)MIUG0001728-B
|
291
|
-
# # 035 $a (CaOTULAS)159818044
|
292
|
-
# # 035 $a (OCoLC)ocm00001728
|
293
|
-
#
|
294
|
-
# r.find_by_tag(['035', '010']).each {|f| puts f.to_s}
|
295
|
-
# # 035 $a (RLIN)MIUG0001728-B
|
296
|
-
# # 035 $a (CaOTULAS)159818044
|
297
|
-
# # 035 $a (OCoLC)ocm00001728
|
298
|
-
# # 010 $a 68027371
|
299
|
-
#
|
300
|
-
# # The results are ordered first by tag as passed in, then by original order within the tag
|
301
|
-
#
|
302
|
-
# @example Just get all fields for a single tag
|
303
|
-
# ohThirtyFives = r.find_by_tag('035')
|
304
|
-
#
|
305
|
-
# @example Get a bunch of standard identifiers
|
306
|
-
# standardIDs = r.find_by_tag(['022', '020', '010'])
|
307
|
-
#
|
308
|
-
# @example originalorder == true
|
309
|
-
# r.find_by_tag(['035', '010'], true).each {|f| puts f.to_s}
|
310
|
-
# # 010 $a 68027371
|
311
|
-
# # 035 $a (RLIN)MIUG0001728-B
|
312
|
-
# # 035 $a (CaOTULAS)159818044
|
313
|
-
# # 035 $a (OCoLC)ocm00001728
|
314
|
-
|
315
|
-
def find_by_tag(tags, originalorder = false)
|
316
|
-
self.hashify unless @hashedtags and !originalorder
|
317
|
-
if !tags.is_a? Array
|
318
|
-
return @hashedtags[tags] || []
|
319
|
-
end
|
320
|
-
if originalorder
|
321
|
-
return self.find_all {|f| tags.include? f.tag}
|
322
|
-
else
|
323
|
-
# puts "Tags is #{tags}: got #{@hashedtags.values_at(*tags)}"
|
324
|
-
return @hashedtags.values_at(*tags).flatten.compact
|
325
|
-
end
|
326
|
-
end
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
# Return the record as valid MARC-XML
|
331
|
-
# @return [String] A MARC-XML representation of the record, including the XML header
|
332
|
-
def to_xml
|
333
|
-
return @xml if @xml
|
334
|
-
begin
|
335
|
-
@xml = java.io.StringWriter.new
|
336
|
-
res = javax.xml.transform.stream.StreamResult.new(@xml)
|
337
|
-
writer = org.marc4j.MarcXmlWriter.new(res)
|
338
|
-
writer.write(self)
|
339
|
-
writer.writeEndDocument
|
340
|
-
return @xml.toString
|
341
|
-
rescue
|
342
|
-
"Woops! to_xml failed for record #{self['001'].data}: #{$!}"
|
343
|
-
end
|
344
|
-
end
|
345
|
-
|
346
|
-
def to_marc
|
347
|
-
begin
|
348
|
-
s = Java::java.io.ByteArrayOutputStream.new
|
349
|
-
writer = org.marc4j.MarcStreamWriter.new(s)
|
350
|
-
writer.write(self)
|
351
|
-
@marcbinary = s.to_string
|
352
|
-
puts @marcbinary
|
353
|
-
return @marcbinary
|
354
|
-
rescue
|
355
|
-
# "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
|
356
|
-
"Whoops! Failed: #{$!}"
|
357
|
-
end
|
358
|
-
end
|
359
|
-
|
360
|
-
|
361
|
-
end
|
362
|
-
|
363
|
-
class ControlField
|
364
|
-
def value
|
365
|
-
return self.data
|
366
|
-
end
|
367
|
-
|
368
|
-
def controlField?
|
369
|
-
return true
|
370
|
-
end
|
371
|
-
|
372
|
-
def self.control_tag? tag
|
373
|
-
return Java::org.marc4j.marc.impl.Verifier.isControlField tag
|
374
|
-
end
|
375
|
-
|
376
|
-
# Pretty-print
|
377
|
-
# @param [String] joiner What string to use to join the subfields
|
378
|
-
# @return [String] The pretty string
|
379
|
-
def to_s
|
380
|
-
return self.tag + " " + self.value
|
381
|
-
end
|
382
|
-
|
383
|
-
def == other
|
384
|
-
self.tag == other.tag && self.value == other.value
|
385
|
-
end
|
386
|
-
|
387
|
-
end
|
388
|
-
|
389
|
-
class DataField
|
390
|
-
include Enumerable
|
391
|
-
|
392
|
-
alias_method :<<, :addSubfield
|
393
|
-
|
394
|
-
|
395
|
-
def controlField?
|
396
|
-
return false
|
397
|
-
end
|
398
|
-
|
399
|
-
# Broken. Need to check subs as well
|
400
|
-
def == other
|
401
|
-
self.tag == other.tag and
|
402
|
-
self.indicator1 == other.indicator1 and
|
403
|
-
self.indicator2 == other.indicator2
|
404
|
-
end
|
405
|
-
|
406
|
-
# Pretty-print
|
407
|
-
# @param [String] joiner What string to use to join the subfields
|
408
|
-
# @return [String] The pretty string
|
409
|
-
def to_s (joiner = ' ')
|
410
|
-
arr = [self.tag + ' ' + self.indicator1 + self.indicator2]
|
411
|
-
self.each do |s|
|
412
|
-
arr.push s.to_s
|
413
|
-
end
|
414
|
-
return arr.join(joiner)
|
415
|
-
end
|
416
|
-
|
417
|
-
# Get the value of the first subfield of this field with the given code
|
418
|
-
# @param [String] code 1-character string of the subfield code
|
419
|
-
# @return [String] The value of the first matched subfield
|
420
|
-
def [] code
|
421
|
-
raise ArgumentError, "Code must be a one-character string, not #{code}" unless code.is_a? String and code.size == 1
|
422
|
-
# need to send a char value that the underlying java can deal with
|
423
|
-
sub = self.getSubfield(code[0].ord)
|
424
|
-
if (sub)
|
425
|
-
return sub.getData
|
426
|
-
else
|
427
|
-
return nil
|
428
|
-
end
|
429
|
-
end
|
430
|
-
|
431
|
-
|
432
|
-
# Get all values from the subfields for the given code or array of codes
|
433
|
-
# @param [String, Array<String>] code (Array of?) 1-character string(s) of the subfield code
|
434
|
-
# @param [Boolean] myorder Use the order of subfields that I gave instead of the order they're in the record
|
435
|
-
# @return [Array<String>] A possibly-empty array of Strings made up of the values in the subfields whose
|
436
|
-
# code is included in the given codes. If myorder == true, use the order in which they are passed in; if a code is repeated
|
437
|
-
# (occasionally legal) subfield values will appear first ordered by the passed array, then by order within
|
438
|
-
# the document.
|
439
|
-
#
|
440
|
-
# If myorder is false, just return the values for matching subfields in the order they appear in the field.
|
441
|
-
#
|
442
|
-
# @example Quick examples:
|
443
|
-
# # 260 $a New York, $b Van Nostrand Reinhold Co. $c 1969
|
444
|
-
# rec['260'].sub_values('a') #=> ["New York,"]
|
445
|
-
# rec['260'].sub_values(['a', 'c']) #=> ["New York,", "1969"]
|
446
|
-
# rec['260'].sub_values(['c', 'a']) #=> ["New York,", "1969"]
|
447
|
-
# rec['260'].sub_values(['c', 'a'], true) #=> ["1969", "New York,"]
|
448
|
-
|
449
|
-
def sub_values(code, myorder = false)
|
450
|
-
|
451
|
-
# Do a little razzle-dazzle for the common case when a single code is given
|
452
|
-
if not [Set, Array].include? code.class
|
453
|
-
c = code
|
454
|
-
elsif code.size == 1
|
455
|
-
c = code.first
|
456
|
-
end
|
457
|
-
if c
|
458
|
-
return self.find_all { |s| c == s.code}.map {|s| s.data}
|
459
|
-
end
|
460
|
-
|
461
|
-
# unless [Set, Array].include? code.class
|
462
|
-
# code = [code]
|
463
|
-
# # puts "Arrayified for code #{code} / #{code.class}"
|
464
|
-
# end
|
465
|
-
if myorder
|
466
|
-
subs = []
|
467
|
-
code.each do |c|
|
468
|
-
subs << self.find_all {|s| c == s.code}
|
469
|
-
end
|
470
|
-
return subs.flatten.map {|s| s.data}
|
471
|
-
else
|
472
|
-
return self.find_all{|s| code.include? s.code}.map {|s| s.data}
|
473
|
-
end
|
474
|
-
end
|
475
|
-
|
476
|
-
# Get first indicator as a one-character string
|
477
|
-
def indicator1
|
478
|
-
return self.getIndicator1.chr
|
479
|
-
end
|
480
|
-
|
481
|
-
# Get second indicator as a one-character string
|
482
|
-
def indicator2
|
483
|
-
return self.getIndicator2.chr
|
484
|
-
end
|
485
|
-
|
486
|
-
# Iterate over the subfields
|
487
|
-
def each
|
488
|
-
self.getSubfields.each do |s|
|
489
|
-
yield s
|
490
|
-
end
|
491
|
-
end
|
492
|
-
|
493
|
-
# Get the concatenated values of the subfields in the order they appear in the field
|
494
|
-
# @param [String] joiner The string used to join the subfield values
|
495
|
-
def value joiner=' '
|
496
|
-
data = self.getSubfields.map {|s| s.data}
|
497
|
-
return data.join(joiner)
|
498
|
-
end
|
499
|
-
end
|
500
|
-
|
501
|
-
class SubField
|
502
|
-
|
503
|
-
def == other
|
504
|
-
return ((self.code == other.code) and (self.data == other.data))
|
505
|
-
end
|
506
|
-
|
507
|
-
def value
|
508
|
-
return self.data
|
509
|
-
end
|
510
|
-
|
511
|
-
def code
|
512
|
-
return self.getCode.chr
|
513
|
-
end
|
514
|
-
|
515
|
-
def to_s
|
516
|
-
return '$' + self.code + " " + self.data
|
517
|
-
end
|
518
|
-
end
|