marc4j4r 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +128 -0
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/lib/marc4j4r/datafield.rb +2 -2
- metadata +74 -66
- data/README.rdoc +0 -17
- data/lib/original_monolithic_file.rb +0 -518
data/README.markdown
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# marc4j4r
|
2
|
+
|
3
|
+
A ruby wrapper around the marc4j.jar (as forked by javamarc) java library for dealing with library MARC data.
|
4
|
+
|
5
|
+
## Getting a MARC reader
|
6
|
+
|
7
|
+
marc4j4r provides three readers out of the box: :strictmarc (binary), :permissivemarc (binary), and :marcxml (MARC-XML).
|
8
|
+
You can pass either a filename or an open IO object (either a ruby IO or a java.io.InputStream).
|
9
|
+
|
10
|
+
require 'marc4j4r'
|
11
|
+
|
12
|
+
binreader = MARC4J4R::Reader.new('test.mrc') # defaults to :strictmarc
|
13
|
+
binreader = MARC4J4R::Reader.new('test.mrc', :strictmarc)
|
14
|
+
|
15
|
+
permissivereader = MARC4J4R::Reader.new('test.mrc', :permissivemarc)
|
16
|
+
|
17
|
+
xmlreader = MARC4J4R::Reader.new('test.xml', :marcxml)
|
18
|
+
|
19
|
+
# Or use a file object
|
20
|
+
|
21
|
+
reader = MARC4J4R::Reader.new(File.open('test.mrc'))
|
22
|
+
|
23
|
+
# Or a java.io.InputStream
|
24
|
+
|
25
|
+
jurl = Java::java.net.URL.new('http://my.machine.com/test.mrc')
|
26
|
+
istream = jurl.openConnection.getInputStream
|
27
|
+
reader = MARC4J4R::Reader.new(istream)
|
28
|
+
|
29
|
+
## Using the reader
|
30
|
+
|
31
|
+
A MARC4J4R::Reader is an Enumerable, so you can do:
|
32
|
+
|
33
|
+
reader.each do |record|
|
34
|
+
# do stuff with the record
|
35
|
+
end
|
36
|
+
|
37
|
+
Or, if you're using [threach](http://rdoc.info/projects/billdueber/threach):
|
38
|
+
|
39
|
+
reader.threach(2) do |record|
|
40
|
+
# do stuff with records in two threads
|
41
|
+
end
|
42
|
+
|
43
|
+
## Using the writer
|
44
|
+
|
45
|
+
The writer code has not yet been tested; it *should* work as follows:
|
46
|
+
|
47
|
+
binaryWriter = MARC4J4R::Writer.new(filename, :strictmarc)
|
48
|
+
xmlWriter = MARC4J4R::Writer.new(filename, :marcxml)
|
49
|
+
|
50
|
+
writer.write(record)
|
51
|
+
# repeat
|
52
|
+
writer.close
|
53
|
+
|
54
|
+
|
55
|
+
## Working with records and fields
|
56
|
+
|
57
|
+
In addition to all the normal marc4j methods, MARC4J4R::Record exposes some additional methods
|
58
|
+
and syntaxes.
|
59
|
+
|
60
|
+
**See the classes themselves and/or the specs for more examples.**
|
61
|
+
|
62
|
+
leader = record.leader
|
63
|
+
|
64
|
+
# All fields are available via #each or #fields
|
65
|
+
|
66
|
+
fields = record.fields
|
67
|
+
|
68
|
+
record.each do |field|
|
69
|
+
# do something with each controlfield/datafield; returned in the order they were added
|
70
|
+
end
|
71
|
+
|
72
|
+
# Controlfields have a tag and a value
|
73
|
+
|
74
|
+
idfield = record['001']
|
75
|
+
idfield.tag # => '001'
|
76
|
+
id = idfield.value # or idfield.data, same thing
|
77
|
+
|
78
|
+
# Get the first datafield with a given tag
|
79
|
+
first700 = record['700'] # Note: need to use strings, not integers
|
80
|
+
|
81
|
+
# Stringify a field to get all the subfields joined with spaces
|
82
|
+
|
83
|
+
fullTitle = record['245'].to_s
|
84
|
+
|
85
|
+
all700s = record.find_by_tag '700'
|
86
|
+
all700and856s = record.find_by_tag ['700', '856']
|
87
|
+
|
88
|
+
|
89
|
+
# Construct and add a controlfield
|
90
|
+
record << MARC4J4R::ControlField.new('001', '0000333234')
|
91
|
+
|
92
|
+
# Construct and add a datafield
|
93
|
+
df = MARC4J4R::DataField.new(tag, ind1, ind2)
|
94
|
+
|
95
|
+
ind1 = df.ind1
|
96
|
+
ind2 = df.ind2
|
97
|
+
|
98
|
+
df << MARC4J4R::Subfield.new('a', 'the $a value')
|
99
|
+
df << MARC4J4R::Subfield.new('b', 'the $b value')
|
100
|
+
|
101
|
+
# Add it to a record
|
102
|
+
|
103
|
+
record << df
|
104
|
+
|
105
|
+
# Get subfields or their values
|
106
|
+
|
107
|
+
firstSubfieldAValue = df['a']
|
108
|
+
|
109
|
+
allSubfields = df.subs
|
110
|
+
allSubfieldAs = df.subs('a')
|
111
|
+
allSubfieldAorBs = df.subs(['a', 'b'])
|
112
|
+
|
113
|
+
allSubfieldAorBValues = df.sub_values(['a', 'b'])
|
114
|
+
|
115
|
+
|
116
|
+
## Note on Patches/Pull Requests
|
117
|
+
|
118
|
+
* Fork the project.
|
119
|
+
* Make your feature addition or bug fix.
|
120
|
+
* Add tests for it. This is important so I don't break it in a
|
121
|
+
future version unintentionally.
|
122
|
+
* Commit, do not mess with rakefile, version, or history.
|
123
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
124
|
+
* Send me a pull request. Bonus points for topic branches.
|
125
|
+
|
126
|
+
## Copyright
|
127
|
+
|
128
|
+
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -13,6 +13,7 @@ begin
|
|
13
13
|
gem.authors = ["BillDueber"]
|
14
14
|
gem.add_development_dependency "bacon", ">= 0"
|
15
15
|
gem.add_development_dependency "yard", ">= 0"
|
16
|
+
|
16
17
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
17
18
|
end
|
18
19
|
Jeweler::GemcutterTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.4
|
data/lib/marc4j4r/datafield.rb
CHANGED
@@ -95,7 +95,7 @@ module MARC4J4R
|
|
95
95
|
code = [code]
|
96
96
|
end
|
97
97
|
|
98
|
-
return self.
|
98
|
+
return self.select {|s| code.include? s.code}
|
99
99
|
end
|
100
100
|
|
101
101
|
# Get all values from the subfields for the given code or array of codes
|
@@ -110,7 +110,7 @@ module MARC4J4R
|
|
110
110
|
# rec['260'].sub_values(['a', 'c']) #=> ["New York,", "1969"]
|
111
111
|
# rec['260'].sub_values(['c', 'a']) #=> ["New York,", "1969"]
|
112
112
|
|
113
|
-
def sub_values(code)
|
113
|
+
def sub_values(code=nil)
|
114
114
|
return self.subs(code).collect {|s| s.value}
|
115
115
|
end
|
116
116
|
|
metadata
CHANGED
@@ -1,46 +1,51 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc4j4r
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 31
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
version: 0.2.
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 4
|
10
|
+
version: 0.2.4
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
|
-
|
13
|
+
- BillDueber
|
13
14
|
autorequire:
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2010-07-
|
18
|
+
date: 2010-07-23 00:00:00 -04:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: bacon
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: yard
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
44
49
|
description: Syntactic sugar and some extra methods to deal with MARC data using the java .jar marc4j
|
45
50
|
email: bill@dueber.com
|
46
51
|
executables: []
|
@@ -50,56 +55,59 @@ extensions: []
|
|
50
55
|
extra_rdoc_files: []
|
51
56
|
|
52
57
|
files:
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
- spec/spec_helper.rb
|
58
|
+
- LICENSE
|
59
|
+
- README.markdown
|
60
|
+
- Rakefile
|
61
|
+
- VERSION
|
62
|
+
- jars/marc4j.jar
|
63
|
+
- lib/marc4j4r.rb
|
64
|
+
- lib/marc4j4r/controlfield.rb
|
65
|
+
- lib/marc4j4r/datafield.rb
|
66
|
+
- lib/marc4j4r/reader.rb
|
67
|
+
- lib/marc4j4r/record.rb
|
68
|
+
- lib/marc4j4r/writer.rb
|
69
|
+
- spec/batch.dat
|
70
|
+
- spec/batch.txt
|
71
|
+
- spec/batch.xml
|
72
|
+
- spec/controlfield_spec.rb
|
73
|
+
- spec/datafield_spec.rb
|
74
|
+
- spec/one.dat
|
75
|
+
- spec/one.txt
|
76
|
+
- spec/one.xml
|
77
|
+
- spec/reader_spec.rb
|
78
|
+
- spec/record_spec.rb
|
79
|
+
- spec/spec_helper.rb
|
76
80
|
has_rdoc: true
|
77
81
|
homepage: http://github.com/billdueber/javamarc/tree/master/ruby/marc4j4r/
|
78
82
|
licenses: []
|
79
83
|
|
80
84
|
post_install_message:
|
81
85
|
rdoc_options:
|
82
|
-
|
86
|
+
- --charset=UTF-8
|
83
87
|
require_paths:
|
84
|
-
|
88
|
+
- lib
|
85
89
|
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
86
91
|
requirements:
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
hash: 3
|
95
|
+
segments:
|
96
|
+
- 0
|
97
|
+
version: "0"
|
92
98
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
93
100
|
requirements:
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
hash: 3
|
104
|
+
segments:
|
105
|
+
- 0
|
106
|
+
version: "0"
|
99
107
|
requirements: []
|
100
108
|
|
101
109
|
rubyforge_project:
|
102
|
-
rubygems_version: 1.3.
|
110
|
+
rubygems_version: 1.3.7
|
103
111
|
signing_key:
|
104
112
|
specification_version: 3
|
105
113
|
summary: Use marc4j java library in JRuby in a more ruby-ish way
|
data/README.rdoc
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
= marc4j4r
|
2
|
-
|
3
|
-
Description goes here.
|
4
|
-
|
5
|
-
== Note on Patches/Pull Requests
|
6
|
-
|
7
|
-
* Fork the project.
|
8
|
-
* Make your feature addition or bug fix.
|
9
|
-
* Add tests for it. This is important so I don't break it in a
|
10
|
-
future version unintentionally.
|
11
|
-
* Commit, do not mess with rakefile, version, or history.
|
12
|
-
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
13
|
-
* Send me a pull request. Bonus points for topic branches.
|
14
|
-
|
15
|
-
== Copyright
|
16
|
-
|
17
|
-
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
@@ -1,518 +0,0 @@
|
|
1
|
-
unless defined? JRUBY_VERSION
|
2
|
-
raise "Only works under JRUBY"
|
3
|
-
end
|
4
|
-
|
5
|
-
begin
|
6
|
-
include_class Java::org.marc4j.marc.impl.RecordImpl
|
7
|
-
rescue NameError => e
|
8
|
-
jardir = File.join(File.dirname(__FILE__), '..', 'jars')
|
9
|
-
require "#{jardir}/marc4j.jar"
|
10
|
-
end
|
11
|
-
|
12
|
-
require 'set'
|
13
|
-
|
14
|
-
|
15
|
-
# Re-open the MarcReader interface, define #each and include Enumerable
|
16
|
-
#
|
17
|
-
# We also automatically call #hashify on the records that stream through
|
18
|
-
# #each in order to speed up RecordImpl#[] when (a) doing many operations on a single
|
19
|
-
# record, and (b) we're not worried about interleaved tags (e.g., a 520 followed by a 510 followed
|
20
|
-
# by another 520)
|
21
|
-
|
22
|
-
module Java::OrgMarc4j::MarcReader
|
23
|
-
include Enumerable
|
24
|
-
|
25
|
-
# Return the next record, after calling #hashify on it
|
26
|
-
def each(hashify=true)
|
27
|
-
while self.hasNext
|
28
|
-
r = self.next
|
29
|
-
r.hashify if hashify
|
30
|
-
yield r
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
module MARC4J4R
|
37
|
-
|
38
|
-
# Do some simple substitutions to make things prettier. After this,
|
39
|
-
# we can use MARC4J4R::Record instead of Java::org.marc4j.marc.impl::RecordImpl
|
40
|
-
|
41
|
-
Record = Java::org.marc4j.marc.impl::RecordImpl
|
42
|
-
ControlField = Java::org.marc4j.marc.impl::ControlFieldImpl
|
43
|
-
DataField = Java::org.marc4j.marc.impl::DataFieldImpl
|
44
|
-
SubField = Java::org.marc4j.marc.impl::SubfieldImpl
|
45
|
-
|
46
|
-
|
47
|
-
# Add some sugar to the MarcReader interface
|
48
|
-
#
|
49
|
-
# Adjust the interface so that a #new call to any implementations that
|
50
|
-
# implement it can take a java.io.InputStream, ruby IO object, or String
|
51
|
-
# (that will be interpreted as a filename) without complaining.
|
52
|
-
#
|
53
|
-
# The mechanism -- running module_eval on a string-representation of the
|
54
|
-
# new method in each of the hard-coded implementations of MarcReader
|
55
|
-
# (MarcStreamReader,MarcPermissiveStreamReader,MarcXmlReader) -- is ugly
|
56
|
-
# and deeply unsettling.
|
57
|
-
#
|
58
|
-
# @author Bill Dueber
|
59
|
-
#
|
60
|
-
# A string used to override the initializer for each stream reader
|
61
|
-
# Need to do it this ugly way because of the way java and ruby interact;
|
62
|
-
# can't just add it to the MarcReader interface the way I wanted to.
|
63
|
-
|
64
|
-
NEWINIT = <<-ENDBINDER
|
65
|
-
include Enumerable
|
66
|
-
alias_method :oldinit, :initialize
|
67
|
-
def initialize(fromwhere)
|
68
|
-
stream = nil
|
69
|
-
if fromwhere.is_a? Java::JavaIO::InputStream or fromwhere.is_a? Java::JavaIO::ByteArrayInputStream
|
70
|
-
stream = fromwhere
|
71
|
-
elsif fromwhere.is_a? IO
|
72
|
-
stream = fromwhere.to_inputstream
|
73
|
-
else
|
74
|
-
stream = java.io.FileInputStream.new(fromwhere.to_java_string)
|
75
|
-
end
|
76
|
-
if self.class == Java::org.marc4j.MarcPermissiveStreamReader
|
77
|
-
self.oldinit(stream, true, true)
|
78
|
-
else
|
79
|
-
self.oldinit(stream)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
ENDBINDER
|
83
|
-
|
84
|
-
Java::org.marc4j.MarcStreamReader.module_eval(NEWINIT)
|
85
|
-
Java::org.marc4j.MarcPermissiveStreamReader.module_eval(NEWINIT)
|
86
|
-
Java::org.marc4j.MarcXmlReader.module_eval(NEWINIT)
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
# Get a marc reader of the appropriate type
|
91
|
-
# @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
|
92
|
-
# @param [:strictmarc, :permissivemarc, :marcxml] type The type of MARC reader you want.
|
93
|
-
# @return [MarcReader] A MarcReader object with the syntactic sugar added in this file (e.g, each)
|
94
|
-
#
|
95
|
-
# @example Get a strict binary MARC reader for the file 'test.mrc'
|
96
|
-
# reader = MARC4J4R.reader('test.mrc')
|
97
|
-
#
|
98
|
-
# @example Get a permissive binary MARC reader
|
99
|
-
# reader = MARC4J4R.reader('test.mrc', :permissivemarc)
|
100
|
-
#
|
101
|
-
# @example Get a reader for an xml file
|
102
|
-
# reader = MARC4J4R.reader('test.xml', :marcxml)
|
103
|
-
#
|
104
|
-
# @example Get a reader based on an existing IO object
|
105
|
-
# require 'open-uri'
|
106
|
-
# infile = open('http://my.machine.com/test.mrc')
|
107
|
-
# reader = MARC4J4R.reader(infile)
|
108
|
-
|
109
|
-
def reader(input, type = :strictmarc)
|
110
|
-
case type
|
111
|
-
when :strictmarc then
|
112
|
-
return Java::org.marc4j.MarcStreamReader.new(input)
|
113
|
-
when :permissivemarc then
|
114
|
-
return Java::org.marc4j.MarcPermissiveStreamReader.new(input)
|
115
|
-
when :marcxml then
|
116
|
-
return Java::org.marc4j.MarcXmlReader.new(input)
|
117
|
-
when :alephsequential then
|
118
|
-
return MARC4J4R::AlephSequentialReader.new(input)
|
119
|
-
else
|
120
|
-
raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, or :alephsequential"
|
121
|
-
end
|
122
|
-
end
|
123
|
-
module_function :reader
|
124
|
-
|
125
|
-
|
126
|
-
# Implement an AlephSequential reader
|
127
|
-
class AlephSequentialReader
|
128
|
-
include Enumerable
|
129
|
-
def initialize(fromwhere)
|
130
|
-
stream = nil
|
131
|
-
if fromwhere.is_a? Java::JavaIO::InputStream
|
132
|
-
stream = fromwhere.to_io
|
133
|
-
elsif fromwhere.is_a? IO
|
134
|
-
stream = fromwhere
|
135
|
-
else
|
136
|
-
stream = File.new(fromwhere)
|
137
|
-
end
|
138
|
-
|
139
|
-
@handle = stream
|
140
|
-
end
|
141
|
-
|
142
|
-
def each
|
143
|
-
record = nil
|
144
|
-
currentID = nil
|
145
|
-
|
146
|
-
@handle.each_line do |l|
|
147
|
-
l.chomp!
|
148
|
-
next unless l =~ /\S/
|
149
|
-
vals = l.unpack('a9 a a3 c c a3 a*')
|
150
|
-
id, tag, ind1, ind2, data = vals[0], vals[2], vals[3], vals[4], vals[6]
|
151
|
-
# id, tag, ind1, ind2, junk, data = *(l.unpack('A10 a3 c c a3 A*'))
|
152
|
-
if id != currentID
|
153
|
-
if record
|
154
|
-
yield record
|
155
|
-
end
|
156
|
-
record = RecordImpl.new
|
157
|
-
currentID = id
|
158
|
-
end
|
159
|
-
if tag == 'LDR'
|
160
|
-
record.setLeader(Java::org.marc4j.marc.impl.LeaderImpl.new(data))
|
161
|
-
else
|
162
|
-
record << buildField(tag,ind1,ind2,data)
|
163
|
-
end
|
164
|
-
end
|
165
|
-
yield record
|
166
|
-
end
|
167
|
-
|
168
|
-
|
169
|
-
SUBREGEXP = /\$\$(.)/
|
170
|
-
def buildField (tag, ind1, ind2, data)
|
171
|
-
if Java::org.marc4j.marc.impl.Verifier.isControlField tag
|
172
|
-
return Java::org.marc4j.marc.impl.ControlFieldImpl.new(tag, data)
|
173
|
-
else
|
174
|
-
f = Java::org.marc4j.marc.impl.DataFieldImpl.new(tag, ind1, ind2)
|
175
|
-
data.split(SUBREGEXP)[1..-1].each_slice(2) do |code, value|
|
176
|
-
f.addSubfield Java::org.marc4j.marc.impl.SubfieldImpl.new(code[0].ord, value)
|
177
|
-
end
|
178
|
-
return f
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
end # End of class AlephSequentialReader
|
183
|
-
|
184
|
-
end
|
185
|
-
|
186
|
-
|
187
|
-
# Open up RecordImpl to add some sugar, including Enumerable as well
|
188
|
-
# @author Bill Dueber
|
189
|
-
|
190
|
-
class Record
|
191
|
-
include Enumerable
|
192
|
-
|
193
|
-
alias_method :<<, :addVariableField
|
194
|
-
alias_method :append, :addVariableField
|
195
|
-
alias_method :fields, :getVariableFields
|
196
|
-
|
197
|
-
# Export as a MARC-Hash, as described at
|
198
|
-
# http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
|
199
|
-
# @return A marc-hash representation of the record, suitable for calling .to_json on or whatever
|
200
|
-
def to_marchash
|
201
|
-
h = {}
|
202
|
-
h['type'] = 'marc-hash'
|
203
|
-
h['version'] = [1,0]
|
204
|
-
h['leader'] = self.leader
|
205
|
-
|
206
|
-
fields = []
|
207
|
-
|
208
|
-
self.getVariableFields.each do |f|
|
209
|
-
if f.controlField?
|
210
|
-
fields << [f.tag, f.value]
|
211
|
-
else
|
212
|
-
farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
|
213
|
-
subs = []
|
214
|
-
f.each do |subfield|
|
215
|
-
subs << [subfield.code, subfield.value]
|
216
|
-
end
|
217
|
-
farray.push subs
|
218
|
-
fields << farray
|
219
|
-
end
|
220
|
-
end
|
221
|
-
h['fields'] = fields
|
222
|
-
return h
|
223
|
-
end
|
224
|
-
|
225
|
-
# Create a local hash by tag number; makes some stuff faster
|
226
|
-
# Called automatically if you use reader.each
|
227
|
-
|
228
|
-
def hashify
|
229
|
-
return if @hashedtags # don't do it more than once
|
230
|
-
@hashedtags = {}
|
231
|
-
self.getVariableFields.each do |f|
|
232
|
-
@hashedtags[f.tag] ||= []
|
233
|
-
@hashedtags[f.tag].push f
|
234
|
-
end
|
235
|
-
end
|
236
|
-
|
237
|
-
# Create a nice string of the record
|
238
|
-
def to_s
|
239
|
-
arr = ['LEADER ' + self.leader]
|
240
|
-
self.each do |f|
|
241
|
-
arr.push f.to_s
|
242
|
-
end
|
243
|
-
return arr.join("\n")
|
244
|
-
end
|
245
|
-
|
246
|
-
# Get the leader as a string (marc4j would otherwise return Leader object)
|
247
|
-
def leader
|
248
|
-
self.get_leader.toString
|
249
|
-
end
|
250
|
-
|
251
|
-
|
252
|
-
# Cycle through the fields in the order they appear in the record
|
253
|
-
def each
|
254
|
-
self.getVariableFields.each do |f|
|
255
|
-
yield f
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
# Get the first field associated with a tag
|
260
|
-
# @param [String] tag The tag
|
261
|
-
# @return [Field] The first matching field, or nil if none. Note that
|
262
|
-
# to mirror ruby-marc, this returns a single field
|
263
|
-
|
264
|
-
def [] tag
|
265
|
-
if defined? @hashedtags
|
266
|
-
if @hashedtags[tag]
|
267
|
-
return @hashedtags[tag][0]
|
268
|
-
else
|
269
|
-
return nil
|
270
|
-
end
|
271
|
-
else
|
272
|
-
return self.getVariableField(tag)
|
273
|
-
end
|
274
|
-
end
|
275
|
-
|
276
|
-
|
277
|
-
# Get a (possibly empty) list of fields with the given tag(s)
|
278
|
-
#
|
279
|
-
# @param [String, Array<String>] tags A string (or Array of strings) with the tags you're interested in
|
280
|
-
# @param [Boolean] originalorder Whether or not results should be presented in the original order within the
|
281
|
-
# record or with a two-column sort of (a) Order of the tag in the list of tags sent, (b) order within that tag
|
282
|
-
# in the record
|
283
|
-
# @return [Array<Field>] Either an empty list or a list of one or more matched fields will be returned.
|
284
|
-
#
|
285
|
-
# originalorder == false will use an internal hash and be faster in many cases (see #hashify)
|
286
|
-
#
|
287
|
-
# @example originalorder == false
|
288
|
-
# # Given a record that looks like
|
289
|
-
# # 010 $a 68027371
|
290
|
-
# # 035 $a (RLIN)MIUG0001728-B
|
291
|
-
# # 035 $a (CaOTULAS)159818044
|
292
|
-
# # 035 $a (OCoLC)ocm00001728
|
293
|
-
#
|
294
|
-
# r.find_by_tag(['035', '010']).each {|f| puts f.to_s}
|
295
|
-
# # 035 $a (RLIN)MIUG0001728-B
|
296
|
-
# # 035 $a (CaOTULAS)159818044
|
297
|
-
# # 035 $a (OCoLC)ocm00001728
|
298
|
-
# # 010 $a 68027371
|
299
|
-
#
|
300
|
-
# # The results are ordered first by tag as passed in, then by original order within the tag
|
301
|
-
#
|
302
|
-
# @example Just get all fields for a single tag
|
303
|
-
# ohThirtyFives = r.find_by_tag('035')
|
304
|
-
#
|
305
|
-
# @example Get a bunch of standard identifiers
|
306
|
-
# standardIDs = r.find_by_tag(['022', '020', '010'])
|
307
|
-
#
|
308
|
-
# @example originalorder == true
|
309
|
-
# r.find_by_tag(['035', '010'], true).each {|f| puts f.to_s}
|
310
|
-
# # 010 $a 68027371
|
311
|
-
# # 035 $a (RLIN)MIUG0001728-B
|
312
|
-
# # 035 $a (CaOTULAS)159818044
|
313
|
-
# # 035 $a (OCoLC)ocm00001728
|
314
|
-
|
315
|
-
def find_by_tag(tags, originalorder = false)
|
316
|
-
self.hashify unless @hashedtags and !originalorder
|
317
|
-
if !tags.is_a? Array
|
318
|
-
return @hashedtags[tags] || []
|
319
|
-
end
|
320
|
-
if originalorder
|
321
|
-
return self.find_all {|f| tags.include? f.tag}
|
322
|
-
else
|
323
|
-
# puts "Tags is #{tags}: got #{@hashedtags.values_at(*tags)}"
|
324
|
-
return @hashedtags.values_at(*tags).flatten.compact
|
325
|
-
end
|
326
|
-
end
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
# Return the record as valid MARC-XML
|
331
|
-
# @return String A MARC-XML representation of the record, including the XML header
|
332
|
-
def to_xml
|
333
|
-
return @xml if @xml
|
334
|
-
begin
|
335
|
-
@xml = java.io.StringWriter.new
|
336
|
-
res = javax.xml.transform.stream.StreamResult.new(@xml)
|
337
|
-
writer = org.marc4j.MarcXmlWriter.new(res)
|
338
|
-
writer.write(self)
|
339
|
-
writer.writeEndDocument
|
340
|
-
return @xml.toString
|
341
|
-
rescue
|
342
|
-
"Woops! to_xml failed for record #{self['001'].data}: #{$!}"
|
343
|
-
end
|
344
|
-
end
|
345
|
-
|
346
|
-
def to_marc
|
347
|
-
begin
|
348
|
-
s = Java::java.io.ByteArrayOutputStream.new
|
349
|
-
writer = org.marc4j.MarcStreamWriter.new(s)
|
350
|
-
writer.write(self)
|
351
|
-
@marcbinary = s.to_string
|
352
|
-
puts @marcbinary
|
353
|
-
return @marcbinary
|
354
|
-
rescue
|
355
|
-
# "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
|
356
|
-
"Whoops! Failed: #{$!}"
|
357
|
-
end
|
358
|
-
end
|
359
|
-
|
360
|
-
|
361
|
-
end
|
362
|
-
|
363
|
-
class ControlField
|
364
|
-
def value
|
365
|
-
return self.data
|
366
|
-
end
|
367
|
-
|
368
|
-
def controlField?
|
369
|
-
return true
|
370
|
-
end
|
371
|
-
|
372
|
-
def self.control_tag? tag
|
373
|
-
return Java::org.marc4j.marc.impl.Verifier.isControlField tag
|
374
|
-
end
|
375
|
-
|
376
|
-
# Pretty-print
|
377
|
-
# @param [String] joiner What string to use to join the subfields
|
378
|
-
# @return [String] The pretty string
|
379
|
-
def to_s
|
380
|
-
return self.tag + " " + self.value
|
381
|
-
end
|
382
|
-
|
383
|
-
def == other
|
384
|
-
self.tag == other.tag && self.value == other.value
|
385
|
-
end
|
386
|
-
|
387
|
-
end
|
388
|
-
|
389
|
-
class DataField
|
390
|
-
include Enumerable
|
391
|
-
|
392
|
-
alias_method :<<, :addSubfield
|
393
|
-
|
394
|
-
|
395
|
-
def controlField?
|
396
|
-
return false
|
397
|
-
end
|
398
|
-
|
399
|
-
# Broken. Need to check subs as well
|
400
|
-
def == other
|
401
|
-
self.tag == other.tag and
|
402
|
-
self.indicator1 == other.indicator1 and
|
403
|
-
self.indicator2 == other.indicator2
|
404
|
-
end
|
405
|
-
|
406
|
-
# Pretty-print
|
407
|
-
# @param [String] joiner What string to use to join the subfields
|
408
|
-
# @return [String] The pretty string
|
409
|
-
def to_s (joiner = ' ')
|
410
|
-
arr = [self.tag + ' ' + self.indicator1 + self.indicator2]
|
411
|
-
self.each do |s|
|
412
|
-
arr.push s.to_s
|
413
|
-
end
|
414
|
-
return arr.join(joiner)
|
415
|
-
end
|
416
|
-
|
417
|
-
# Get the value of the first subfield of this field with the given code
|
418
|
-
# @param [String] code 1-character string of the subfield code
|
419
|
-
# @return [String] The value of the first matched subfield
|
420
|
-
def [] code
|
421
|
-
raise ArgumentError, "Code must be a one-character string, not #{code}" unless code.is_a? String and code.size == 1
|
422
|
-
# need to send a char value that the underlying java can deal with
|
423
|
-
sub = self.getSubfield(code[0].ord)
|
424
|
-
if (sub)
|
425
|
-
return sub.getData
|
426
|
-
else
|
427
|
-
return nil
|
428
|
-
end
|
429
|
-
end
|
430
|
-
|
431
|
-
|
432
|
-
# Get all values from the subfields for the given code or array of codes
|
433
|
-
# @param [String, Array<String>] code (Array of?) 1-character string(s) of the subfield code
|
434
|
-
# @param [Boolean] myorder Use the order of subfields that I gave instead of the order they're in the record
|
435
|
-
# @return [Array<String>] A possibly-empty array of Strings made up of the values in the subfields whose
|
436
|
-
# code is included in the given codes. If myorder == true, use the order in which they are passed in; if a code is repeated
|
437
|
-
# (occasionally legal) subfield values will appear first ordered by the passed array, then by order within
|
438
|
-
# the document.
|
439
|
-
#
|
440
|
-
# If myorder is false, just return the values for matching subfields in the order they appear in the field.
|
441
|
-
#
|
442
|
-
# @example Quick examples:
|
443
|
-
# # 260 $a New York, $b Van Nostrand Reinhold Co. $c 1969
|
444
|
-
# rec['260'].sub_values('a') #=> ["New York,"]
|
445
|
-
# rec['260'].sub_values(['a', 'c']) #=> ["New York,", "1969"]
|
446
|
-
# rec['260'].sub_values(['c', 'a']) #=> ["New York,", "1969"]
|
447
|
-
# rec['260'].sub_values(['c', 'a'], true) #=> ["1969", "New York,"]
|
448
|
-
|
449
|
-
def sub_values(code, myorder = false)
|
450
|
-
|
451
|
-
# Do a little razzle-dazzle for the common case when a single code is given
|
452
|
-
if not [Set, Array].include? code.class
|
453
|
-
c = code
|
454
|
-
elsif code.size == 1
|
455
|
-
c = code.first
|
456
|
-
end
|
457
|
-
if c
|
458
|
-
return self.find_all { |s| c == s.code}.map {|s| s.data}
|
459
|
-
end
|
460
|
-
|
461
|
-
# unless [Set, Array].include? code.class
|
462
|
-
# code = [code]
|
463
|
-
# # puts "Arrayified for code #{code} / #{code.class}"
|
464
|
-
# end
|
465
|
-
if myorder
|
466
|
-
subs = []
|
467
|
-
code.each do |c|
|
468
|
-
subs << self.find_all {|s| c == s.code}
|
469
|
-
end
|
470
|
-
return subs.flatten.map {|s| s.data}
|
471
|
-
else
|
472
|
-
return self.find_all{|s| code.include? s.code}.map {|s| s.data}
|
473
|
-
end
|
474
|
-
end
|
475
|
-
|
476
|
-
# Get first indicator as a one-character string
|
477
|
-
def indicator1
|
478
|
-
return self.getIndicator1.chr
|
479
|
-
end
|
480
|
-
|
481
|
-
# Get second indicator as a one-character string
|
482
|
-
def indicator2
|
483
|
-
return self.getIndicator2.chr
|
484
|
-
end
|
485
|
-
|
486
|
-
# Iterate over the subfields
|
487
|
-
def each
|
488
|
-
self.getSubfields.each do |s|
|
489
|
-
yield s
|
490
|
-
end
|
491
|
-
end
|
492
|
-
|
493
|
-
# Get the concatenated values of the subfields in the order they appear in the field
|
494
|
-
# @param [String] joiner The string used to join the subfield values
|
495
|
-
def value joiner=' '
|
496
|
-
data = self.getSubfields.map {|s| s.data}
|
497
|
-
return data.join(joiner)
|
498
|
-
end
|
499
|
-
end
|
500
|
-
|
501
|
-
class SubField
|
502
|
-
|
503
|
-
def == other
|
504
|
-
return ((self.code == other.code) and (self.data == other.data))
|
505
|
-
end
|
506
|
-
|
507
|
-
def value
|
508
|
-
return self.data
|
509
|
-
end
|
510
|
-
|
511
|
-
def code
|
512
|
-
return self.getCode.chr
|
513
|
-
end
|
514
|
-
|
515
|
-
def to_s
|
516
|
-
return '$' + self.code + " " + self.data
|
517
|
-
end
|
518
|
-
end
|