marc_alephsequential 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.document +3 -0
- data/.gitignore +4 -0
- data/.travis.yml +10 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +4 -0
- data/Gemfile +18 -0
- data/LICENSE.txt +20 -0
- data/README.md +103 -0
- data/Rakefile +38 -0
- data/lib/marc_alephsequential/asline.rb +161 -0
- data/lib/marc_alephsequential/asline_group.rb +101 -0
- data/lib/marc_alephsequential/asline_reader.rb +16 -0
- data/lib/marc_alephsequential/buffered_linereader.rb +100 -0
- data/lib/marc_alephsequential/error.rb +13 -0
- data/lib/marc_alephsequential/log.rb +20 -0
- data/lib/marc_alephsequential/reader.rb +54 -0
- data/lib/marc_alephsequential/version.rb +4 -0
- data/lib/marc_alephsequential.rb +9 -0
- data/marc_alephsequential.gemspec +20 -0
- data/spec/asline_group_spec.rb +59 -0
- data/spec/asline_spec.rb +46 -0
- data/spec/data/batch.seq +1000 -0
- data/spec/data/newline.seq +33 -0
- data/spec/data/no_initial_subfield.seq +34 -0
- data/spec/data/noleader.seq +33 -0
- data/spec/data/single.seq +34 -0
- data/spec/helper.rb +22 -0
- data/spec/reader_spec.rb +62 -0
- data/spec/version_spec.rb +10 -0
- metadata +84 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
Yjk3YTZkMTY4Njg0NGY3ZTQ5ZGY2MzQ3MGIxMDA4NTdjMzEzZmNkNA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NmMwMWQzMTIzMmQ3ZmNlOTM5YjRiMGFmZTI2MTA3NmM0Njk0ZTJiYg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NWE0M2NiM2M5MTcwOTJjNWJkN2JiMDk5OTJmNWIxYmVmNTg4NGY1ZTAxZGE1
|
10
|
+
OTM4ZDBhMTA3ZDZiYmY0YWU3M2I5NWM5MzBlYjhkY2QxNGZhNDdhMDAyM2Qx
|
11
|
+
MjQ4YWFiNTkxMjhmODk3Mzc0YjIzMGVlMzVmYzRlOTc0Y2Y3YmE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
OWZmOGVkYWVkZTM4ZGQ1YmM1NGMzNzBkM2IwY2I4NmZmMmI1OTYzN2FhMTQw
|
14
|
+
NDcwMDQ0Zjk3MzdiZWQxN2M1ZmQzMGRhYWFkNTg5NGFiNTY4Yjk4N2JiMTFi
|
15
|
+
ZWEyZjJkMThkY2RlM2VjYmZhNTQ1NmYxYzM5NDhlN2QyOGY3MGE=
|
data/.document
ADDED
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup markdown --title "marc_alephsequential Documentation" --protected
|
data/ChangeLog.md
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
source 'https://rubygems.org'

# Pull in whatever the gem's own .gemspec declares
gemspec

# Libraries used by the code itself
gem 'marc'
gem 'yell'

# Only needed to run the specs
gem 'minitest',           :group => :test
gem 'minitest-reporters', '>= 0.8.0', :group => :test
gem 'minitest-colorize',  :group => :test

# Only needed for building docs and packaging
gem 'kramdown', :group => :development
gem 'bundler',  '~> 1',   :group => :development
gem 'rake',     '>= 1.0', :group => :development
gem 'yard',     '>= 0.8', :group => :development
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Bill Dueber
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
# marc_alephsequential
|
2
|
+
[![Build Status](https://secure.travis-ci.org/billdueber/marc_alephsequential.png)](http://travis-ci.org/billdueber/marc_alephsequential)
|
3
|
+
|
4
|
+
A [ruby-marc](https://github.com/ruby-marc/ruby-marc) reader for MARC files in the Aleph sequential format
|
5
|
+
|
6
|
+
* [Homepage](https://github.com/billdueber/marc_alephsequential#readme)
|
7
|
+
* [Issues](https://github.com/billdueber/marc_alephsequential/issues)
|
8
|
+
* [Documentation](http://rubydoc.info/gems/marc_alephsequential/frames)
|
9
|
+
* [Email](mailto:bill at dueber.com)
|
10
|
+
|
11
|
+
## Examples
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
|
15
|
+
require 'marc'
|
16
|
+
require 'marc_alephsequential'
|
17
|
+
|
18
|
+
log = GetALogFromSomewhere.new
|
19
|
+
# reader = MARC::AlephSequential::Reader.new('myfile.seq')
|
20
|
+
reader = MARC::AlephSequential::Reader.new('myfile.seq.gz') # automatically notice the .gz and behave!
|
21
|
+
|
22
|
+
reader.log = log # optional. Set up a logger; otherwise, a default logger will be used
|
23
|
+
|
24
|
+
begin
|
25
|
+
reader.each do |r|
|
26
|
+
# do stuff with the record
|
27
|
+
end
|
28
|
+
rescue MARC::AlephSequential::Error => e
|
29
|
+
log.error "Error while parsing record #{e.record_id} at/near #{e.line_number}: #{e.message}"
|
30
|
+
retry # may or may not work the way you'd hope/expect
|
31
|
+
rescue => e
|
32
|
+
log.error "Other error of some sort. quitting. #{e.message}"
|
33
|
+
end
|
34
|
+
|
35
|
+
```
|
36
|
+
|
37
|
+
## Description of the Aleph Sequential format
|
38
|
+
|
39
|
+
Aleph sequential is a MARC serialization format that is easily output by Ex Libris' Aleph software.
|
40
|
+
Each MARC record is presented as a series of unicode text lines, one field per line.
|
41
|
+
|
42
|
+
|
43
|
+
000000228 LDR L ^^^^^nam^a22002891^^4500
|
44
|
+
000000228 001 L 000000228
|
45
|
+
000000228 006 L m^^^^^^^^d^^^^^^^^
|
46
|
+
000000228 007 L cr^bn^---auaua
|
47
|
+
000000228 008 L 880715r19691828nyuab^^^^^^^^|00000^eng^^
|
48
|
+
000000228 010 L $$a68055188
|
49
|
+
000000228 020 L $$a083711750X
|
50
|
+
000000228 035 L $$a(RLIN)MIUG0021856-B
|
51
|
+
000000794 24514 L $$aThe descent of manuscripts.
|
52
|
+
000000794 60010 L $$aCicero, Marcus Tullius$$xManuscripts.
|
53
|
+
000000794 60000 L $$aPlato.$$tCritias$$xManuscripts.
|
54
|
+
|
55
|
+
Each line has the following format (note: All must be in utf-8)
|
56
|
+
|
57
|
+
* 9 characters (all digits) for the aleph record ID
|
58
|
+
* [space]
|
59
|
+
* 3 character tag (left-justified / space padded if need be)
|
60
|
+
* 1 character indicator 1
|
61
|
+
* 1 character indicator 2
|
62
|
+
* [space L space], for some historic reasons I don't know
|
63
|
+
* The tag's value, perhaps with internal subfields
|
64
|
+
|
65
|
+
A record is defined as a set of continuous lines with the same record ID (i.e., the way you know you've finished with a record is because the record ID changes or you hit EOF).
|
66
|
+
|
67
|
+
### How to read the Aleph sequential "value"
|
68
|
+
|
69
|
+
The leader and control fields have no internal structure, but spaces in the values are stored as '^' for some reason. (The reader, obviously, changes them back into spaces)
|
70
|
+
|
71
|
+
For data fields, the subfields are indicated as follows:
|
72
|
+
|
73
|
+
* A _subfield start marker_ (let's just say "SSM") matches /\$\$[a-z0-9]/ (e.g., $$a)
|
74
|
+
* The value string for a data field must start with an SSM
|
75
|
+
* An SSM marks the start of a subfield (and the end of the previous subfield, if any)
|
76
|
+
|
77
|
+
### Obvious limitations of the Aleph sequential format
|
78
|
+
|
79
|
+
Actually, it's not all bad; I like it in a lot of ways. A little verbose at times, but easy to read for a human, and easy to write one-off scripts to run through a file and get statistics about use of tags, find a specific record (just match the bib ID at the beginning of the line), etc.
|
80
|
+
|
81
|
+
The easy-to-see problems are:
|
82
|
+
|
83
|
+
* fixed field size. Aleph has a lot of Cobol underneath. So if your bib ids don't happen to be nine characters, well, too bad.
|
84
|
+
* You can't have an embedded '$$' in a data field's value, because it will be interpreted as the start of a new subfield. '$$' isn't super common as a typo, but I've seen it.
|
85
|
+
|
86
|
+
|
87
|
+
## Parse errors and automatic workarounds
|
88
|
+
|
89
|
+
* Lines that don't start with a nine-digit id will be assumed to be a part of the previous line that has an illegal spurious newline. The newline will be removed and all put back together again. If there is no "previous line" because it's the first line of the file, throw an error.
|
90
|
+
* Any completed record that doesn't include a leader (LDR) will throw an error
|
91
|
+
* Datafield values that don't start with '$$' will be logged as an error, and the initial data will be assumed to belong in subfield $$a
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
## Install
|
96
|
+
|
97
|
+
$ gem install marc_alephsequential
|
98
|
+
|
99
|
+
## Copyright
|
100
|
+
|
101
|
+
Copyright (c) 2013 Bill Dueber
|
102
|
+
|
103
|
+
See {file:LICENSE.txt} for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8

require 'rubygems'

# Bundler has to be loadable before anything else; if it is not, print a hint and quit.
begin
  require 'bundler'
rescue LoadError => load_error
  warn load_error.message
  warn "Run `gem install bundler` to install Bundler."
  exit(-1)
end

# Activate the development gems; a Bundler failure supplies its own exit status.
begin
  Bundler.setup(:development)
rescue Bundler::BundlerError => bundler_error
  warn bundler_error.message
  warn "Run `bundle install` to install missing gems."
  exit(bundler_error.status_code)
end

require 'rake'
require "bundler/gem_tasks"

# Documentation: `doc` simply depends on the `yard` task
require 'yard'
YARD::Rake::YardocTask.new
task :doc => :yard

# Tests: every *_spec.rb under spec/, with lib/ and spec/ on the load path
require 'rake/testtask'
Rake::TestTask.new do |test_task|
  test_task.libs.push('lib', 'spec')
  test_task.test_files = Dir.glob('spec/**/*_spec.rb')
end

# `spec` and the default task both just run the tests
task :spec => :test
task :default => :test
|
@@ -0,0 +1,161 @@
|
|
1
|
+
require 'marc'
|
2
|
+
require_relative 'error'
|
3
|
+
require_relative 'log'
|
4
|
+
|
5
|
+
module MARC
|
6
|
+
module AlephSequential
|
7
|
+
|
8
|
+
|
9
|
+
# A model of a single line (one field) in an Aleph sequential file.
#
# A line is laid out in fixed columns: a 9-character record id, a space,
# a 3-character tag, two one-character indicators, the separator " L "
# and then the value, which starts at column 18.
class ASLine

  include Log

  # Characters in leader/control fields that need to be turned (back) into spaces
  TURN_TO_SPACE = /\^/

  # Pattern used to split data field values into subfield code/value pairs
  SUBFIELD_SPLIT_PATTERN = /\$\$([a-zA-Z0-9])/

  # A well-formed data field value begins with a complete subfield start marker
  STARTS_WITH_SUBFIELD = /\A\$\$[a-zA-Z0-9]/

  # How to know if we have a valid id? Must be exactly 9 digits
  # (\A / \z so that the whole string has to match, not just one line of it)
  VALID_ID = /\A\d{9}\z/

  # The passed in raw string (minus its line ending), used for post-processing later on
  attr_accessor :rawstr

  # The line number in the file/stream, for error reporting
  attr_accessor :line_number

  # Either the value of a control/fixed field, or a string representation of a datafield's subfields
  attr_accessor :value

  # The type of field (:leader, :control, :data, or :invalid_id)
  attr_accessor :type

  # The record id and the two indicators, exactly as sliced out of the line
  attr_accessor :id, :ind1, :ind2

  # The MARC field's tag. The writer is custom (see #tag=) because it also sets #type.
  attr_reader :tag


  # Given a raw string and a line number, construct the appropriate ASLine.
  #
  # @param [String] rawstr The raw string from the file
  # @param [Number] line_number The line number from the file/stream, for error reporting
  def initialize(rawstr, line_number)
    @rawstr      = rawstr.chomp
    @line_number = line_number

    # Assigned left to right: the id must be in place before the tag,
    # because #tag= consults #valid_id? to decide on the type.
    self.id, self.tag, self.ind1, self.ind2, self.value = parseline(@rawstr)

    # clean up the leader or fixed fields
    self.value = cleanup_fixed(value) if [:leader, :control].include?(type)
  end

  # Does this line have a valid (-looking) id?
  # @return [Boolean]
  def valid_id?
    VALID_ID.match(id.to_s) ? true : false
  end


  # Turn it into an actual MARC field (control or data)
  # @return [MARC::ControlField, MARC::DataField]
  # @raise [MARC::AlephSequential::Error] if called on anything that is not a
  #   control or data line (e.g., a leader line or a line with an invalid id)
  def to_field
    case type
    when :control
      to_control_field
    when :data
      to_data_field
    else
      raise MARC::AlephSequential::Error.new(id, line_number), "Tried to call #to_field on line type '#{type}'", nil
    end
  end

  # Turn the current object into a control field, without doing any checks
  # @return [MARC::ControlField]
  def to_control_field
    MARC::ControlField.new(tag, cleanup_fixed(value))
  end

  # Turn the current object into a datafield, without doing any checks.
  #
  # A value that does not begin with a subfield start marker is logged as an
  # error and gets '$$a' prepended (this updates #value), so the leading text
  # ends up in subfield a instead of being lost.
  # @return [MARC::DataField]
  def to_data_field
    unless value.to_s =~ STARTS_WITH_SUBFIELD
      log.error("#{line_number} #{id} Variable field #{tag} doesn't start with '$$'. Prepending '$$a'.")
      self.value = '$$a' + value.to_s
    end

    field = MARC::DataField.new(tag, ind1, ind2)
    field.subfields = parse_string_into_subfields(value)
    field
  end

  # Parse out a non-controlfield value string into a set of subfields
  # @param [String] val the value string, of the form "$$athis is the a$$band the b"
  # @return [Array<Subfield>] An array of MARC subfields
  #
  # If the first value in the array returned by the split isn't the empty string, then
  # the string didn't start with a subfield marker; that text is logged and kept as
  # a leading subfield 'a' rather than thrown away. A marker with nothing after it
  # produces a subfield whose value is the empty string.
  def parse_string_into_subfields(val)
    leading, *pairs = val.to_s.split(SUBFIELD_SPLIT_PATTERN)

    subfields = pairs.each_slice(2).map { |code, text| MARC::Subfield.new(code, text || '') }

    unless leading.nil? || leading.empty?
      log.warn("#{line_number} #{id} Text before the first subfield marker in field #{tag}; keeping it as subfield 'a'.")
      subfields.unshift(MARC::Subfield.new('a', leading))
    end

    subfields
  end

  # Clean up fixed fields/leader, turning Ex Libris characters back into normal characters
  # @param [String] val The string to clean (nil is treated as the empty string)
  # @return [String] The cleaned string
  def cleanup_fixed(val)
    val.to_s.gsub(TURN_TO_SPACE, ' ')
  end

  # Set the tag. As a side effect, set the type when we set the tag
  # type will end up as :leader, :control, :data, or :invalid_id
  #
  # The id is checked first: a line without a valid id is a stray continuation
  # of the previous line, and whatever text happens to sit in the tag columns
  # must not get it classified as a leader or control field.
  def tag=(t)
    @tag = t
    self.type =
      if !valid_id?
        :invalid_id
      elsif t == 'LDR'
        :leader
      elsif MARC::ControlField.control_tag?(t)
        :control
      else
        :data
      end
  end


  # Get a line and parse it out into its component parts
  # @param [String] line the line to parse
  # @return [Array] An array of the form [id, tag, ind1, ind2, value]; the tag and
  #   indicators are nil when the line is too short to contain them, and the value
  #   is the empty string when the line ends before column 18
  def parseline(line)
    [line[0, 9], line[10, 3], line[13, 1], line[14, 1], line[18..-1] || '']
  end

end # ASLine
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require_relative 'asline'
|
2
|
+
require_relative 'error'
|
3
|
+
require_relative 'log'
|
4
|
+
|
5
|
+
|
6
|
+
module MARC
|
7
|
+
module AlephSequential
|
8
|
+
|
9
|
+
# A group of ASLine objects with logic to correctly turn them into a MARC::Record object
# @see ASLine
class ASLineGroup

  include Log

  # @!attribute aslines
  #   @return [Array<ASLine>] The non-leader lines added so far, in the order they were added
  attr_accessor :aslines

  # @!attribute [r] leader
  #   @return [String, nil] The leader string, pulled from whatever was passed in with a LDR tag
  attr_reader :leader


  def initialize
    @aslines = []
    @leader  = nil
  end

  # Number of aslines already added (a leader line is stored separately and not counted)
  # @return Integer
  def size
    aslines.size
  end

  # Is this group empty? (A group holding nothing but a leader still counts as empty.)
  def empty?
    aslines.empty?
  end


  # Add an ASLine object.
  # A :leader line only sets the leader. An ASLine object with type :invalid_id will be
  # treated as a string and appended to the previous field (to deal with not-uncommon
  # spurious newlines in data fields). Anything else is stored as-is.
  # @return [Undefined] side effect only
  # @raise MARC::AlephSequential::Error when there's an invalid ID _and_ there's no previous
  #   field to concatenate it to.
  def add(asline)
    case asline.type
    when :leader
      log.warn("#{asline.line_number} #{asline.id} Set leader more than once; last one wins") if leader
      @leader = asline.value
    when :invalid_id
      lastfield = @aslines.pop
      unless lastfield
        raise MARC::AlephSequential::Error.new('unknown', asline.line_number),
              "#{asline.line_number} has invalid id and no previous line to concat it to (file starts bad?)",
              nil
      end
      log.info "#{asline.line_number} #{lastfield.id} / #{lastfield.tag} Concatenating line #{asline.line_number} to previous line"
      # Re-parse the joined text so the value of the rebuilt line includes the continuation
      @aslines.push ASLine.new(lastfield.rawstr + asline.rawstr, lastfield.line_number)
    else
      @aslines.push asline
    end
  end


  # Add an asline as a raw string
  # @param [String] asline_string The raw line
  # @param [Number] line_number Its line number in the file/stream
  def add_string(asline_string, line_number)
    add(ASLine.new(asline_string, line_number))
  end

  # Turn this object into a MARC::Record
  # @return [MARC::Record]
  # @raise MARC::AlephSequential::Error if this object is empty
  # @raise MARC::AlephSequential::Error if there's no leader
  def as_record
    if empty?
      raise MARC::AlephSequential::Error.new('unknown', 'unknown'), "Can't turn an empty group into a record", nil
    end

    unless leader
      first_line = @aslines[0]
      raise MARC::AlephSequential::Error.new(first_line.id, first_line.line_number),
            "Record #{first_line.id} (near line #{first_line.line_number}) has no leader; can't turn into a record",
            nil
    end

    record = MARC::Record.new
    record.leader = leader
    aslines.each { |asline| record << asline.to_field }
    record
  end

  alias_method :to_record, :as_record
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require_relative 'asline'
|
2
|
+
require_relative 'buffered_linereader'
|
3
|
+
|
4
|
+
module MARC
|
5
|
+
module AlephSequential
|
6
|
+
|
7
|
+
# A BufferedLineReader whose buffered items are ASLine objects instead of raw strings.
class ASLineReader < BufferedLineReader

  # Wrap each raw line, together with its line number, in an ASLine.
  # The parent hook is still invoked first; its return value is not used.
  # @param [String] raw The line as read from the underlying source
  # @param [Number] line_number The line's position in the underlying source
  # @return [ASLine]
  def process_raw(raw, line_number)
    super(raw, line_number)
    ASLine.new(raw, line_number)
  end

end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
module MARC
|
4
|
+
module AlephSequential
|
5
|
+
|
6
|
+
# AlephSequential is a line-oriented format, with the first field of each line
# indicating the record number. Rather than try to screw around with keeping track of
# the last line read, checking to see if we have one, blah blah blah, I'm going to use
# a buffered line reader class so I can #peek at the next line to know if its id
# is different than the current record.
#
# Iteration is destructive: #next, #each and the Enumerable methods built on
# #each all consume lines from the same buffer.
class BufferedLineReader

  include Enumerable

  # How many lines to pull from the underlying source per refill (default 10)
  attr_accessor :buffer_size

  # How many lines have been pulled from the underlying source so far.
  # Because of buffering this runs ahead of the lines handed out by #next.
  attr_reader :underlying_line_number

  # @param [String, #each_line] filename_or_io Either a filename (a name ending in .gz is
  #   read through Zlib::GzipReader) or an already-open object that responds to #each_line
  # @raise [ArgumentError] if the argument is neither of those
  def initialize(filename_or_io)
    @passed_in = filename_or_io

    @underlying_line_number = 0
    @buffer_size = 10
    @buffer = []
    @finished = false

    @handle = open_handle(filename_or_io)
    @iter = @handle.enum_for(:each_line)

    # Fill up the buffer
    fillbuffer
  end

  # Is there at least one more line to hand out?
  def has_next?
    !(@finished && @buffer.empty?)
  end

  # Pull up to buffer_size more lines from the underlying source into the buffer.
  # When the source runs dry, mark the reader finished and close the handle if
  # (and only if) this reader opened it itself.
  def fillbuffer(buffer_size = @buffer_size)
    return if @finished
    buffer_size.times do
      raw = @iter.next
      @underlying_line_number += 1
      @buffer.push process_raw(raw, @underlying_line_number)
    end
  rescue StopIteration
    @finished = true
    @handle.close if @owns_handle && !@handle.closed?
  end

  # Empty version here; can override for processing lines on the fly
  def process_raw(raw, line_number)
    raw
  end

  # Remove and return the next buffered item, refilling the buffer when it runs out.
  # @raise [StopIteration] when there is nothing left
  def next
    raise StopIteration, "End of #{@passed_in}", nil if @buffer.empty?
    item = @buffer.shift
    fillbuffer if @buffer.empty?
    item
  end


  # Look at the next item without consuming it.
  # @return the next item, or nil when there is nothing left
  def peek
    fillbuffer if @buffer.empty?
    @buffer[0]
  end

  # Yield every remaining item in turn. Without a block, return an Enumerator.
  def each
    return enum_for(:each) unless block_given?
    begin
      while true
        yield self.next
      end
    rescue StopIteration
    end
  end

  alias_method :each_line, :each

  private

  # Work out what to read from, remembering in @owns_handle whether we opened it
  # (and are therefore responsible for closing it).
  def open_handle(filename_or_io)
    if filename_or_io.is_a? String
      @owns_handle = true
      if filename_or_io =~ /\.gz\z/
        # Compressed bytes must be read in binary mode; the decompressed text is utf-8
        Zlib::GzipReader.new(File.open(filename_or_io, 'rb'), :external_encoding => Encoding::UTF_8)
      else
        File.open(filename_or_io, 'r:utf-8')
      end
    elsif filename_or_io.respond_to?(:each_line)
      # each_line is the only method ever called on the handle
      @owns_handle = false
      filename_or_io
    else
      raise ArgumentError.new("BufferedLineReader needs an IO object or filename, got #{filename_or_io} (#{filename_or_io.inspect})")
    end
  end
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module MARC
|
2
|
+
module AlephSequential
|
3
|
+
|
4
|
+
# Mixin that gives including classes a #log method backed by a single logger
# stored on the Log module itself.
module Log

  class << self
    # The shared logger; nil until one is assigned or #log creates the default
    attr_accessor :log
  end

  # The shared logger. If none has been set yet, a Yell logger writing
  # warn- and error-level messages to STDERR is created and stored first.
  def log
    unless Log.log
      Log.log = Yell.new(STDERR, :level => [ :warn, :error], :name=>'MAS')
    end
    Log.log
  end
end
|
19
|
+
end
|
20
|
+
end
|