marc-fastxmlwriter 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGES.md +13 -0
- data/Gemfile +1 -1
- data/Rakefile +3 -3
- data/lib/marc/fastxmlwriter/version.rb +1 -1
- data/lib/marc/fastxmlwriter.rb +38 -45
- data/marc-fastxmlwriter.gemspec +16 -16
- data/test/minitest_helper.rb +7 -8
- data/test/round_trip_spec.rb +28 -12
- metadata +22 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 88bd084968e4c8a4faa47fee6a669a27a8d08cb87625698f224955666bf000c5
|
4
|
+
data.tar.gz: c886c3633285a5411f3759dacb8efb42cfecdd9cf3a989c645cc23c8ebac3bdf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3ffb1f390f6d922f5eb7e3d285ff3f4795a7d6281116dbc4474daf1c946d87f0651ea2482ac7a59d083df59934b06a716a38a945b9b66eb3ffc75822288094d
|
7
|
+
data.tar.gz: 6a63ff5f03c1ac97e7db88c524ef3be5e194c6c6d520312bacca9766a396843b0f9e5df33c9ed8f28529ecdac5774bf47a7bf6736acdcb4df6cc68882f011710
|
data/CHANGES.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# MARC::FastXMLWriter
|
2
|
+
|
3
|
+
## 1.1.0 Fix namespace code
|
4
|
+
|
5
|
+
The supposed `include_namespace` code wasn't actually using
|
6
|
+
the namespace (`xmlns="http://www.loc.gov/..."`), only
|
7
|
+
defining it (`xmlns:marc="http://www.loc.gov/..."`), thus producing
|
8
|
+
files that were not, in fact, namespaced.
|
9
|
+
|
10
|
+
Also adds tests against nokogiri as well as rexml
|
11
|
+
|
12
|
+
### 1.0.0 First release
|
13
|
+
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -3,9 +3,9 @@ require "bundler/gem_tasks"
|
|
3
3
|
require "rake/testtask"
|
4
4
|
|
5
5
|
Rake::TestTask.new(:test) do |t|
|
6
|
-
t.pattern =
|
6
|
+
t.pattern = "test/**/*_spec.rb"
|
7
7
|
t.libs << "test"
|
8
8
|
end
|
9
9
|
|
10
|
-
task :
|
11
|
-
task :
|
10
|
+
task spec: :test
|
11
|
+
task default: :test
|
data/lib/marc/fastxmlwriter.rb
CHANGED
@@ -1,90 +1,83 @@
|
|
1
1
|
require "marc/fastxmlwriter/version"
|
2
2
|
|
3
|
-
require
|
3
|
+
require "marc"
|
4
4
|
|
5
5
|
module MARC
|
6
6
|
class FastXMLWriter < MARC::XMLWriter
|
7
|
-
|
8
7
|
XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>'
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
def initialize(file, opts={})
|
8
|
+
|
9
|
+
OPEN_COLLECTION = "<collection>"
|
10
|
+
OPEN_COLLECTION_NAMESPACE = %(<collection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/MARC21/slim" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">)
|
11
|
+
|
12
|
+
def initialize(file, opts = {})
|
16
13
|
super
|
17
|
-
end
|
18
|
-
|
14
|
+
end
|
15
|
+
|
19
16
|
def write(record)
|
20
17
|
@fh.write(self.class.encode(record))
|
21
18
|
# @fh.write("\n")
|
22
19
|
end
|
23
|
-
|
20
|
+
|
24
21
|
class << self
|
25
|
-
|
26
22
|
def open_collection(use_ns)
|
27
23
|
if use_ns
|
28
|
-
|
24
|
+
OPEN_COLLECTION_NAMESPACE.dup
|
29
25
|
else
|
30
|
-
|
26
|
+
OPEN_COLLECTION.dup
|
31
27
|
end
|
32
28
|
end
|
33
|
-
|
34
|
-
|
35
|
-
def single_record_document(r, opts={})
|
36
|
-
|
29
|
+
|
30
|
+
def single_record_document(r, include_namespace: true)
|
37
31
|
xml = XML_HEADER.dup
|
38
|
-
xml << open_collection(
|
39
|
-
xml << encode(r
|
40
|
-
xml <<
|
32
|
+
xml << open_collection(include_namespace)
|
33
|
+
xml << encode(r)
|
34
|
+
xml << "</collection>"
|
41
35
|
xml
|
42
36
|
end
|
43
|
-
|
37
|
+
|
44
38
|
def open_datafield(tag, ind1, ind2)
|
45
|
-
|
46
|
-
return "<datafield tag=\"#{tag}\" ind1=\"#{ind1}\" ind2=\"#{ind2}\">"
|
39
|
+
"<datafield tag=\"#{tag}\" ind1=\"#{ind1}\" ind2=\"#{ind2}\">"
|
47
40
|
end
|
48
|
-
|
41
|
+
|
49
42
|
def open_subfield(code)
|
50
43
|
# return "\n <subfield code=\"#{code}\">"
|
51
|
-
|
44
|
+
"<subfield code=\"#{code}\">"
|
52
45
|
end
|
53
|
-
|
46
|
+
|
54
47
|
def open_controlfield(tag)
|
55
48
|
# return "\n<controlfield tag=\"#{tag}\">"
|
56
|
-
|
49
|
+
"<controlfield tag=\"#{tag}\">"
|
57
50
|
end
|
58
|
-
|
59
|
-
def encode(r
|
51
|
+
|
52
|
+
def encode(r)
|
60
53
|
xml = "<record>"
|
61
|
-
|
54
|
+
|
62
55
|
# MARCXML only allows alphanumerics or spaces in the leader
|
63
|
-
lead = r.leader.gsub(/[^\w|^\s]/,
|
56
|
+
lead = r.leader.gsub(/[^\w|^\s]/, "Z").encode(xml: :text)
|
64
57
|
|
65
58
|
# MARCXML is particular about last four characters; ILSes aren't
|
66
|
-
lead.ljust(23,
|
59
|
+
lead.ljust(23, " ")[20..23] = "4500"
|
67
60
|
|
68
61
|
# MARCXML doesn't like a space here so we need a filler character: Z
|
69
|
-
if
|
62
|
+
if lead[6..6] == " "
|
70
63
|
lead[6..6] = "Z"
|
71
64
|
end
|
72
|
-
|
73
|
-
xml << "<leader>" << lead.encode(:
|
65
|
+
|
66
|
+
xml << "<leader>" << lead.encode(xml: :text) << "</leader>"
|
74
67
|
r.each do |f|
|
75
|
-
if f.
|
68
|
+
if f.instance_of?(MARC::DataField)
|
76
69
|
xml << open_datafield(f.tag, f.indicator1, f.indicator2)
|
77
70
|
f.each do |sf|
|
78
|
-
xml << open_subfield(sf.code) << sf.value.encode(:
|
71
|
+
xml << open_subfield(sf.code) << sf.value.encode(xml: :text) << "</subfield>"
|
79
72
|
end
|
80
|
-
xml <<
|
81
|
-
elsif f.
|
82
|
-
xml << open_controlfield(f.tag) << f.value.encode(:
|
73
|
+
xml << "</datafield>"
|
74
|
+
elsif f.instance_of?(MARC::ControlField)
|
75
|
+
xml << open_controlfield(f.tag) << f.value.encode(xml: :text) << "</controlfield>"
|
83
76
|
end
|
84
77
|
end
|
85
|
-
xml <<
|
86
|
-
|
78
|
+
xml << "</record>"
|
79
|
+
xml.force_encoding("utf-8")
|
87
80
|
end
|
88
81
|
end
|
89
82
|
end
|
90
|
-
end
|
83
|
+
end
|
data/marc-fastxmlwriter.gemspec
CHANGED
@@ -1,24 +1,24 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require "marc/fastxmlwriter/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
11
|
-
spec.summary
|
12
|
-
spec.homepage
|
13
|
-
spec.license
|
6
|
+
spec.name = "marc-fastxmlwriter"
|
7
|
+
spec.version = Marc::FastXMLWriter::VERSION
|
8
|
+
spec.authors = ["Bill Dueber"]
|
9
|
+
spec.email = ["bill@dueber.com"]
|
10
|
+
spec.summary = "Faster (but unverified) MARC-XML from a MARC Record"
|
11
|
+
spec.homepage = "https://github.com/billdueber/marc-fastxmlwriter"
|
12
|
+
spec.license = "MIT"
|
14
13
|
|
15
|
-
spec.files
|
16
|
-
spec.executables
|
17
|
-
spec.test_files
|
14
|
+
spec.files = `git ls-files -z`.split("\x0")
|
15
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
16
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
17
|
spec.require_paths = ["lib"]
|
19
18
|
|
20
|
-
spec.add_dependency
|
21
|
-
spec.add_development_dependency "bundler", "~>
|
22
|
-
spec.add_development_dependency "rake", "~>
|
19
|
+
spec.add_dependency "marc", "~>1.0"
|
20
|
+
spec.add_development_dependency "bundler", "~> 2.0"
|
21
|
+
spec.add_development_dependency "rake", "~>13"
|
23
22
|
spec.add_development_dependency "minitest", "~> 5.0"
|
23
|
+
spec.add_development_dependency "nokogiri", "~> 1.0"
|
24
24
|
end
|
data/test/minitest_helper.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
-
$LOAD_PATH.unshift File.expand_path(
|
2
|
-
require
|
3
|
-
|
4
|
-
require 'minitest'
|
5
|
-
require 'minitest/spec'
|
6
|
-
require 'minitest/autorun'
|
1
|
+
$LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
require "marc/fastxmlwriter"
|
7
3
|
|
4
|
+
require "minitest"
|
5
|
+
require "minitest/spec"
|
6
|
+
require "minitest/autorun"
|
8
7
|
|
9
8
|
def test_data_dir
|
10
|
-
|
9
|
+
File.expand_path(File.join(File.dirname(__FILE__), "test_data"))
|
11
10
|
end
|
12
11
|
|
13
12
|
def test_data(relative_path)
|
14
|
-
|
13
|
+
File.expand_path(File.join("test_data", relative_path), File.dirname(__FILE__))
|
15
14
|
end
|
data/test/round_trip_spec.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "minitest_helper"
|
2
|
+
require "marc"
|
3
|
+
require "stringio"
|
4
|
+
require "nokogiri"
|
4
5
|
|
5
6
|
describe "loads" do
|
6
7
|
it "loads the constant" do
|
@@ -8,19 +9,34 @@ describe "loads" do
|
|
8
9
|
end
|
9
10
|
end
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
ROUND_TRIP_FILES = Dir.glob(test_data_dir + '/*')
|
12
|
+
ROUND_TRIP_FILES = Dir.glob(test_data_dir + "/*")
|
14
13
|
|
15
14
|
describe "round-trip tests" do
|
16
|
-
|
15
|
+
describe "Using namespace" do
|
16
|
+
describe "rexml" do
|
17
|
+
ROUND_TRIP_FILES.each do |filename|
|
18
|
+
MARC::Reader.new(filename).each_with_index do |r1, i|
|
19
|
+
it "round-trips to/from xml and MARC::Record with namespace" do
|
20
|
+
use_namespace = true
|
21
|
+
xml = MARC::FastXMLWriter.single_record_document(r1, include_namespace: use_namespace)
|
22
|
+
srexml = StringIO.new(xml.dup)
|
23
|
+
r3 = MARC::XMLReader.new(srexml, parser: "rexml", ignore_namespace: !use_namespace).first
|
24
|
+
assert_equal r1, r3, "File #{filename} record #{i}, rexml with include_namespace = #{use_namespace}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "nokogiri" do
|
17
32
|
ROUND_TRIP_FILES.each do |filename|
|
18
33
|
MARC::Reader.new(filename).each_with_index do |r1, i|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
34
|
+
it "round-trips to/from xml and MARC::Record with namespace" do
|
35
|
+
use_namespace = true
|
36
|
+
xml = MARC::FastXMLWriter.single_record_document(r1, include_namespace: use_namespace)
|
37
|
+
srexml = StringIO.new(xml.dup)
|
38
|
+
r3 = MARC::XMLReader.new(srexml, parser: "nokogiri", ignore_namespace: !use_namespace).first
|
39
|
+
assert_equal r1, r3, "File #{filename} record #{i}, nokogiri with include_namespace = #{use_namespace}"
|
24
40
|
end
|
25
41
|
end
|
26
42
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc-fastxmlwriter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bill Dueber
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: marc
|
@@ -30,28 +30,28 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '2.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '2.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '13'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '13'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: minitest
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '5.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: nokogiri
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.0'
|
69
83
|
description:
|
70
84
|
email:
|
71
85
|
- bill@dueber.com
|
@@ -75,6 +89,7 @@ extra_rdoc_files: []
|
|
75
89
|
files:
|
76
90
|
- ".gitignore"
|
77
91
|
- ".travis.yml"
|
92
|
+
- CHANGES.md
|
78
93
|
- Gemfile
|
79
94
|
- LICENSE.txt
|
80
95
|
- README.md
|
@@ -110,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
125
|
- !ruby/object:Gem::Version
|
111
126
|
version: '0'
|
112
127
|
requirements: []
|
113
|
-
|
114
|
-
rubygems_version: 2.4.5
|
128
|
+
rubygems_version: 3.1.2
|
115
129
|
signing_key:
|
116
130
|
specification_version: 4
|
117
131
|
summary: Faster (but unverified) MARC-XML from a MARC Record
|
@@ -125,4 +139,3 @@ test_files:
|
|
125
139
|
- test/test_data/random_tag_order2.dat
|
126
140
|
- test/test_data/utf8.marc
|
127
141
|
- test/test_data/utf8_multirecord.marc
|
128
|
-
has_rdoc:
|