marc-fastxmlwriter 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGES.md +13 -0
- data/Gemfile +1 -1
- data/Rakefile +3 -3
- data/lib/marc/fastxmlwriter/version.rb +1 -1
- data/lib/marc/fastxmlwriter.rb +38 -45
- data/marc-fastxmlwriter.gemspec +16 -16
- data/test/minitest_helper.rb +7 -8
- data/test/round_trip_spec.rb +28 -12
- metadata +22 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 88bd084968e4c8a4faa47fee6a669a27a8d08cb87625698f224955666bf000c5
|
4
|
+
data.tar.gz: c886c3633285a5411f3759dacb8efb42cfecdd9cf3a989c645cc23c8ebac3bdf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3ffb1f390f6d922f5eb7e3d285ff3f4795a7d6281116dbc4474daf1c946d87f0651ea2482ac7a59d083df59934b06a716a38a945b9b66eb3ffc75822288094d
|
7
|
+
data.tar.gz: 6a63ff5f03c1ac97e7db88c524ef3be5e194c6c6d520312bacca9766a396843b0f9e5df33c9ed8f28529ecdac5774bf47a7bf6736acdcb4df6cc68882f011710
|
data/CHANGES.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# MARC::FastXMLWriter
|
2
|
+
|
3
|
+
## 1.1.0 Fix namespace code
|
4
|
+
|
5
|
+
The supposed `include_namespace` code wasn't actually using
|
6
|
+
the namespace (`xmlns="http://www.loc.gov/..."`), only
|
7
|
+
defining it (`xmlns:marc="http://www.loc.gov/..."`), thus producing
|
8
|
+
files that were not, in fact, namespaced.
|
9
|
+
|
10
|
+
Also adds tests against nokogiri as well as rexml
|
11
|
+
|
12
|
+
### 1.0.0 First release
|
13
|
+
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -3,9 +3,9 @@ require "bundler/gem_tasks"
|
|
3
3
|
require "rake/testtask"
|
4
4
|
|
5
5
|
Rake::TestTask.new(:test) do |t|
|
6
|
-
t.pattern =
|
6
|
+
t.pattern = "test/**/*_spec.rb"
|
7
7
|
t.libs << "test"
|
8
8
|
end
|
9
9
|
|
10
|
-
task :
|
11
|
-
task :
|
10
|
+
task spec: :test
|
11
|
+
task default: :test
|
data/lib/marc/fastxmlwriter.rb
CHANGED
@@ -1,90 +1,83 @@
|
|
1
1
|
require "marc/fastxmlwriter/version"
|
2
2
|
|
3
|
-
require
|
3
|
+
require "marc"
|
4
4
|
|
5
5
|
module MARC
|
6
6
|
class FastXMLWriter < MARC::XMLWriter
|
7
|
-
|
8
7
|
XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>'
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
def initialize(file, opts={})
|
8
|
+
|
9
|
+
OPEN_COLLECTION = "<collection>"
|
10
|
+
OPEN_COLLECTION_NAMESPACE = %(<collection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/MARC21/slim" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">)
|
11
|
+
|
12
|
+
def initialize(file, opts = {})
|
16
13
|
super
|
17
|
-
end
|
18
|
-
|
14
|
+
end
|
15
|
+
|
19
16
|
def write(record)
|
20
17
|
@fh.write(self.class.encode(record))
|
21
18
|
# @fh.write("\n")
|
22
19
|
end
|
23
|
-
|
20
|
+
|
24
21
|
class << self
|
25
|
-
|
26
22
|
def open_collection(use_ns)
|
27
23
|
if use_ns
|
28
|
-
|
24
|
+
OPEN_COLLECTION_NAMESPACE.dup
|
29
25
|
else
|
30
|
-
|
26
|
+
OPEN_COLLECTION.dup
|
31
27
|
end
|
32
28
|
end
|
33
|
-
|
34
|
-
|
35
|
-
def single_record_document(r, opts={})
|
36
|
-
|
29
|
+
|
30
|
+
def single_record_document(r, include_namespace: true)
|
37
31
|
xml = XML_HEADER.dup
|
38
|
-
xml << open_collection(
|
39
|
-
xml << encode(r
|
40
|
-
xml <<
|
32
|
+
xml << open_collection(include_namespace)
|
33
|
+
xml << encode(r)
|
34
|
+
xml << "</collection>"
|
41
35
|
xml
|
42
36
|
end
|
43
|
-
|
37
|
+
|
44
38
|
def open_datafield(tag, ind1, ind2)
|
45
|
-
|
46
|
-
return "<datafield tag=\"#{tag}\" ind1=\"#{ind1}\" ind2=\"#{ind2}\">"
|
39
|
+
"<datafield tag=\"#{tag}\" ind1=\"#{ind1}\" ind2=\"#{ind2}\">"
|
47
40
|
end
|
48
|
-
|
41
|
+
|
49
42
|
def open_subfield(code)
|
50
43
|
# return "\n <subfield code=\"#{code}\">"
|
51
|
-
|
44
|
+
"<subfield code=\"#{code}\">"
|
52
45
|
end
|
53
|
-
|
46
|
+
|
54
47
|
def open_controlfield(tag)
|
55
48
|
# return "\n<controlfield tag=\"#{tag}\">"
|
56
|
-
|
49
|
+
"<controlfield tag=\"#{tag}\">"
|
57
50
|
end
|
58
|
-
|
59
|
-
def encode(r
|
51
|
+
|
52
|
+
def encode(r)
|
60
53
|
xml = "<record>"
|
61
|
-
|
54
|
+
|
62
55
|
# MARCXML only allows alphanumerics or spaces in the leader
|
63
|
-
lead = r.leader.gsub(/[^\w|^\s]/,
|
56
|
+
lead = r.leader.gsub(/[^\w|^\s]/, "Z").encode(xml: :text)
|
64
57
|
|
65
58
|
# MARCXML is particular about last four characters; ILSes aren't
|
66
|
-
lead.ljust(23,
|
59
|
+
lead.ljust(23, " ")[20..23] = "4500"
|
67
60
|
|
68
61
|
# MARCXML doesn't like a space here so we need a filler character: Z
|
69
|
-
if
|
62
|
+
if lead[6..6] == " "
|
70
63
|
lead[6..6] = "Z"
|
71
64
|
end
|
72
|
-
|
73
|
-
xml << "<leader>" << lead.encode(:
|
65
|
+
|
66
|
+
xml << "<leader>" << lead.encode(xml: :text) << "</leader>"
|
74
67
|
r.each do |f|
|
75
|
-
if f.
|
68
|
+
if f.instance_of?(MARC::DataField)
|
76
69
|
xml << open_datafield(f.tag, f.indicator1, f.indicator2)
|
77
70
|
f.each do |sf|
|
78
|
-
xml << open_subfield(sf.code) << sf.value.encode(:
|
71
|
+
xml << open_subfield(sf.code) << sf.value.encode(xml: :text) << "</subfield>"
|
79
72
|
end
|
80
|
-
xml <<
|
81
|
-
elsif f.
|
82
|
-
xml << open_controlfield(f.tag) << f.value.encode(:
|
73
|
+
xml << "</datafield>"
|
74
|
+
elsif f.instance_of?(MARC::ControlField)
|
75
|
+
xml << open_controlfield(f.tag) << f.value.encode(xml: :text) << "</controlfield>"
|
83
76
|
end
|
84
77
|
end
|
85
|
-
xml <<
|
86
|
-
|
78
|
+
xml << "</record>"
|
79
|
+
xml.force_encoding("utf-8")
|
87
80
|
end
|
88
81
|
end
|
89
82
|
end
|
90
|
-
end
|
83
|
+
end
|
data/marc-fastxmlwriter.gemspec
CHANGED
@@ -1,24 +1,24 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require "marc/fastxmlwriter/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
11
|
-
spec.summary
|
12
|
-
spec.homepage
|
13
|
-
spec.license
|
6
|
+
spec.name = "marc-fastxmlwriter"
|
7
|
+
spec.version = Marc::FastXMLWriter::VERSION
|
8
|
+
spec.authors = ["Bill Dueber"]
|
9
|
+
spec.email = ["bill@dueber.com"]
|
10
|
+
spec.summary = "Faster (but unverified) MARC-XML from a MARC Record"
|
11
|
+
spec.homepage = "https://github.com/billdueber/marc-fastxmlwriter"
|
12
|
+
spec.license = "MIT"
|
14
13
|
|
15
|
-
spec.files
|
16
|
-
spec.executables
|
17
|
-
spec.test_files
|
14
|
+
spec.files = `git ls-files -z`.split("\x0")
|
15
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
16
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
17
|
spec.require_paths = ["lib"]
|
19
18
|
|
20
|
-
spec.add_dependency
|
21
|
-
spec.add_development_dependency "bundler", "~>
|
22
|
-
spec.add_development_dependency "rake", "~>
|
19
|
+
spec.add_dependency "marc", "~>1.0"
|
20
|
+
spec.add_development_dependency "bundler", "~> 2.0"
|
21
|
+
spec.add_development_dependency "rake", "~>13"
|
23
22
|
spec.add_development_dependency "minitest", "~> 5.0"
|
23
|
+
spec.add_development_dependency "nokogiri", "~> 1.0"
|
24
24
|
end
|
data/test/minitest_helper.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
-
$LOAD_PATH.unshift File.expand_path(
|
2
|
-
require
|
3
|
-
|
4
|
-
require 'minitest'
|
5
|
-
require 'minitest/spec'
|
6
|
-
require 'minitest/autorun'
|
1
|
+
$LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
require "marc/fastxmlwriter"
|
7
3
|
|
4
|
+
require "minitest"
|
5
|
+
require "minitest/spec"
|
6
|
+
require "minitest/autorun"
|
8
7
|
|
9
8
|
def test_data_dir
|
10
|
-
|
9
|
+
File.expand_path(File.join(File.dirname(__FILE__), "test_data"))
|
11
10
|
end
|
12
11
|
|
13
12
|
def test_data(relative_path)
|
14
|
-
|
13
|
+
File.expand_path(File.join("test_data", relative_path), File.dirname(__FILE__))
|
15
14
|
end
|
data/test/round_trip_spec.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "minitest_helper"
|
2
|
+
require "marc"
|
3
|
+
require "stringio"
|
4
|
+
require "nokogiri"
|
4
5
|
|
5
6
|
describe "loads" do
|
6
7
|
it "loads the constant" do
|
@@ -8,19 +9,34 @@ describe "loads" do
|
|
8
9
|
end
|
9
10
|
end
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
ROUND_TRIP_FILES = Dir.glob(test_data_dir + '/*')
|
12
|
+
ROUND_TRIP_FILES = Dir.glob(test_data_dir + "/*")
|
14
13
|
|
15
14
|
describe "round-trip tests" do
|
16
|
-
|
15
|
+
describe "Using namespace" do
|
16
|
+
describe "rexml" do
|
17
|
+
ROUND_TRIP_FILES.each do |filename|
|
18
|
+
MARC::Reader.new(filename).each_with_index do |r1, i|
|
19
|
+
it "round-trips to/from xml and MARC::Record with namespace" do
|
20
|
+
use_namespace = true
|
21
|
+
xml = MARC::FastXMLWriter.single_record_document(r1, include_namespace: use_namespace)
|
22
|
+
srexml = StringIO.new(xml.dup)
|
23
|
+
r3 = MARC::XMLReader.new(srexml, parser: "rexml", ignore_namespace: !use_namespace).first
|
24
|
+
assert_equal r1, r3, "File #{filename} record #{i}, rexml with include_namespace = #{use_namespace}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "nokogiri" do
|
17
32
|
ROUND_TRIP_FILES.each do |filename|
|
18
33
|
MARC::Reader.new(filename).each_with_index do |r1, i|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
34
|
+
it "round-trips to/from xml and MARC::Record with namespace" do
|
35
|
+
use_namespace = true
|
36
|
+
xml = MARC::FastXMLWriter.single_record_document(r1, include_namespace: use_namespace)
|
37
|
+
srexml = StringIO.new(xml.dup)
|
38
|
+
r3 = MARC::XMLReader.new(srexml, parser: "nokogiri", ignore_namespace: !use_namespace).first
|
39
|
+
assert_equal r1, r3, "File #{filename} record #{i}, nokogiri with include_namespace = #{use_namespace}"
|
24
40
|
end
|
25
41
|
end
|
26
42
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc-fastxmlwriter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bill Dueber
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: marc
|
@@ -30,28 +30,28 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '2.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '2.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '13'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '13'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: minitest
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '5.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: nokogiri
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.0'
|
69
83
|
description:
|
70
84
|
email:
|
71
85
|
- bill@dueber.com
|
@@ -75,6 +89,7 @@ extra_rdoc_files: []
|
|
75
89
|
files:
|
76
90
|
- ".gitignore"
|
77
91
|
- ".travis.yml"
|
92
|
+
- CHANGES.md
|
78
93
|
- Gemfile
|
79
94
|
- LICENSE.txt
|
80
95
|
- README.md
|
@@ -110,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
125
|
- !ruby/object:Gem::Version
|
111
126
|
version: '0'
|
112
127
|
requirements: []
|
113
|
-
|
114
|
-
rubygems_version: 2.4.5
|
128
|
+
rubygems_version: 3.1.2
|
115
129
|
signing_key:
|
116
130
|
specification_version: 4
|
117
131
|
summary: Faster (but unverified) MARC-XML from a MARC Record
|
@@ -125,4 +139,3 @@ test_files:
|
|
125
139
|
- test/test_data/random_tag_order2.dat
|
126
140
|
- test/test_data/utf8.marc
|
127
141
|
- test/test_data/utf8_multirecord.marc
|
128
|
-
has_rdoc:
|