traject_horizon 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c445af099fb059edc31fcf8a82737032e97ea149
|
4
|
+
data.tar.gz: 3db072af3d1202b9235d4a0dbd9c3fcd2c29699f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 53780c5c0e8eb1d3f3929f4bb3b42f688decfafddd0c1e38ad2920b2f9e5ec690cbd656255d3d9d4f44e10bcc73b3868ce203d201e54f212360d0ffff613fca6
|
7
|
+
data.tar.gz: af84bb6b374d0bf2f4489361c9cf524b2670511369eefe0ec615f3d5b7bf0fde5950ac17466d5659fde59192e68be0f28bc531214b19faf5a993d05a9f70d49b
|
@@ -122,6 +122,10 @@ module Traject
|
|
122
122
|
# to UTF8. These character references are oddly legal representations of UTF8 in
|
123
123
|
# MARC8. http://www.loc.gov/marc/specifications/speccharconversion.html#lossless
|
124
124
|
#
|
125
|
+
# Note HorizonReader will also remove control chars from output (except for ones
|
126
|
+
# with legal meaning in binary MARC) -- these are errors in Horizon db which mean
|
127
|
+
# nothing, are illegal in MARC binary serialization, and can mess things up.
|
128
|
+
#
|
125
129
|
# == Misc
|
126
130
|
#
|
127
131
|
# [horizon.batch_size] Batch size to use for fetching item/copy info on each bib. Default 400.
|
@@ -253,14 +257,17 @@ module Traject
|
|
253
257
|
end
|
254
258
|
|
255
259
|
# Converts from Marc8 to UTF8 if necessary.
|
256
|
-
#
|
260
|
+
#
|
261
|
+
# Also replaces escaped unicode codepoints using custom Horizon "<U+nnnn>" format
|
262
|
+
# or the standard MARC 'lossless encoding' "&#xHHHH;" format.
|
257
263
|
def convert_text!(text, error_handler)
|
258
264
|
text = AnselToUnicode.new(error_handler, true).convert(text) if convert_marc8_to_utf8?
|
259
265
|
|
260
266
|
# Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
|
261
267
|
# codepoint, turn it into UTF8 for that codepoint
|
262
268
|
if settings["horizon.destination_encoding"] == "UTF8" &&
|
263
|
-
settings["horizon.codepoint_translate"].to_s == "true" ||
|
269
|
+
(settings["horizon.codepoint_translate"].to_s == "true" ||
|
270
|
+
settings["horizon.character_reference_translate"].to_s == "true")
|
264
271
|
|
265
272
|
regexp = if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.character_reference_translate"].to_s == "true"
|
266
273
|
# unicode codepoint in either HTML char reference form OR
|
@@ -278,6 +285,12 @@ module Traject
|
|
278
285
|
end
|
279
286
|
end
|
280
287
|
|
288
|
+
# eliminate illegal control chars. All ASCII less than 0x20
|
289
|
+
# _except_ for four legal ones (including MARC delimiters).
|
290
|
+
# http://www.loc.gov/marc/specifications/specchargeneral.html#controlfunction
|
291
|
+
# this is all bytes from 0x00 to 0x1F except for the allowed 1B, 1D, 1E, 1F.
|
292
|
+
text.gsub!(/[\x00-\x1A\x1C]/, '')
|
293
|
+
|
281
294
|
return text
|
282
295
|
end
|
283
296
|
|
@@ -21,6 +21,12 @@ describe "turning weird Horizon escape sequences into unicode" do
|
|
21
21
|
assert_equal "Weird \u200F but these aren't changed #x2000; ÈF etc.", converted
|
22
22
|
end
|
23
23
|
|
24
|
+
it "erases illegal control chars" do
|
25
|
+
converted = @reader.convert_text!("Some illegal control chars: '\x07' '\x02'; and a legal one: '\x1F'.", org.marc4j.ErrorHandler.new)
|
26
|
+
|
27
|
+
assert_equal "Some illegal control chars: '' ''; and a legal one: '\x1F'.", converted
|
28
|
+
end
|
29
|
+
|
24
30
|
end
|
25
31
|
|
26
32
|
end
|
data/traject_horizon.gemspec
CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.add_dependency "traject"
|
21
21
|
|
22
22
|
spec.add_dependency "marc-marc4j" # for marc4j jar files
|
23
|
+
spec.add_dependency "ensure_valid_encoding", ">= 0.5.3"
|
23
24
|
|
24
25
|
spec.add_development_dependency "bundler", "~> 1.3"
|
25
26
|
spec.add_development_dependency "rake"
|
metadata
CHANGED
@@ -1,97 +1,100 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject_horizon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jonathan Rochkind
|
9
|
-
autorequire:
|
8
|
+
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
11
|
+
date: 2013-11-11 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: traject
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
16
20
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
21
|
requirements:
|
19
|
-
- -
|
22
|
+
- - '>='
|
20
23
|
- !ruby/object:Gem::Version
|
21
24
|
version: '0'
|
22
|
-
type: :runtime
|
23
25
|
prerelease: false
|
26
|
+
type: :runtime
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: marc-marc4j
|
24
29
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
30
|
requirements:
|
27
|
-
- -
|
31
|
+
- - '>='
|
28
32
|
- !ruby/object:Gem::Version
|
29
33
|
version: '0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: marc-marc4j
|
32
34
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
35
|
requirements:
|
35
|
-
- -
|
36
|
+
- - '>='
|
36
37
|
- !ruby/object:Gem::Version
|
37
38
|
version: '0'
|
38
|
-
type: :runtime
|
39
39
|
prerelease: false
|
40
|
+
type: :runtime
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ensure_valid_encoding
|
40
43
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
44
|
requirements:
|
43
|
-
- -
|
45
|
+
- - '>='
|
44
46
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
|
-
name: bundler
|
47
|
+
version: 0.5.3
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
49
|
requirements:
|
51
|
-
- -
|
50
|
+
- - '>='
|
52
51
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
54
|
-
type: :development
|
52
|
+
version: 0.5.3
|
55
53
|
prerelease: false
|
54
|
+
type: :runtime
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
56
57
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '1.3'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: rake
|
64
62
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
63
|
requirements:
|
67
|
-
- -
|
64
|
+
- - ~>
|
68
65
|
- !ruby/object:Gem::Version
|
69
|
-
version: '
|
70
|
-
type: :development
|
66
|
+
version: '1.3'
|
71
67
|
prerelease: false
|
68
|
+
type: :development
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
72
71
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
72
|
requirements:
|
75
|
-
- -
|
73
|
+
- - '>='
|
76
74
|
- !ruby/object:Gem::Version
|
77
75
|
version: '0'
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
|
-
name: minitest
|
80
76
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
77
|
requirements:
|
83
|
-
- -
|
78
|
+
- - '>='
|
84
79
|
- !ruby/object:Gem::Version
|
85
80
|
version: '0'
|
86
|
-
type: :development
|
87
81
|
prerelease: false
|
82
|
+
type: :development
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: minitest
|
88
85
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
86
|
requirements:
|
91
|
-
- -
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
requirement: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - '>='
|
92
93
|
- !ruby/object:Gem::Version
|
93
94
|
version: '0'
|
94
|
-
|
95
|
+
prerelease: false
|
96
|
+
type: :development
|
97
|
+
description:
|
95
98
|
email:
|
96
99
|
- jonathan@dnil.net
|
97
100
|
executables: []
|
@@ -108,7 +111,7 @@ files:
|
|
108
111
|
- lib/traject_horizon.rb
|
109
112
|
- lib/traject_horizon/version.rb
|
110
113
|
- test/horizon_bib_auth_merge_test.rb
|
111
|
-
- test/
|
114
|
+
- test/horizon_convert_text_test.rb
|
112
115
|
- test/test_helper.rb
|
113
116
|
- traject_horizon.gemspec
|
114
117
|
- vendor/jtds/.DS_Store
|
@@ -116,30 +119,28 @@ files:
|
|
116
119
|
homepage: http://github.com/jrochkind/traject_horizon
|
117
120
|
licenses:
|
118
121
|
- MIT
|
119
|
-
|
122
|
+
metadata: {}
|
123
|
+
post_install_message:
|
120
124
|
rdoc_options: []
|
121
125
|
require_paths:
|
122
126
|
- lib
|
123
127
|
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
-
none: false
|
125
128
|
requirements:
|
126
|
-
- -
|
129
|
+
- - '>='
|
127
130
|
- !ruby/object:Gem::Version
|
128
131
|
version: '0'
|
129
132
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
-
none: false
|
131
133
|
requirements:
|
132
|
-
- -
|
134
|
+
- - '>='
|
133
135
|
- !ruby/object:Gem::Version
|
134
136
|
version: '0'
|
135
137
|
requirements: []
|
136
|
-
rubyforge_project:
|
137
|
-
rubygems_version: 1.
|
138
|
-
signing_key:
|
139
|
-
specification_version:
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 2.1.9
|
140
|
+
signing_key:
|
141
|
+
specification_version: 4
|
140
142
|
summary: Horizon ILS MARC Exporter, a plugin for the traject tool
|
141
143
|
test_files:
|
142
144
|
- test/horizon_bib_auth_merge_test.rb
|
143
|
-
- test/
|
145
|
+
- test/horizon_convert_text_test.rb
|
144
146
|
- test/test_helper.rb
|
145
|
-
has_rdoc:
|