traject_horizon 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c445af099fb059edc31fcf8a82737032e97ea149
|
4
|
+
data.tar.gz: 3db072af3d1202b9235d4a0dbd9c3fcd2c29699f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 53780c5c0e8eb1d3f3929f4bb3b42f688decfafddd0c1e38ad2920b2f9e5ec690cbd656255d3d9d4f44e10bcc73b3868ce203d201e54f212360d0ffff613fca6
|
7
|
+
data.tar.gz: af84bb6b374d0bf2f4489361c9cf524b2670511369eefe0ec615f3d5b7bf0fde5950ac17466d5659fde59192e68be0f28bc531214b19faf5a993d05a9f70d49b
|
@@ -122,6 +122,10 @@ module Traject
|
|
122
122
|
# to UTF8. These character references are oddly legal representations of UTF8 in
|
123
123
|
# MARC8. http://www.loc.gov/marc/specifications/speccharconversion.html#lossless
|
124
124
|
#
|
125
|
+
# Note HorizonReader will also remove control chars from output (except for ones
|
126
|
+
# with legal meaning in binary MARC) -- these are errors in Horizon db which mean
|
127
|
+
# nothing, are illegal in MARC binary serialization, and can mess things up.
|
128
|
+
#
|
125
129
|
# == Misc
|
126
130
|
#
|
127
131
|
# [horizon.batch_size] Batch size to use for fetching item/copy info on each bib. Default 400.
|
@@ -253,14 +257,17 @@ module Traject
|
|
253
257
|
end
|
254
258
|
|
255
259
|
# Converts from Marc8 to UTF8 if necessary.
|
256
|
-
#
|
260
|
+
#
|
261
|
+
# Also replaces escaped unicode codepoints using custom Horizon "<U+nnnn>" format
|
262
|
+
# or the standard MARC 'lossless encoding' "&#xHHHH;" format.
|
257
263
|
def convert_text!(text, error_handler)
|
258
264
|
text = AnselToUnicode.new(error_handler, true).convert(text) if convert_marc8_to_utf8?
|
259
265
|
|
260
266
|
# Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
|
261
267
|
# codepoint, becomes the UTF8 encoding of that codepoint
|
262
268
|
if settings["horizon.destination_encoding"] == "UTF8" &&
|
263
|
-
settings["horizon.codepoint_translate"].to_s == "true" ||
|
269
|
+
(settings["horizon.codepoint_translate"].to_s == "true" ||
|
270
|
+
settings["horizon.character_reference_translate"].to_s == "true")
|
264
271
|
|
265
272
|
regexp = if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.character_reference_translate"].to_s == "true"
|
266
273
|
# unicode codepoint in either HTML char reference form OR
|
@@ -278,6 +285,12 @@ module Traject
|
|
278
285
|
end
|
279
286
|
end
|
280
287
|
|
288
|
+
# eliminate illegal control chars. All ASCII less than 0x20
|
289
|
+
# _except_ for four legal ones (including MARC delimiters).
|
290
|
+
# http://www.loc.gov/marc/specifications/specchargeneral.html#controlfunction
|
291
|
+
# this is all bytes from 0x00 to 0x1F except for the allowed 1B, 1D, 1E, 1F.
|
292
|
+
text.gsub!(/[\x00-\x1A\x1C]/, '')
|
293
|
+
|
281
294
|
return text
|
282
295
|
end
|
283
296
|
|
@@ -21,6 +21,12 @@ describe "turning weird Horizon escape sequences into unicode" do
|
|
21
21
|
assert_equal "Weird \u200F but these aren't changed #x2000; ÈF etc.", converted
|
22
22
|
end
|
23
23
|
|
24
|
+
it "erases illegal control chars" do
|
25
|
+
converted = @reader.convert_text!("Some illegal control chars: '\x07' '\x02'; and a legal one: '\x1F'.", org.marc4j.ErrorHandler.new)
|
26
|
+
|
27
|
+
assert_equal "Some illegal control chars: '' ''; and a legal one: '\x1F'.", converted
|
28
|
+
end
|
29
|
+
|
24
30
|
end
|
25
31
|
|
26
32
|
end
|
data/traject_horizon.gemspec
CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.add_dependency "traject"
|
21
21
|
|
22
22
|
spec.add_dependency "marc-marc4j" # for marc4j jar files
|
23
|
+
spec.add_dependency "ensure_valid_encoding", ">= 0.5.3"
|
23
24
|
|
24
25
|
spec.add_development_dependency "bundler", "~> 1.3"
|
25
26
|
spec.add_development_dependency "rake"
|
metadata
CHANGED
@@ -1,97 +1,100 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject_horizon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jonathan Rochkind
|
9
|
-
autorequire:
|
8
|
+
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
11
|
+
date: 2013-11-11 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: traject
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
16
20
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
21
|
requirements:
|
19
|
-
- -
|
22
|
+
- - '>='
|
20
23
|
- !ruby/object:Gem::Version
|
21
24
|
version: '0'
|
22
|
-
type: :runtime
|
23
25
|
prerelease: false
|
26
|
+
type: :runtime
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: marc-marc4j
|
24
29
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
30
|
requirements:
|
27
|
-
- -
|
31
|
+
- - '>='
|
28
32
|
- !ruby/object:Gem::Version
|
29
33
|
version: '0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: marc-marc4j
|
32
34
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
35
|
requirements:
|
35
|
-
- -
|
36
|
+
- - '>='
|
36
37
|
- !ruby/object:Gem::Version
|
37
38
|
version: '0'
|
38
|
-
type: :runtime
|
39
39
|
prerelease: false
|
40
|
+
type: :runtime
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ensure_valid_encoding
|
40
43
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
44
|
requirements:
|
43
|
-
- -
|
45
|
+
- - '>='
|
44
46
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
|
-
name: bundler
|
47
|
+
version: 0.5.3
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
49
|
requirements:
|
51
|
-
- -
|
50
|
+
- - '>='
|
52
51
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
54
|
-
type: :development
|
52
|
+
version: 0.5.3
|
55
53
|
prerelease: false
|
54
|
+
type: :runtime
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
56
57
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '1.3'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: rake
|
64
62
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
63
|
requirements:
|
67
|
-
- -
|
64
|
+
- - ~>
|
68
65
|
- !ruby/object:Gem::Version
|
69
|
-
version: '
|
70
|
-
type: :development
|
66
|
+
version: '1.3'
|
71
67
|
prerelease: false
|
68
|
+
type: :development
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
72
71
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
72
|
requirements:
|
75
|
-
- -
|
73
|
+
- - '>='
|
76
74
|
- !ruby/object:Gem::Version
|
77
75
|
version: '0'
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
|
-
name: minitest
|
80
76
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
77
|
requirements:
|
83
|
-
- -
|
78
|
+
- - '>='
|
84
79
|
- !ruby/object:Gem::Version
|
85
80
|
version: '0'
|
86
|
-
type: :development
|
87
81
|
prerelease: false
|
82
|
+
type: :development
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: minitest
|
88
85
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
86
|
requirements:
|
91
|
-
- -
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
requirement: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - '>='
|
92
93
|
- !ruby/object:Gem::Version
|
93
94
|
version: '0'
|
94
|
-
|
95
|
+
prerelease: false
|
96
|
+
type: :development
|
97
|
+
description:
|
95
98
|
email:
|
96
99
|
- jonathan@dnil.net
|
97
100
|
executables: []
|
@@ -108,7 +111,7 @@ files:
|
|
108
111
|
- lib/traject_horizon.rb
|
109
112
|
- lib/traject_horizon/version.rb
|
110
113
|
- test/horizon_bib_auth_merge_test.rb
|
111
|
-
- test/
|
114
|
+
- test/horizon_convert_text_test.rb
|
112
115
|
- test/test_helper.rb
|
113
116
|
- traject_horizon.gemspec
|
114
117
|
- vendor/jtds/.DS_Store
|
@@ -116,30 +119,28 @@ files:
|
|
116
119
|
homepage: http://github.com/jrochkind/traject_horizon
|
117
120
|
licenses:
|
118
121
|
- MIT
|
119
|
-
|
122
|
+
metadata: {}
|
123
|
+
post_install_message:
|
120
124
|
rdoc_options: []
|
121
125
|
require_paths:
|
122
126
|
- lib
|
123
127
|
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
-
none: false
|
125
128
|
requirements:
|
126
|
-
- -
|
129
|
+
- - '>='
|
127
130
|
- !ruby/object:Gem::Version
|
128
131
|
version: '0'
|
129
132
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
-
none: false
|
131
133
|
requirements:
|
132
|
-
- -
|
134
|
+
- - '>='
|
133
135
|
- !ruby/object:Gem::Version
|
134
136
|
version: '0'
|
135
137
|
requirements: []
|
136
|
-
rubyforge_project:
|
137
|
-
rubygems_version: 1.
|
138
|
-
signing_key:
|
139
|
-
specification_version:
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 2.1.9
|
140
|
+
signing_key:
|
141
|
+
specification_version: 4
|
140
142
|
summary: Horizon ILS MARC Exporter, a plugin for the traject tool
|
141
143
|
test_files:
|
142
144
|
- test/horizon_bib_auth_merge_test.rb
|
143
|
-
- test/
|
145
|
+
- test/horizon_convert_text_test.rb
|
144
146
|
- test/test_helper.rb
|
145
|
-
has_rdoc:
|