traject_horizon 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/traject/horizon_reader.rb +22 -3
- data/lib/traject_horizon/version.rb +1 -1
- data/test/horizon_unicode_escape_test.rb +7 -2
- metadata +53 -40
- checksums.yaml +0 -7
@@ -117,6 +117,11 @@ module Traject
|
|
117
117
|
# codepoint escaping to actual UTF-8 bytes. Defaults to true. Will be ignored
|
118
118
|
# unless horizon.destination_encoding is UTF8 though.
|
119
119
|
#
|
120
|
+
# [horizon.character_reference_translate] Default true. Convert HTML/XML-style
|
121
|
+
# character references like "&#x200F;" to actual UTF-8 bytes, when converting
|
122
|
+
# to UTF8. These character references are oddly legal representations of UTF8 in
|
123
|
+
# MARC8. http://www.loc.gov/marc/specifications/speccharconversion.html#lossless
|
124
|
+
#
|
120
125
|
# == Misc
|
121
126
|
#
|
122
127
|
# [horizon.batch_size] Batch size to use for fetching item/copy info on each bib. Default 400.
|
@@ -248,14 +253,27 @@ module Traject
|
|
248
253
|
end
|
249
254
|
|
250
255
|
# Converts from Marc8 to UTF8 if necessary.
|
251
|
-
# Also replaces horizon <U+nnnn> codes if needed
|
256
|
+
# Also replaces horizon <U+nnnn> codes if needed, as well as weird Horizon HTML-escaped rlm
|
252
257
|
def convert_text!(text, error_handler)
|
253
258
|
text = AnselToUnicode.new(error_handler, true).convert(text) if convert_marc8_to_utf8?
|
254
259
|
|
255
260
|
# Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
|
256
261
|
# codepoint, turn it UTF8 for that codepoint
|
257
|
-
if settings["horizon.
|
258
|
-
|
262
|
+
if settings["horizon.destination_encoding"] == "UTF8" &&
|
263
|
+
settings["horizon.codepoint_translate"].to_s == "true" || settings["horizon.character_reference_translate"]
|
264
|
+
|
265
|
+
regexp = if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.character_reference_translate"].to_s == "true"
|
266
|
+
# unicode codepoint in either HTML char reference form OR
|
267
|
+
# weird horizon form.
|
268
|
+
/(?:\<U\+|&#x)([0-9A-Fa-f]{4})(?:\>|;)/
|
269
|
+
elsif settings["horizon.codepoint_translate"].to_s == "true"
|
270
|
+
# just weird horizon form
|
271
|
+
/\<U\+([0-9A-Fa-f]{4})\>/
|
272
|
+
else # just character references
|
273
|
+
/&#x([0-9A-Fa-f]{4});/
|
274
|
+
end
|
275
|
+
|
276
|
+
text.gsub!(regexp) do
|
259
277
|
[$1.hex].pack("U")
|
260
278
|
end
|
261
279
|
end
|
@@ -682,6 +700,7 @@ module Traject
|
|
682
700
|
"horizon.source_encoding" => "MARC8",
|
683
701
|
"horizon.destination_encoding" => "UTF8",
|
684
702
|
"horizon.codepoint_translate" => true,
|
703
|
+
"horizon.character_reference_translate" => true,
|
685
704
|
|
686
705
|
"horizon.item_tag" => "991",
|
687
706
|
# Crazy isnull() in the call_type join to join to call_type directly on item
|
@@ -12,8 +12,13 @@ describe "turning weird Horizon escape sequences into unicode" do
|
|
12
12
|
end
|
13
13
|
|
14
14
|
it "converts" do
|
15
|
-
converted = @reader.convert_text!("A hangul character: <U+1112>, okay<!>", org.marc4j.ErrorHandler.new)
|
16
|
-
assert_equal "A hangul character: ᄒ, okay<!>", converted
|
15
|
+
converted = @reader.convert_text!("A hangul character: <U+1112>, okay<!> U+1000>", org.marc4j.ErrorHandler.new)
|
16
|
+
assert_equal "A hangul character: ᄒ, okay<!> U+1000>", converted
|
17
|
+
end
|
18
|
+
|
19
|
+
it "converts rlm" do
|
20
|
+
converted = @reader.convert_text!("Weird &#x200F; but these aren't changed #x2000; &#200F etc.", org.marc4j.ErrorHandler.new)
|
21
|
+
assert_equal "Weird \u200F but these aren't changed #x2000; &#200F etc.", converted
|
17
22
|
end
|
18
23
|
|
19
24
|
end
|
metadata
CHANGED
@@ -1,86 +1,97 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject_horizon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Jonathan Rochkind
|
8
|
-
autorequire:
|
9
|
+
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2013-
|
12
|
+
date: 2013-11-06 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: traject
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - '>='
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
21
18
|
requirements:
|
22
|
-
- - '>='
|
19
|
+
- - ! '>='
|
23
20
|
- !ruby/object:Gem::Version
|
24
21
|
version: '0'
|
25
|
-
prerelease: false
|
26
22
|
type: :runtime
|
27
|
-
|
28
|
-
name: marc-marc4j
|
23
|
+
prerelease: false
|
29
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
30
26
|
requirements:
|
31
|
-
- - '>='
|
27
|
+
- - ! '>='
|
32
28
|
- !ruby/object:Gem::Version
|
33
29
|
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: marc-marc4j
|
34
32
|
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
35
34
|
requirements:
|
36
|
-
- - '>='
|
35
|
+
- - ! '>='
|
37
36
|
- !ruby/object:Gem::Version
|
38
37
|
version: '0'
|
39
|
-
prerelease: false
|
40
38
|
type: :runtime
|
41
|
-
|
42
|
-
name: bundler
|
39
|
+
prerelease: false
|
43
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
44
42
|
requirements:
|
45
|
-
- -
|
43
|
+
- - ! '>='
|
46
44
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: bundler
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
49
50
|
requirements:
|
50
51
|
- - ~>
|
51
52
|
- !ruby/object:Gem::Version
|
52
53
|
version: '1.3'
|
53
|
-
prerelease: false
|
54
54
|
type: :development
|
55
|
-
|
56
|
-
name: rake
|
55
|
+
prerelease: false
|
57
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '1.3'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rake
|
62
64
|
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
63
66
|
requirements:
|
64
|
-
- - '>='
|
67
|
+
- - ! '>='
|
65
68
|
- !ruby/object:Gem::Version
|
66
69
|
version: '0'
|
67
|
-
prerelease: false
|
68
70
|
type: :development
|
69
|
-
|
70
|
-
name: minitest
|
71
|
+
prerelease: false
|
71
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
72
74
|
requirements:
|
73
|
-
- - '>='
|
75
|
+
- - ! '>='
|
74
76
|
- !ruby/object:Gem::Version
|
75
77
|
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: minitest
|
76
80
|
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
77
82
|
requirements:
|
78
|
-
- - '>='
|
83
|
+
- - ! '>='
|
79
84
|
- !ruby/object:Gem::Version
|
80
85
|
version: '0'
|
81
|
-
prerelease: false
|
82
86
|
type: :development
|
83
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
description:
|
84
95
|
email:
|
85
96
|
- jonathan@dnil.net
|
86
97
|
executables: []
|
@@ -105,28 +116,30 @@ files:
|
|
105
116
|
homepage: http://github.com/jrochkind/traject_horizon
|
106
117
|
licenses:
|
107
118
|
- MIT
|
108
|
-
|
109
|
-
post_install_message:
|
119
|
+
post_install_message:
|
110
120
|
rdoc_options: []
|
111
121
|
require_paths:
|
112
122
|
- lib
|
113
123
|
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
+
none: false
|
114
125
|
requirements:
|
115
|
-
- - '>='
|
126
|
+
- - ! '>='
|
116
127
|
- !ruby/object:Gem::Version
|
117
128
|
version: '0'
|
118
129
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
none: false
|
119
131
|
requirements:
|
120
|
-
- - '>='
|
132
|
+
- - ! '>='
|
121
133
|
- !ruby/object:Gem::Version
|
122
134
|
version: '0'
|
123
135
|
requirements: []
|
124
|
-
rubyforge_project:
|
125
|
-
rubygems_version:
|
126
|
-
signing_key:
|
127
|
-
specification_version:
|
136
|
+
rubyforge_project:
|
137
|
+
rubygems_version: 1.8.23
|
138
|
+
signing_key:
|
139
|
+
specification_version: 3
|
128
140
|
summary: Horizon ILS MARC Exporter, a plugin for the traject tool
|
129
141
|
test_files:
|
130
142
|
- test/horizon_bib_auth_merge_test.rb
|
131
143
|
- test/horizon_unicode_escape_test.rb
|
132
144
|
- test/test_helper.rb
|
145
|
+
has_rdoc:
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 2f8ac4a546bb69032c0da921fa9a749ef83a6d96
|
4
|
-
data.tar.gz: 530cc90de2a1eb2ffb52eda7601bbc5076be3bf7
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: d5e490f9706b94fa747b21c689255320b9cd96dfe6cd1b93c63d32952496e293a90f550d5a8449d87d4a05c39a8a59cd049399e004bd75f031a9a55048ffb718
|
7
|
-
data.tar.gz: a766c0d3a2418017d0dcfe562737b4b647bb28e8e9d05dad9fc55be617e6de69c53dcbdd415bb153e4e56de1ea8bdd3cb0599728ef5dc729f16c23049a00764b
|