traject_horizon 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/traject/horizon_reader.rb +22 -3
- data/lib/traject_horizon/version.rb +1 -1
- data/test/horizon_unicode_escape_test.rb +7 -2
- metadata +53 -40
- checksums.yaml +0 -7
@@ -117,6 +117,11 @@ module Traject
|
|
117
117
|
# codepoint escaping to actual UTF-8 bytes. Defaults to true. Will be ignored
|
118
118
|
# unless horizon.destination_encoding is UTF8 though.
|
119
119
|
#
|
120
|
+
# [horizon.character_reference_translate] Default true. Convert HTML/XML-style
|
121
|
+
# character references like "&#x200F;" to actual UTF-8 bytes, when converting
|
122
|
+
# to UTF8. These character references are oddly legal representations of UTF8 in
|
123
|
+
# MARC8. http://www.loc.gov/marc/specifications/speccharconversion.html#lossless
|
124
|
+
#
|
120
125
|
# == Misc
|
121
126
|
#
|
122
127
|
# [horizon.batch_size] Batch size to use for fetching item/copy info on each bib. Default 400.
|
@@ -248,14 +253,27 @@ module Traject
|
|
248
253
|
end
|
249
254
|
|
250
255
|
# Converts from Marc8 to UTF8 if necessary.
|
251
|
-
# Also replaces horizon <U+nnnn> codes if needed
|
256
|
+
# Also replaces horizon <U+nnnn> codes if needed, as well as weird Horizon HTML-escaped rlm
|
252
257
|
def convert_text!(text, error_handler)
|
253
258
|
text = AnselToUnicode.new(error_handler, true).convert(text) if convert_marc8_to_utf8?
|
254
259
|
|
255
260
|
# Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
|
256
261
|
# codepoint, turn it UTF8 for that codepoint
|
257
|
-
if settings["horizon.
|
258
|
-
|
262
|
+
if settings["horizon.destination_encoding"] == "UTF8" &&
|
263
|
+
settings["horizon.codepoint_translate"].to_s == "true" || settings["horizon.character_reference_translate"]
|
264
|
+
|
265
|
+
regexp = if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.character_reference_translate"].to_s == "true"
|
266
|
+
# unicode codepoint in either HTML char reference form OR
|
267
|
+
# weird horizon form.
|
268
|
+
/(?:\<U\+|&#x)([0-9A-Fa-f]{4})(?:\>|;)/
|
269
|
+
elsif settings["horizon.codepoint_translate"].to_s == "true"
|
270
|
+
# just weird horizon form
|
271
|
+
/\<U\+([0-9A-Fa-f]{4})\>/
|
272
|
+
else # just character references
|
273
|
+
/&#x([0-9A-Fa-f]{4});/
|
274
|
+
end
|
275
|
+
|
276
|
+
text.gsub!(regexp) do
|
259
277
|
[$1.hex].pack("U")
|
260
278
|
end
|
261
279
|
end
|
@@ -682,6 +700,7 @@ module Traject
|
|
682
700
|
"horizon.source_encoding" => "MARC8",
|
683
701
|
"horizon.destination_encoding" => "UTF8",
|
684
702
|
"horizon.codepoint_translate" => true,
|
703
|
+
"horizon.character_reference_translate" => true,
|
685
704
|
|
686
705
|
"horizon.item_tag" => "991",
|
687
706
|
# Crazy isnull() in the call_type join to join to call_type directly on item
|
@@ -12,8 +12,13 @@ describe "turning weird Horizon escape sequences into unicode" do
|
|
12
12
|
end
|
13
13
|
|
14
14
|
it "converts" do
|
15
|
-
converted = @reader.convert_text!("A hangul character: <U+1112>, okay<!>", org.marc4j.ErrorHandler.new)
|
16
|
-
assert_equal "A hangul character: ᄒ, okay<!>", converted
|
15
|
+
converted = @reader.convert_text!("A hangul character: <U+1112>, okay<!> U+1000>", org.marc4j.ErrorHandler.new)
|
16
|
+
assert_equal "A hangul character: ᄒ, okay<!> U+1000>", converted
|
17
|
+
end
|
18
|
+
|
19
|
+
it "converts rlm" do
|
20
|
+
converted = @reader.convert_text!("Weird &#x200F; but these aren't changed #x2000; &#200F etc.", org.marc4j.ErrorHandler.new)
|
21
|
+
assert_equal "Weird \u200F but these aren't changed #x2000; &#200F etc.", converted
|
17
22
|
end
|
18
23
|
|
19
24
|
end
|
metadata
CHANGED
@@ -1,86 +1,97 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject_horizon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Jonathan Rochkind
|
8
|
-
autorequire:
|
9
|
+
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2013-
|
12
|
+
date: 2013-11-06 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: traject
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - '>='
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
21
18
|
requirements:
|
22
|
-
- - '>='
|
19
|
+
- - ! '>='
|
23
20
|
- !ruby/object:Gem::Version
|
24
21
|
version: '0'
|
25
|
-
prerelease: false
|
26
22
|
type: :runtime
|
27
|
-
|
28
|
-
name: marc-marc4j
|
23
|
+
prerelease: false
|
29
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
30
26
|
requirements:
|
31
|
-
- - '>='
|
27
|
+
- - ! '>='
|
32
28
|
- !ruby/object:Gem::Version
|
33
29
|
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: marc-marc4j
|
34
32
|
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
35
34
|
requirements:
|
36
|
-
- - '>='
|
35
|
+
- - ! '>='
|
37
36
|
- !ruby/object:Gem::Version
|
38
37
|
version: '0'
|
39
|
-
prerelease: false
|
40
38
|
type: :runtime
|
41
|
-
|
42
|
-
name: bundler
|
39
|
+
prerelease: false
|
43
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
44
42
|
requirements:
|
45
|
-
- -
|
43
|
+
- - ! '>='
|
46
44
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: bundler
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
49
50
|
requirements:
|
50
51
|
- - ~>
|
51
52
|
- !ruby/object:Gem::Version
|
52
53
|
version: '1.3'
|
53
|
-
prerelease: false
|
54
54
|
type: :development
|
55
|
-
|
56
|
-
name: rake
|
55
|
+
prerelease: false
|
57
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '1.3'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rake
|
62
64
|
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
63
66
|
requirements:
|
64
|
-
- - '>='
|
67
|
+
- - ! '>='
|
65
68
|
- !ruby/object:Gem::Version
|
66
69
|
version: '0'
|
67
|
-
prerelease: false
|
68
70
|
type: :development
|
69
|
-
|
70
|
-
name: minitest
|
71
|
+
prerelease: false
|
71
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
72
74
|
requirements:
|
73
|
-
- - '>='
|
75
|
+
- - ! '>='
|
74
76
|
- !ruby/object:Gem::Version
|
75
77
|
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: minitest
|
76
80
|
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
77
82
|
requirements:
|
78
|
-
- - '>='
|
83
|
+
- - ! '>='
|
79
84
|
- !ruby/object:Gem::Version
|
80
85
|
version: '0'
|
81
|
-
prerelease: false
|
82
86
|
type: :development
|
83
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
description:
|
84
95
|
email:
|
85
96
|
- jonathan@dnil.net
|
86
97
|
executables: []
|
@@ -105,28 +116,30 @@ files:
|
|
105
116
|
homepage: http://github.com/jrochkind/traject_horizon
|
106
117
|
licenses:
|
107
118
|
- MIT
|
108
|
-
|
109
|
-
post_install_message:
|
119
|
+
post_install_message:
|
110
120
|
rdoc_options: []
|
111
121
|
require_paths:
|
112
122
|
- lib
|
113
123
|
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
+
none: false
|
114
125
|
requirements:
|
115
|
-
- - '>='
|
126
|
+
- - ! '>='
|
116
127
|
- !ruby/object:Gem::Version
|
117
128
|
version: '0'
|
118
129
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
none: false
|
119
131
|
requirements:
|
120
|
-
- - '>='
|
132
|
+
- - ! '>='
|
121
133
|
- !ruby/object:Gem::Version
|
122
134
|
version: '0'
|
123
135
|
requirements: []
|
124
|
-
rubyforge_project:
|
125
|
-
rubygems_version:
|
126
|
-
signing_key:
|
127
|
-
specification_version:
|
136
|
+
rubyforge_project:
|
137
|
+
rubygems_version: 1.8.23
|
138
|
+
signing_key:
|
139
|
+
specification_version: 3
|
128
140
|
summary: Horizon ILS MARC Exporter, a plugin for the traject tool
|
129
141
|
test_files:
|
130
142
|
- test/horizon_bib_auth_merge_test.rb
|
131
143
|
- test/horizon_unicode_escape_test.rb
|
132
144
|
- test/test_helper.rb
|
145
|
+
has_rdoc:
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 2f8ac4a546bb69032c0da921fa9a749ef83a6d96
|
4
|
-
data.tar.gz: 530cc90de2a1eb2ffb52eda7601bbc5076be3bf7
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: d5e490f9706b94fa747b21c689255320b9cd96dfe6cd1b93c63d32952496e293a90f550d5a8449d87d4a05c39a8a59cd049399e004bd75f031a9a55048ffb718
|
7
|
-
data.tar.gz: a766c0d3a2418017d0dcfe562737b4b647bb28e8e9d05dad9fc55be617e6de69c53dcbdd415bb153e4e56de1ea8bdd3cb0599728ef5dc729f16c23049a00764b
|