traject_horizon 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -117,6 +117,11 @@ module Traject
117
117
  # codepoint escaping to actual UTF-8 bytes. Defaults to true. Will be ignored
118
118
  # unless horizon.destination_encoding is UTF8 though.
119
119
  #
120
+ # [horizon.character_reference_translate] Default true. Convert HTML/XML-style
121
+ # character references like "&#x200F;" to actual UTF-8 bytes, when converting
122
+ # to UTF8. These character references are oddly legal representations of UTF8 in
123
+ # MARC8. http://www.loc.gov/marc/specifications/speccharconversion.html#lossless
124
+ #
120
125
  # == Misc
121
126
  #
122
127
  # [horizon.batch_size] Batch size to use for fetching item/copy info on each bib. Default 400.
@@ -248,14 +253,27 @@ module Traject
248
253
  end
249
254
 
250
255
  # Converts from Marc8 to UTF8 if necessary.
251
- # Also replaces horizon <U+nnnn> codes if needed.
256
+ # Also replaces horizon <U+nnnn> codes if needed, as well as HTML/XML-style hex character references such as &#x200F; (right-to-left mark).
252
257
  def convert_text!(text, error_handler)
253
258
  text = AnselToUnicode.new(error_handler, true).convert(text) if convert_marc8_to_utf8?
254
259
 
255
260
  # Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
256
261
  # codepoint, turn it UTF8 for that codepoint
257
- if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.destination_encoding"] == "UTF8"
258
- text.gsub!(/\<U\+([0-9A-Fa-f]{4})\>/) do
262
+ if settings["horizon.destination_encoding"] == "UTF8" &&
263
+ settings["horizon.codepoint_translate"].to_s == "true" || settings["horizon.character_reference_translate"]
264
+
265
+ regexp = if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.character_reference_translate"].to_s == "true"
266
+ # unicode codepoint in either HTML char reference form OR
267
+ # weird horizon form.
268
+ /(?:\<U\+|&#x)([0-9A-Fa-f]{4})(?:\>|;)/
269
+ elsif settings["horizon.codepoint_translate"].to_s == "true"
270
+ # just weird horizon form
271
+ /\<U\+([0-9A-Fa-f]{4})\>/
272
+ else # just character references
273
+ /&#x([0-9A-Fa-f]{4});/
274
+ end
275
+
276
+ text.gsub!(regexp) do
259
277
  [$1.hex].pack("U")
260
278
  end
261
279
  end
@@ -682,6 +700,7 @@ module Traject
682
700
  "horizon.source_encoding" => "MARC8",
683
701
  "horizon.destination_encoding" => "UTF8",
684
702
  "horizon.codepoint_translate" => true,
703
+ "horizon.character_reference_translate" => true,
685
704
 
686
705
  "horizon.item_tag" => "991",
687
706
  # Crazy isnull() in the call_type join to join to call_type directly on item
@@ -1,3 +1,3 @@
1
1
  module TrajectHorizon
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -12,8 +12,13 @@ describe "turning weird Horizon escape sequences into unicode" do
12
12
  end
13
13
 
14
14
  it "converts" do
15
- converted = @reader.convert_text!("A hangul character: <U+1112>, okay<!>", org.marc4j.ErrorHandler.new)
16
- assert_equal "A hangul character: ᄒ, okay<!>", converted
15
+ converted = @reader.convert_text!("A hangul character: <U+1112>, okay<!> U+1000>", org.marc4j.ErrorHandler.new)
16
+ assert_equal "A hangul character: ᄒ, okay<!> U+1000>", converted
17
+ end
18
+
19
+ it "converts rlm" do
20
+ converted = @reader.convert_text!("Weird &#x200F; but these aren't changed #x2000; &#200F etc.", org.marc4j.ErrorHandler.new)
21
+ assert_equal "Weird \u200F but these aren't changed #x2000; &#200F etc.", converted
17
22
  end
18
23
 
19
24
  end
metadata CHANGED
@@ -1,86 +1,97 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject_horizon
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Jonathan Rochkind
8
- autorequire:
9
+ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2013-10-21 00:00:00.000000000 Z
12
+ date: 2013-11-06 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: traject
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '>='
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
21
18
  requirements:
22
- - - '>='
19
+ - - ! '>='
23
20
  - !ruby/object:Gem::Version
24
21
  version: '0'
25
- prerelease: false
26
22
  type: :runtime
27
- - !ruby/object:Gem::Dependency
28
- name: marc-marc4j
23
+ prerelease: false
29
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
30
26
  requirements:
31
- - - '>='
27
+ - - ! '>='
32
28
  - !ruby/object:Gem::Version
33
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: marc-marc4j
34
32
  requirement: !ruby/object:Gem::Requirement
33
+ none: false
35
34
  requirements:
36
- - - '>='
35
+ - - ! '>='
37
36
  - !ruby/object:Gem::Version
38
37
  version: '0'
39
- prerelease: false
40
38
  type: :runtime
41
- - !ruby/object:Gem::Dependency
42
- name: bundler
39
+ prerelease: false
43
40
  version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
44
42
  requirements:
45
- - - ~>
43
+ - - ! '>='
46
44
  - !ruby/object:Gem::Version
47
- version: '1.3'
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
48
  requirement: !ruby/object:Gem::Requirement
49
+ none: false
49
50
  requirements:
50
51
  - - ~>
51
52
  - !ruby/object:Gem::Version
52
53
  version: '1.3'
53
- prerelease: false
54
54
  type: :development
55
- - !ruby/object:Gem::Dependency
56
- name: rake
55
+ prerelease: false
57
56
  version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
58
  requirements:
59
- - - '>='
59
+ - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
62
64
  requirement: !ruby/object:Gem::Requirement
65
+ none: false
63
66
  requirements:
64
- - - '>='
67
+ - - ! '>='
65
68
  - !ruby/object:Gem::Version
66
69
  version: '0'
67
- prerelease: false
68
70
  type: :development
69
- - !ruby/object:Gem::Dependency
70
- name: minitest
71
+ prerelease: false
71
72
  version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
72
74
  requirements:
73
- - - '>='
75
+ - - ! '>='
74
76
  - !ruby/object:Gem::Version
75
77
  version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: minitest
76
80
  requirement: !ruby/object:Gem::Requirement
81
+ none: false
77
82
  requirements:
78
- - - '>='
83
+ - - ! '>='
79
84
  - !ruby/object:Gem::Version
80
85
  version: '0'
81
- prerelease: false
82
86
  type: :development
83
- description:
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description:
84
95
  email:
85
96
  - jonathan@dnil.net
86
97
  executables: []
@@ -105,28 +116,30 @@ files:
105
116
  homepage: http://github.com/jrochkind/traject_horizon
106
117
  licenses:
107
118
  - MIT
108
- metadata: {}
109
- post_install_message:
119
+ post_install_message:
110
120
  rdoc_options: []
111
121
  require_paths:
112
122
  - lib
113
123
  required_ruby_version: !ruby/object:Gem::Requirement
124
+ none: false
114
125
  requirements:
115
- - - '>='
126
+ - - ! '>='
116
127
  - !ruby/object:Gem::Version
117
128
  version: '0'
118
129
  required_rubygems_version: !ruby/object:Gem::Requirement
130
+ none: false
119
131
  requirements:
120
- - - '>='
132
+ - - ! '>='
121
133
  - !ruby/object:Gem::Version
122
134
  version: '0'
123
135
  requirements: []
124
- rubyforge_project:
125
- rubygems_version: 2.1.5
126
- signing_key:
127
- specification_version: 4
136
+ rubyforge_project:
137
+ rubygems_version: 1.8.23
138
+ signing_key:
139
+ specification_version: 3
128
140
  summary: Horizon ILS MARC Exporter, a plugin for the traject tool
129
141
  test_files:
130
142
  - test/horizon_bib_auth_merge_test.rb
131
143
  - test/horizon_unicode_escape_test.rb
132
144
  - test/test_helper.rb
145
+ has_rdoc:
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 2f8ac4a546bb69032c0da921fa9a749ef83a6d96
4
- data.tar.gz: 530cc90de2a1eb2ffb52eda7601bbc5076be3bf7
5
- SHA512:
6
- metadata.gz: d5e490f9706b94fa747b21c689255320b9cd96dfe6cd1b93c63d32952496e293a90f550d5a8449d87d4a05c39a8a59cd049399e004bd75f031a9a55048ffb718
7
- data.tar.gz: a766c0d3a2418017d0dcfe562737b4b647bb28e8e9d05dad9fc55be617e6de69c53dcbdd415bb153e4e56de1ea8bdd3cb0599728ef5dc729f16c23049a00764b