traject_horizon 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -117,6 +117,11 @@ module Traject
117
117
  # codepoint escaping to actual UTF-8 bytes. Defaults to true. Will be ignored
118
118
  # unless horizon.destination_encoding is UTF8 though.
119
119
  #
120
+ # [horizon.character_reference_translate] Default true. Convert HTML/XML-style
121
+ # character references like "&#x200F;" to actual UTF-8 bytes, when converting
122
+ # to UTF8. These character references are oddly legal representations of UTF8 in
123
+ # MARC8. http://www.loc.gov/marc/specifications/speccharconversion.html#lossless
124
+ #
120
125
  # == Misc
121
126
  #
122
127
  # [horizon.batch_size] Batch size to use for fetching item/copy info on each bib. Default 400.
@@ -248,14 +253,27 @@ module Traject
248
253
  end
249
254
 
250
255
  # Converts from Marc8 to UTF8 if necessary.
251
- # Also replaces horizon <U+nnnn> codes if needed.
256
+ # Also replaces horizon <U+nnnn> codes if needed, as well as HTML/XML-style character references such as Horizon's escaped RLM (&#x200F;).
252
257
  def convert_text!(text, error_handler)
253
258
  text = AnselToUnicode.new(error_handler, true).convert(text) if convert_marc8_to_utf8?
254
259
 
255
260
  # Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
256
261
  # codepoint, turn it UTF8 for that codepoint
257
- if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.destination_encoding"] == "UTF8"
258
- text.gsub!(/\<U\+([0-9A-Fa-f]{4})\>/) do
262
+ if settings["horizon.destination_encoding"] == "UTF8" &&
263
+ settings["horizon.codepoint_translate"].to_s == "true" || settings["horizon.character_reference_translate"]
264
+
265
+ regexp = if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.character_reference_translate"].to_s == "true"
266
+ # unicode codepoint in either HTML char reference form OR
267
+ # weird horizon form.
268
+ /(?:\<U\+|&#x)([0-9A-Fa-f]{4})(?:\>|;)/
269
+ elsif settings["horizon.codepoint_translate"].to_s == "true"
270
+ # just weird horizon form
271
+ /\<U\+([0-9A-Fa-f]{4})\>/
272
+ else # just character references
273
+ /&#x([0-9A-Fa-f]{4});/
274
+ end
275
+
276
+ text.gsub!(regexp) do
259
277
  [$1.hex].pack("U")
260
278
  end
261
279
  end
@@ -682,6 +700,7 @@ module Traject
682
700
  "horizon.source_encoding" => "MARC8",
683
701
  "horizon.destination_encoding" => "UTF8",
684
702
  "horizon.codepoint_translate" => true,
703
+ "horizon.character_reference_translate" => true,
685
704
 
686
705
  "horizon.item_tag" => "991",
687
706
  # Crazy isnull() in the call_type join to join to call_type directly on item
@@ -1,3 +1,3 @@
1
1
  module TrajectHorizon
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -12,8 +12,13 @@ describe "turning weird Horizon escape sequences into unicode" do
12
12
  end
13
13
 
14
14
  it "converts" do
15
- converted = @reader.convert_text!("A hangul character: <U+1112>, okay<!>", org.marc4j.ErrorHandler.new)
16
- assert_equal "A hangul character: ᄒ, okay<!>", converted
15
+ converted = @reader.convert_text!("A hangul character: <U+1112>, okay<!> U+1000>", org.marc4j.ErrorHandler.new)
16
+ assert_equal "A hangul character: ᄒ, okay<!> U+1000>", converted
17
+ end
18
+
19
+ it "converts rlm" do
20
+ converted = @reader.convert_text!("Weird &#x200F; but these aren't changed #x2000; &#200F etc.", org.marc4j.ErrorHandler.new)
21
+ assert_equal "Weird \u200F but these aren't changed #x2000; &#200F etc.", converted
17
22
  end
18
23
 
19
24
  end
metadata CHANGED
@@ -1,86 +1,97 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject_horizon
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Jonathan Rochkind
8
- autorequire:
9
+ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2013-10-21 00:00:00.000000000 Z
12
+ date: 2013-11-06 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: traject
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '>='
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
21
18
  requirements:
22
- - - '>='
19
+ - - ! '>='
23
20
  - !ruby/object:Gem::Version
24
21
  version: '0'
25
- prerelease: false
26
22
  type: :runtime
27
- - !ruby/object:Gem::Dependency
28
- name: marc-marc4j
23
+ prerelease: false
29
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
30
26
  requirements:
31
- - - '>='
27
+ - - ! '>='
32
28
  - !ruby/object:Gem::Version
33
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: marc-marc4j
34
32
  requirement: !ruby/object:Gem::Requirement
33
+ none: false
35
34
  requirements:
36
- - - '>='
35
+ - - ! '>='
37
36
  - !ruby/object:Gem::Version
38
37
  version: '0'
39
- prerelease: false
40
38
  type: :runtime
41
- - !ruby/object:Gem::Dependency
42
- name: bundler
39
+ prerelease: false
43
40
  version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
44
42
  requirements:
45
- - - ~>
43
+ - - ! '>='
46
44
  - !ruby/object:Gem::Version
47
- version: '1.3'
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
48
  requirement: !ruby/object:Gem::Requirement
49
+ none: false
49
50
  requirements:
50
51
  - - ~>
51
52
  - !ruby/object:Gem::Version
52
53
  version: '1.3'
53
- prerelease: false
54
54
  type: :development
55
- - !ruby/object:Gem::Dependency
56
- name: rake
55
+ prerelease: false
57
56
  version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
58
  requirements:
59
- - - '>='
59
+ - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
62
64
  requirement: !ruby/object:Gem::Requirement
65
+ none: false
63
66
  requirements:
64
- - - '>='
67
+ - - ! '>='
65
68
  - !ruby/object:Gem::Version
66
69
  version: '0'
67
- prerelease: false
68
70
  type: :development
69
- - !ruby/object:Gem::Dependency
70
- name: minitest
71
+ prerelease: false
71
72
  version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
72
74
  requirements:
73
- - - '>='
75
+ - - ! '>='
74
76
  - !ruby/object:Gem::Version
75
77
  version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: minitest
76
80
  requirement: !ruby/object:Gem::Requirement
81
+ none: false
77
82
  requirements:
78
- - - '>='
83
+ - - ! '>='
79
84
  - !ruby/object:Gem::Version
80
85
  version: '0'
81
- prerelease: false
82
86
  type: :development
83
- description:
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description:
84
95
  email:
85
96
  - jonathan@dnil.net
86
97
  executables: []
@@ -105,28 +116,30 @@ files:
105
116
  homepage: http://github.com/jrochkind/traject_horizon
106
117
  licenses:
107
118
  - MIT
108
- metadata: {}
109
- post_install_message:
119
+ post_install_message:
110
120
  rdoc_options: []
111
121
  require_paths:
112
122
  - lib
113
123
  required_ruby_version: !ruby/object:Gem::Requirement
124
+ none: false
114
125
  requirements:
115
- - - '>='
126
+ - - ! '>='
116
127
  - !ruby/object:Gem::Version
117
128
  version: '0'
118
129
  required_rubygems_version: !ruby/object:Gem::Requirement
130
+ none: false
119
131
  requirements:
120
- - - '>='
132
+ - - ! '>='
121
133
  - !ruby/object:Gem::Version
122
134
  version: '0'
123
135
  requirements: []
124
- rubyforge_project:
125
- rubygems_version: 2.1.5
126
- signing_key:
127
- specification_version: 4
136
+ rubyforge_project:
137
+ rubygems_version: 1.8.23
138
+ signing_key:
139
+ specification_version: 3
128
140
  summary: Horizon ILS MARC Exporter, a plugin for the traject tool
129
141
  test_files:
130
142
  - test/horizon_bib_auth_merge_test.rb
131
143
  - test/horizon_unicode_escape_test.rb
132
144
  - test/test_helper.rb
145
+ has_rdoc:
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 2f8ac4a546bb69032c0da921fa9a749ef83a6d96
4
- data.tar.gz: 530cc90de2a1eb2ffb52eda7601bbc5076be3bf7
5
- SHA512:
6
- metadata.gz: d5e490f9706b94fa747b21c689255320b9cd96dfe6cd1b93c63d32952496e293a90f550d5a8449d87d4a05c39a8a59cd049399e004bd75f031a9a55048ffb718
7
- data.tar.gz: a766c0d3a2418017d0dcfe562737b4b647bb28e8e9d05dad9fc55be617e6de69c53dcbdd415bb153e4e56de1ea8bdd3cb0599728ef5dc729f16c23049a00764b