traject_horizon 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c445af099fb059edc31fcf8a82737032e97ea149
4
+ data.tar.gz: 3db072af3d1202b9235d4a0dbd9c3fcd2c29699f
5
+ SHA512:
6
+ metadata.gz: 53780c5c0e8eb1d3f3929f4bb3b42f688decfafddd0c1e38ad2920b2f9e5ec690cbd656255d3d9d4f44e10bcc73b3868ce203d201e54f212360d0ffff613fca6
7
+ data.tar.gz: af84bb6b374d0bf2f4489361c9cf524b2670511369eefe0ec615f3d5b7bf0fde5950ac17466d5659fde59192e68be0f28bc531214b19faf5a993d05a9f70d49b
@@ -122,6 +122,10 @@ module Traject
122
122
  # to UTF8. These character references are oddly legal representations of UTF8 in
123
123
  # MARC8. http://www.loc.gov/marc/specifications/speccharconversion.html#lossless
124
124
  #
125
+ # Note HorizonReader will also remove control chars from output (except for ones
126
+ # with legal meaning in binary MARC) -- these are errors in Horizon db which mean
127
+ # nothing, are illegal in MARC binary serialization, and can mess things up.
128
+ #
125
129
  # == Misc
126
130
  #
127
131
  # [horizon.batch_size] Batch size to use for fetching item/copy info on each bib. Default 400.
@@ -253,14 +257,17 @@ module Traject
253
257
  end
254
258
 
255
259
  # Converts from Marc8 to UTF8 if necessary.
256
- # Also replaces horizon <U+nnnn> codes if needed, as well as weird Horizon HTML-escaped rlm
260
+ #
261
+ # Also replaces escaped unicode codepoints using custom Horizon "<U+nnnn>" format
262
+ # or standard MARC 'lossless encoding' "&#xHHHH;" format.
257
263
  def convert_text!(text, error_handler)
258
264
  text = AnselToUnicode.new(error_handler, true).convert(text) if convert_marc8_to_utf8?
259
265
 
260
266
  # Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
261
267
  # codepoint, turn it into UTF8 for that codepoint
262
268
  if settings["horizon.destination_encoding"] == "UTF8" &&
263
- settings["horizon.codepoint_translate"].to_s == "true" || settings["horizon.character_reference_translate"]
269
+ (settings["horizon.codepoint_translate"].to_s == "true" ||
270
+ settings["horizon.character_reference_translate"].to_s == "true")
264
271
 
265
272
  regexp = if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.character_reference_translate"].to_s == "true"
266
273
  # unicode codepoint in either HTML char reference form OR
@@ -278,6 +285,12 @@ module Traject
278
285
  end
279
286
  end
280
287
 
288
+ # eliminate illegal control chars. All ASCII less than 0x20
289
+ # _except_ for four legal ones (including MARC delimiters).
290
+ # http://www.loc.gov/marc/specifications/specchargeneral.html#controlfunction
291
+ # this is all bytes from 0x00 to 0x1F except for the allowed 1B, 1D, 1E, 1F.
292
+ text.gsub!(/[\x00-\x1A\x1C]/, '')
293
+
281
294
  return text
282
295
  end
283
296
 
@@ -1,3 +1,3 @@
1
1
  module TrajectHorizon
2
- VERSION = "1.1.0"
2
+ VERSION = "1.1.1"
3
3
  end
@@ -21,6 +21,12 @@ describe "turning weird Horizon escape sequences into unicode" do
21
21
  assert_equal "Weird \u200F but these aren't changed #x2000; &#200F etc.", converted
22
22
  end
23
23
 
24
+ it "erases illegal control chars" do
25
+ converted = @reader.convert_text!("Some illegal control chars: '\x07' '\x02'; and a legal one: '\x1F'.", org.marc4j.ErrorHandler.new)
26
+
27
+ assert_equal "Some illegal control chars: '' ''; and a legal one: '\x1F'.", converted
28
+ end
29
+
24
30
  end
25
31
 
26
32
  end
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.add_dependency "traject"
21
21
 
22
22
  spec.add_dependency "marc-marc4j" # for marc4j jar files
23
+ spec.add_dependency "ensure_valid_encoding", ">= 0.5.3"
23
24
 
24
25
  spec.add_development_dependency "bundler", "~> 1.3"
25
26
  spec.add_development_dependency "rake"
metadata CHANGED
@@ -1,97 +1,100 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject_horizon
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
5
- prerelease:
4
+ version: 1.1.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - Jonathan Rochkind
9
- autorequire:
8
+ autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-11-06 00:00:00.000000000 Z
11
+ date: 2013-11-11 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: traject
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
16
20
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
21
  requirements:
19
- - - ! '>='
22
+ - - '>='
20
23
  - !ruby/object:Gem::Version
21
24
  version: '0'
22
- type: :runtime
23
25
  prerelease: false
26
+ type: :runtime
27
+ - !ruby/object:Gem::Dependency
28
+ name: marc-marc4j
24
29
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
30
  requirements:
27
- - - ! '>='
31
+ - - '>='
28
32
  - !ruby/object:Gem::Version
29
33
  version: '0'
30
- - !ruby/object:Gem::Dependency
31
- name: marc-marc4j
32
34
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
35
  requirements:
35
- - - ! '>='
36
+ - - '>='
36
37
  - !ruby/object:Gem::Version
37
38
  version: '0'
38
- type: :runtime
39
39
  prerelease: false
40
+ type: :runtime
41
+ - !ruby/object:Gem::Dependency
42
+ name: ensure_valid_encoding
40
43
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
44
  requirements:
43
- - - ! '>='
45
+ - - '>='
44
46
  - !ruby/object:Gem::Version
45
- version: '0'
46
- - !ruby/object:Gem::Dependency
47
- name: bundler
47
+ version: 0.5.3
48
48
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
49
  requirements:
51
- - - ~>
50
+ - - '>='
52
51
  - !ruby/object:Gem::Version
53
- version: '1.3'
54
- type: :development
52
+ version: 0.5.3
55
53
  prerelease: false
54
+ type: :runtime
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
56
57
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
61
  version: '1.3'
62
- - !ruby/object:Gem::Dependency
63
- name: rake
64
62
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
63
  requirements:
67
- - - ! '>='
64
+ - - ~>
68
65
  - !ruby/object:Gem::Version
69
- version: '0'
70
- type: :development
66
+ version: '1.3'
71
67
  prerelease: false
68
+ type: :development
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
72
71
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
72
  requirements:
75
- - - ! '>='
73
+ - - '>='
76
74
  - !ruby/object:Gem::Version
77
75
  version: '0'
78
- - !ruby/object:Gem::Dependency
79
- name: minitest
80
76
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
77
  requirements:
83
- - - ! '>='
78
+ - - '>='
84
79
  - !ruby/object:Gem::Version
85
80
  version: '0'
86
- type: :development
87
81
  prerelease: false
82
+ type: :development
83
+ - !ruby/object:Gem::Dependency
84
+ name: minitest
88
85
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
86
  requirements:
91
- - - ! '>='
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '>='
92
93
  - !ruby/object:Gem::Version
93
94
  version: '0'
94
- description:
95
+ prerelease: false
96
+ type: :development
97
+ description:
95
98
  email:
96
99
  - jonathan@dnil.net
97
100
  executables: []
@@ -108,7 +111,7 @@ files:
108
111
  - lib/traject_horizon.rb
109
112
  - lib/traject_horizon/version.rb
110
113
  - test/horizon_bib_auth_merge_test.rb
111
- - test/horizon_unicode_escape_test.rb
114
+ - test/horizon_convert_text_test.rb
112
115
  - test/test_helper.rb
113
116
  - traject_horizon.gemspec
114
117
  - vendor/jtds/.DS_Store
@@ -116,30 +119,28 @@ files:
116
119
  homepage: http://github.com/jrochkind/traject_horizon
117
120
  licenses:
118
121
  - MIT
119
- post_install_message:
122
+ metadata: {}
123
+ post_install_message:
120
124
  rdoc_options: []
121
125
  require_paths:
122
126
  - lib
123
127
  required_ruby_version: !ruby/object:Gem::Requirement
124
- none: false
125
128
  requirements:
126
- - - ! '>='
129
+ - - '>='
127
130
  - !ruby/object:Gem::Version
128
131
  version: '0'
129
132
  required_rubygems_version: !ruby/object:Gem::Requirement
130
- none: false
131
133
  requirements:
132
- - - ! '>='
134
+ - - '>='
133
135
  - !ruby/object:Gem::Version
134
136
  version: '0'
135
137
  requirements: []
136
- rubyforge_project:
137
- rubygems_version: 1.8.23
138
- signing_key:
139
- specification_version: 3
138
+ rubyforge_project:
139
+ rubygems_version: 2.1.9
140
+ signing_key:
141
+ specification_version: 4
140
142
  summary: Horizon ILS MARC Exporter, a plugin for the traject tool
141
143
  test_files:
142
144
  - test/horizon_bib_auth_merge_test.rb
143
- - test/horizon_unicode_escape_test.rb
145
+ - test/horizon_convert_text_test.rb
144
146
  - test/test_helper.rb
145
- has_rdoc: