traject_horizon 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c445af099fb059edc31fcf8a82737032e97ea149
4
+ data.tar.gz: 3db072af3d1202b9235d4a0dbd9c3fcd2c29699f
5
+ SHA512:
6
+ metadata.gz: 53780c5c0e8eb1d3f3929f4bb3b42f688decfafddd0c1e38ad2920b2f9e5ec690cbd656255d3d9d4f44e10bcc73b3868ce203d201e54f212360d0ffff613fca6
7
+ data.tar.gz: af84bb6b374d0bf2f4489361c9cf524b2670511369eefe0ec615f3d5b7bf0fde5950ac17466d5659fde59192e68be0f28bc531214b19faf5a993d05a9f70d49b
@@ -122,6 +122,10 @@ module Traject
122
122
  # to UTF8. These character references are oddly legal representations of UTF8 in
123
123
  # MARC8. http://www.loc.gov/marc/specifications/speccharconversion.html#lossless
124
124
  #
125
+ # Note HorizonReader will also remove control chars from output (except for ones
126
+ # with legal meaning in binary MARC) -- these are errors in Horizon db which mean
127
+ # nothing, are illegal in MARC binary serialization, and can mess things up.
128
+ #
125
129
  # == Misc
126
130
  #
127
131
  # [horizon.batch_size] Batch size to use for fetching item/copy info on each bib. Default 400.
@@ -253,14 +257,17 @@ module Traject
253
257
  end
254
258
 
255
259
  # Converts from Marc8 to UTF8 if necessary.
256
- # Also replaces horizon <U+nnnn> codes if needed, as well as weird Horizon HTML-escaped rlm
260
+ #
261
+ # Also replaces escaped unicode codepoints using custom Horizon "<U+nnnn>" format
262
+ # or the standard MARC 'lossless encoding' "&#xHHHH;" format.
257
263
  def convert_text!(text, error_handler)
258
264
  text = AnselToUnicode.new(error_handler, true).convert(text) if convert_marc8_to_utf8?
259
265
 
260
266
  # Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
261
267
  # codepoint, turn it UTF8 for that codepoint
262
268
  if settings["horizon.destination_encoding"] == "UTF8" &&
263
- settings["horizon.codepoint_translate"].to_s == "true" || settings["horizon.character_reference_translate"]
269
+ (settings["horizon.codepoint_translate"].to_s == "true" ||
270
+ settings["horizon.character_reference_translate"].to_s == "true")
264
271
 
265
272
  regexp = if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.character_reference_translate"].to_s == "true"
266
273
  # unicode codepoint in either HTML char reference form OR
@@ -278,6 +285,12 @@ module Traject
278
285
  end
279
286
  end
280
287
 
288
+ # eliminate illegal control chars. All ASCII less than 0x20
289
+ # _except_ for four legal ones (including MARC delimiters).
290
+ # http://www.loc.gov/marc/specifications/specchargeneral.html#controlfunction
291
+ # this is all bytes from 0x00 to 0x1F except for the allowed 1B, 1D, 1E, 1F.
292
+ text.gsub!(/[\x00-\x1A\x1C]/, '')
293
+
281
294
  return text
282
295
  end
283
296
 
@@ -1,3 +1,3 @@
1
1
  module TrajectHorizon
2
- VERSION = "1.1.0"
2
+ VERSION = "1.1.1"
3
3
  end
@@ -21,6 +21,12 @@ describe "turning weird Horizon escape sequences into unicode" do
21
21
  assert_equal "Weird \u200F but these aren't changed #x2000; &#200F etc.", converted
22
22
  end
23
23
 
24
+ it "erases illegal control chars" do
25
+ converted = @reader.convert_text!("Some illegal control chars: '\x07' '\x02'; and a legal one: '\x1F'.", org.marc4j.ErrorHandler.new)
26
+
27
+ assert_equal "Some illegal control chars: '' ''; and a legal one: '\x1F'.", converted
28
+ end
29
+
24
30
  end
25
31
 
26
32
  end
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.add_dependency "traject"
21
21
 
22
22
  spec.add_dependency "marc-marc4j" # for marc4j jar files
23
+ spec.add_dependency "ensure_valid_encoding", ">= 0.5.3"
23
24
 
24
25
  spec.add_development_dependency "bundler", "~> 1.3"
25
26
  spec.add_development_dependency "rake"
metadata CHANGED
@@ -1,97 +1,100 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject_horizon
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
5
- prerelease:
4
+ version: 1.1.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - Jonathan Rochkind
9
- autorequire:
8
+ autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-11-06 00:00:00.000000000 Z
11
+ date: 2013-11-11 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: traject
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
16
20
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
21
  requirements:
19
- - - ! '>='
22
+ - - '>='
20
23
  - !ruby/object:Gem::Version
21
24
  version: '0'
22
- type: :runtime
23
25
  prerelease: false
26
+ type: :runtime
27
+ - !ruby/object:Gem::Dependency
28
+ name: marc-marc4j
24
29
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
30
  requirements:
27
- - - ! '>='
31
+ - - '>='
28
32
  - !ruby/object:Gem::Version
29
33
  version: '0'
30
- - !ruby/object:Gem::Dependency
31
- name: marc-marc4j
32
34
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
35
  requirements:
35
- - - ! '>='
36
+ - - '>='
36
37
  - !ruby/object:Gem::Version
37
38
  version: '0'
38
- type: :runtime
39
39
  prerelease: false
40
+ type: :runtime
41
+ - !ruby/object:Gem::Dependency
42
+ name: ensure_valid_encoding
40
43
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
44
  requirements:
43
- - - ! '>='
45
+ - - '>='
44
46
  - !ruby/object:Gem::Version
45
- version: '0'
46
- - !ruby/object:Gem::Dependency
47
- name: bundler
47
+ version: 0.5.3
48
48
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
49
  requirements:
51
- - - ~>
50
+ - - '>='
52
51
  - !ruby/object:Gem::Version
53
- version: '1.3'
54
- type: :development
52
+ version: 0.5.3
55
53
  prerelease: false
54
+ type: :runtime
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
56
57
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
61
  version: '1.3'
62
- - !ruby/object:Gem::Dependency
63
- name: rake
64
62
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
63
  requirements:
67
- - - ! '>='
64
+ - - ~>
68
65
  - !ruby/object:Gem::Version
69
- version: '0'
70
- type: :development
66
+ version: '1.3'
71
67
  prerelease: false
68
+ type: :development
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
72
71
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
72
  requirements:
75
- - - ! '>='
73
+ - - '>='
76
74
  - !ruby/object:Gem::Version
77
75
  version: '0'
78
- - !ruby/object:Gem::Dependency
79
- name: minitest
80
76
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
77
  requirements:
83
- - - ! '>='
78
+ - - '>='
84
79
  - !ruby/object:Gem::Version
85
80
  version: '0'
86
- type: :development
87
81
  prerelease: false
82
+ type: :development
83
+ - !ruby/object:Gem::Dependency
84
+ name: minitest
88
85
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
86
  requirements:
91
- - - ! '>='
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '>='
92
93
  - !ruby/object:Gem::Version
93
94
  version: '0'
94
- description:
95
+ prerelease: false
96
+ type: :development
97
+ description:
95
98
  email:
96
99
  - jonathan@dnil.net
97
100
  executables: []
@@ -108,7 +111,7 @@ files:
108
111
  - lib/traject_horizon.rb
109
112
  - lib/traject_horizon/version.rb
110
113
  - test/horizon_bib_auth_merge_test.rb
111
- - test/horizon_unicode_escape_test.rb
114
+ - test/horizon_convert_text_test.rb
112
115
  - test/test_helper.rb
113
116
  - traject_horizon.gemspec
114
117
  - vendor/jtds/.DS_Store
@@ -116,30 +119,28 @@ files:
116
119
  homepage: http://github.com/jrochkind/traject_horizon
117
120
  licenses:
118
121
  - MIT
119
- post_install_message:
122
+ metadata: {}
123
+ post_install_message:
120
124
  rdoc_options: []
121
125
  require_paths:
122
126
  - lib
123
127
  required_ruby_version: !ruby/object:Gem::Requirement
124
- none: false
125
128
  requirements:
126
- - - ! '>='
129
+ - - '>='
127
130
  - !ruby/object:Gem::Version
128
131
  version: '0'
129
132
  required_rubygems_version: !ruby/object:Gem::Requirement
130
- none: false
131
133
  requirements:
132
- - - ! '>='
134
+ - - '>='
133
135
  - !ruby/object:Gem::Version
134
136
  version: '0'
135
137
  requirements: []
136
- rubyforge_project:
137
- rubygems_version: 1.8.23
138
- signing_key:
139
- specification_version: 3
138
+ rubyforge_project:
139
+ rubygems_version: 2.1.9
140
+ signing_key:
141
+ specification_version: 4
140
142
  summary: Horizon ILS MARC Exporter, a plugin for the traject tool
141
143
  test_files:
142
144
  - test/horizon_bib_auth_merge_test.rb
143
- - test/horizon_unicode_escape_test.rb
145
+ - test/horizon_convert_text_test.rb
144
146
  - test/test_helper.rb
145
- has_rdoc: