charazard 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34b9d5367e647921915519d5a3a252f683ddf91b
4
- data.tar.gz: b762b59f2e9ddeeefb4ade4afe6511a6756d1074
3
+ metadata.gz: 78cecee8b3d4cbc404f29ca445fe056e50560d32
4
+ data.tar.gz: 28d6d6e0982f2250ace5d28bc3451b1d2b646d38
5
5
  SHA512:
6
- metadata.gz: 887e42a951e9fcb07146bcf9f7adc7dbde56f706dd6d964d32d345112bc27508a38d6210430218e2165ca785d07f1c5e37d840e74198938dc3a35c58f8b19a81
7
- data.tar.gz: dd537a7e4d2ae0d675f06731a3aac65e729a0b28f341d6c4adcc8ec60073ab312aefa605722b03b3b093d76e5470af79bf430dc3d38b5e683a24390399334e1e
6
+ metadata.gz: b2f8ff14f35fad23fe61330df3f892a0a429799bbf24e936d20b9b94697834009154754c687d33b622ec46316e59613b991dac1b6b3ec62064434f5f9cd6018e
7
+ data.tar.gz: 59d6e5d5f2cf07daacbf9acb9e2ad36a1266f4d551090c3a9cd92ba6bd1609b4c1d74fa33066dd4f8ee4b7ee1caaa2be1cca02a7966268f2804ef0d72b5bee92
data/README.markdown CHANGED
@@ -16,26 +16,15 @@ Charazard.fix_invalid_unicode_literals("\x93Smart quotes\x94 \xC3\x9Cber Unicode
16
16
 
17
17
  `Charazard.fix_invalid_unicode_literals` can be used in combination with
18
18
  [`filter_io`](https://github.com/jasoncodes/filter_io) to filter CSV streams.
19
- Here’s an example that handles UTF-8/ISO-8859-1 with mixed line endings:
19
+ Since this is such a common use case, Charazard includes a handy
20
+ [helper class](https://github.com/jasoncodes/charazard/blob/master/lib/charazard/io.rb):
20
21
 
21
22
  ``` ruby
22
- require 'filter_io'
23
- require 'charazard'
23
+ require 'charazard/io'
24
24
  require 'csv'
25
25
 
26
26
  File.open(filename, external_encoding: 'UTF-8') do |io|
27
- io = FilterIO.new(io) do |data, state|
28
- # fix invalid UTF-8 literals
29
- data = Charazard.fix_invalid_unicode_literals(data)
30
-
31
- # grab another chunk if the last character is a delimiter
32
- raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
33
- # normalise line endings to LF
34
- data = data.gsub /\r\n|\r|\n/, "\n"
35
-
36
- data
37
- end
38
-
27
+ io = Charazard::IO.new(io)
39
28
  CSV.parse(io, row_sep: "\n") do |row|
40
29
  p row
41
30
  end
data/charazard.gemspec CHANGED
@@ -21,5 +21,7 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_development_dependency 'bundler', '~> 1.6'
23
23
  spec.add_development_dependency 'rake'
24
+ spec.add_development_dependency 'minitest'
25
+ spec.add_development_dependency 'filter_io'
24
26
  spec.add_development_dependency 'pry'
25
27
  end
data/lib/charazard/io.rb ADDED
@@ -0,0 +1,20 @@
1
+ require 'charazard'
2
+ require 'filter_io'
3
+
4
+ module Charazard
5
+ class IO < FilterIO
6
+ def initialize(io)
7
+ super do |data, state|
8
+ # fix invalid UTF-8 literals
9
+ data = Charazard.fix_invalid_unicode_literals(data)
10
+
11
+ # grab another chunk if the last character is a delimiter
12
+ raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
13
+ # normalise line endings to LF
14
+ data = data.gsub /\r\n|\r|\n/, "\n"
15
+
16
+ data
17
+ end
18
+ end
19
+ end
20
+ end
data/lib/charazard/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Charazard
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
data/test/charazard_io_test.rb ADDED
@@ -0,0 +1,13 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'test_helper'
4
+ require 'charazard/io'
5
+ require 'csv'
6
+
7
+ describe Charazard::IO do
8
+ it 'converts mixed character encodings into valid UTF-8' do
9
+ src = StringIO.new "Name,Character\nEm dash,\x97\r\nSmart quotes,\x93Quoted String\x94\r"
10
+ dst = Charazard::IO.new(src)
11
+ assert_equal "Name,Character\nEm dash,—\nSmart quotes,“Quoted String”\n", dst.read
12
+ end
13
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charazard
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason Weathered
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-11 00:00:00.000000000 Z
11
+ date: 2014-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -52,6 +52,34 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: filter_io
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: pry
57
85
  requirement: !ruby/object:Gem::Requirement
@@ -80,7 +108,9 @@ files:
80
108
  - Rakefile
81
109
  - charazard.gemspec
82
110
  - lib/charazard.rb
111
+ - lib/charazard/io.rb
83
112
  - lib/charazard/version.rb
113
+ - test/charazard_io_test.rb
84
114
  - test/charazard_test.rb
85
115
  - test/test_helper.rb
86
116
  homepage: https://github.com/jasoncodes/charazard
@@ -108,5 +138,6 @@ signing_key:
108
138
  specification_version: 4
109
139
  summary: Cleans up bad character encodings with liberal application of fire.
110
140
  test_files:
141
+ - test/charazard_io_test.rb
111
142
  - test/charazard_test.rb
112
143
  - test/test_helper.rb