charazard 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34b9d5367e647921915519d5a3a252f683ddf91b
4
- data.tar.gz: b762b59f2e9ddeeefb4ade4afe6511a6756d1074
3
+ metadata.gz: 78cecee8b3d4cbc404f29ca445fe056e50560d32
4
+ data.tar.gz: 28d6d6e0982f2250ace5d28bc3451b1d2b646d38
5
5
  SHA512:
6
- metadata.gz: 887e42a951e9fcb07146bcf9f7adc7dbde56f706dd6d964d32d345112bc27508a38d6210430218e2165ca785d07f1c5e37d840e74198938dc3a35c58f8b19a81
7
- data.tar.gz: dd537a7e4d2ae0d675f06731a3aac65e729a0b28f341d6c4adcc8ec60073ab312aefa605722b03b3b093d76e5470af79bf430dc3d38b5e683a24390399334e1e
6
+ metadata.gz: b2f8ff14f35fad23fe61330df3f892a0a429799bbf24e936d20b9b94697834009154754c687d33b622ec46316e59613b991dac1b6b3ec62064434f5f9cd6018e
7
+ data.tar.gz: 59d6e5d5f2cf07daacbf9acb9e2ad36a1266f4d551090c3a9cd92ba6bd1609b4c1d74fa33066dd4f8ee4b7ee1caaa2be1cca02a7966268f2804ef0d72b5bee92
data/README.markdown CHANGED
@@ -16,26 +16,15 @@ Charazard.fix_invalid_unicode_literals("\x93Smart quotes\x94 \xC3\x9Cber Unicode
16
16
 
17
17
  `Charazard.fix_invalid_unicode_literals` can be used in combination with
18
18
  [`filter_io`](https://github.com/jasoncodes/filter_io) to filter CSV streams.
19
- Here’s an example that handles UTF-8/ISO-8859-1 with mixed line endings:
19
+ Since this is such a common use case, Charazard includes a handy
20
+ [helper class](https://github.com/jasoncodes/charazard/blob/master/lib/charazard/io.rb):
20
21
 
21
22
  ``` ruby
22
- require 'filter_io'
23
- require 'charazard'
23
+ require 'charazard/io'
24
24
  require 'csv'
25
25
 
26
26
  File.open(filename, external_encoding: 'UTF-8') do |io|
27
- io = FilterIO.new(io) do |data, state|
28
- # fix invalid UTF-8 literals
29
- data = Charazard.fix_invalid_unicode_literals(data)
30
-
31
- # grab another chunk if the last character is a delimiter
32
- raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
33
- # normalise line endings to LF
34
- data = data.gsub /\r\n|\r|\n/, "\n"
35
-
36
- data
37
- end
38
-
27
+ io = Charazard::IO.new(io)
39
28
  CSV.parse(io, row_sep: "\n") do |row|
40
29
  p row
41
30
  end
data/charazard.gemspec CHANGED
@@ -21,5 +21,7 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_development_dependency 'bundler', '~> 1.6'
23
23
  spec.add_development_dependency 'rake'
24
+ spec.add_development_dependency 'minitest'
25
+ spec.add_development_dependency 'filter_io'
24
26
  spec.add_development_dependency 'pry'
25
27
  end
@@ -0,0 +1,20 @@
1
+ require 'charazard'
2
+ require 'filter_io'
3
+
4
+ module Charazard
5
+ class IO < FilterIO
6
+ def initialize(io)
7
+ super do |data, state|
8
+ # fix invalid UTF-8 literals
9
+ data = Charazard.fix_invalid_unicode_literals(data)
10
+
11
+ # grab another chunk if the last character is a delimiter
12
+ raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
13
+ # normalise line endings to LF
14
+ data = data.gsub /\r\n|\r|\n/, "\n"
15
+
16
+ data
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,3 +1,3 @@
1
1
  module Charazard
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
@@ -0,0 +1,13 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'test_helper'
4
+ require 'charazard/io'
5
+ require 'csv'
6
+
7
+ describe Charazard::IO do
8
+ it 'converts mixed character encodings into valid UTF-8' do
9
+ src = StringIO.new "Name,Character\nEm dash,\x97\r\nSmart quotes,\x93Quoted String\x94\r"
10
+ dst = Charazard::IO.new(src)
11
+ assert_equal "Name,Character\nEm dash,—\nSmart quotes,“Quoted String”\n", dst.read
12
+ end
13
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charazard
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason Weathered
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-11 00:00:00.000000000 Z
11
+ date: 2014-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -52,6 +52,34 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: filter_io
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: pry
57
85
  requirement: !ruby/object:Gem::Requirement
@@ -80,7 +108,9 @@ files:
80
108
  - Rakefile
81
109
  - charazard.gemspec
82
110
  - lib/charazard.rb
111
+ - lib/charazard/io.rb
83
112
  - lib/charazard/version.rb
113
+ - test/charazard_io_test.rb
84
114
  - test/charazard_test.rb
85
115
  - test/test_helper.rb
86
116
  homepage: https://github.com/jasoncodes/charazard
@@ -108,5 +138,6 @@ signing_key:
108
138
  specification_version: 4
109
139
  summary: Cleans up bad character encodings with liberal application of fire.
110
140
  test_files:
141
+ - test/charazard_io_test.rb
111
142
  - test/charazard_test.rb
112
143
  - test/test_helper.rb