data_kit 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3fdf7cb54e3359090864cc1f2021822a91b382ba
4
- data.tar.gz: 065fc5738cad3b21dfd62a79474b326ee2722bc4
3
+ metadata.gz: a079b89a65b3f20db63058e070d6b12d19db436f
4
+ data.tar.gz: 78868c7945412096389eca3d05e15a495d73f08b
5
5
  SHA512:
6
- metadata.gz: 4d0283c91997c819063e7f7b38aee1cba5a97f8f6c43acc1d321d50945a5728607e2bcf44ae994ba6d7731b46e0e016bc13d0ef765a49348c3f6a6987b6e1297
7
- data.tar.gz: 5f8b249b33841efcd8d38e4a899ce7a4465b6dcd08c05d77bcb4f42f82bc24f9f957cf319c5cf8c84f83e8c84c2bea854e93206d1fee901f229bc8c690c989c5
6
+ metadata.gz: f1a80f34c222c89ee9af223796c06d50087dfddd9def99d4d1b6edd4c650ec6fd0f1dc8770dbf5c8f6c62433a8987e4f157e945ac9d0b188507a6e51feb67562
7
+ data.tar.gz: 8f4c7dac994fe348c69eba8b3b2a62105c33f8f4565efdc2effd3a4f41b3582f34b0b8ed8d86b89fa9a1986e1f152147563967f43e32426d255c5fb910728ec5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- data_kit (0.0.7)
4
+ data_kit (0.0.8)
5
5
  rcsv
6
6
  timeliness
7
7
 
@@ -17,6 +17,7 @@ module DataKit
17
17
  def each_row(&block)
18
18
  handle.rewind
19
19
  Rcsv.parse(handle, :header => :skip, :columns => columns, :row_as_hash => true) do |row|
20
+ puts row.inspect
20
21
  yield row
21
22
  end
22
23
  end
@@ -39,7 +40,10 @@ module DataKit
39
40
  @handle = File.open(path)
40
41
  end
41
42
 
42
- @handle.set_encoding(Encoding.find("UTF-8"))
43
+ @handle.set_encoding(
44
+ Encoding.find("BINARY"), Encoding.find("UTF-8"),
45
+ {:invalid => :replace, :undef => :replace, :replace => ''}
46
+ )
43
47
  end
44
48
 
45
49
  def set_headers
@@ -32,15 +32,18 @@ class Rcsv
32
32
 
33
33
  initial_position = csv_data.pos
34
34
 
35
+ first_line = csv_data.each_line.first
36
+ field_count = first_line.split(raw_options[:col_sep]).length
37
+
35
38
  case options[:header]
36
39
  when :use
37
- header = self.raw_parse(StringIO.new(csv_data.each_line.first), raw_options).first
40
+ header = self.raw_parse(StringIO.new(first_line), raw_options).first
38
41
  raw_options[:offset_rows] += 1
39
42
  when :skip
40
- header = (0..(csv_data.each_line.first.split(raw_options[:col_sep]).count)).to_a
43
+ header = (0..field_count).to_a
41
44
  raw_options[:offset_rows] += 1
42
45
  when :none
43
- header = (0..(csv_data.each_line.first.split(raw_options[:col_sep]).count)).to_a
46
+ header = (0..field_count).to_a
44
47
  end
45
48
 
46
49
  raw_options[:row_as_hash] = options[:row_as_hash] # Setting after header parsing
@@ -1,3 +1,3 @@
1
1
  module DataKit
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
@@ -9,6 +9,10 @@ describe DataKit::CSV::Parser do
9
9
  data_path('carriage_returns.csv')
10
10
  }
11
11
 
12
+ let(:vc_companies_path) {
13
+ data_path('vc_backed_companies.csv')
14
+ }
15
+
12
16
  it "should initialize" do
13
17
  csv = DataKit::CSV::Parser.new(path)
14
18
 
@@ -47,4 +51,15 @@ describe DataKit::CSV::Parser do
47
51
 
48
52
  count.should == 10
49
53
  end
54
+
55
+ it "should parse CSVs with unknown encodings" do
56
+ csv = DataKit::CSV::Parser.new(File.open(vc_companies_path))
57
+
58
+ count = 0
59
+ csv.each_row do |row|
60
+ count += 1
61
+ end
62
+
63
+ count.should == 2
64
+ end
50
65
  end
@@ -0,0 +1 @@
1
+ Company Name,LOCATION,Still operating? Yes/No,Employees ,Phone,Email,LinkDin Profile,Website Url
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mode Analytics
@@ -155,6 +155,7 @@ files:
155
155
  - spec/fixtures/carriage_returns.csv
156
156
  - spec/fixtures/standard.csv
157
157
  - spec/fixtures/utf8.csv
158
+ - spec/fixtures/vc_backed_companies.csv
158
159
  - spec/spec_helper.rb
159
160
  homepage: http://www.modeanalytics.com/
160
161
  licenses:
@@ -197,4 +198,5 @@ test_files:
197
198
  - spec/fixtures/carriage_returns.csv
198
199
  - spec/fixtures/standard.csv
199
200
  - spec/fixtures/utf8.csv
201
+ - spec/fixtures/vc_backed_companies.csv
200
202
  - spec/spec_helper.rb