data_kit 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3fdf7cb54e3359090864cc1f2021822a91b382ba
4
- data.tar.gz: 065fc5738cad3b21dfd62a79474b326ee2722bc4
3
+ metadata.gz: a079b89a65b3f20db63058e070d6b12d19db436f
4
+ data.tar.gz: 78868c7945412096389eca3d05e15a495d73f08b
5
5
  SHA512:
6
- metadata.gz: 4d0283c91997c819063e7f7b38aee1cba5a97f8f6c43acc1d321d50945a5728607e2bcf44ae994ba6d7731b46e0e016bc13d0ef765a49348c3f6a6987b6e1297
7
- data.tar.gz: 5f8b249b33841efcd8d38e4a899ce7a4465b6dcd08c05d77bcb4f42f82bc24f9f957cf319c5cf8c84f83e8c84c2bea854e93206d1fee901f229bc8c690c989c5
6
+ metadata.gz: f1a80f34c222c89ee9af223796c06d50087dfddd9def99d4d1b6edd4c650ec6fd0f1dc8770dbf5c8f6c62433a8987e4f157e945ac9d0b188507a6e51feb67562
7
+ data.tar.gz: 8f4c7dac994fe348c69eba8b3b2a62105c33f8f4565efdc2effd3a4f41b3582f34b0b8ed8d86b89fa9a1986e1f152147563967f43e32426d255c5fb910728ec5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- data_kit (0.0.7)
4
+ data_kit (0.0.8)
5
5
  rcsv
6
6
  timeliness
7
7
 
@@ -17,6 +17,7 @@ module DataKit
17
17
  def each_row(&block)
18
18
  handle.rewind
19
19
  Rcsv.parse(handle, :header => :skip, :columns => columns, :row_as_hash => true) do |row|
20
+ puts row.inspect
20
21
  yield row
21
22
  end
22
23
  end
@@ -39,7 +40,10 @@ module DataKit
39
40
  @handle = File.open(path)
40
41
  end
41
42
 
42
- @handle.set_encoding(Encoding.find("UTF-8"))
43
+ @handle.set_encoding(
44
+ Encoding.find("BINARY"), Encoding.find("UTF-8"),
45
+ {:invalid => :replace, :undef => :replace, :replace => ''}
46
+ )
43
47
  end
44
48
 
45
49
  def set_headers
@@ -32,15 +32,18 @@ class Rcsv
32
32
 
33
33
  initial_position = csv_data.pos
34
34
 
35
+ first_line = csv_data.each_line.first
36
+ field_count = first_line.split(raw_options[:col_sep]).length
37
+
35
38
  case options[:header]
36
39
  when :use
37
- header = self.raw_parse(StringIO.new(csv_data.each_line.first), raw_options).first
40
+ header = self.raw_parse(StringIO.new(first_line), raw_options).first
38
41
  raw_options[:offset_rows] += 1
39
42
  when :skip
40
- header = (0..(csv_data.each_line.first.split(raw_options[:col_sep]).count)).to_a
43
+ header = (0..field_count).to_a
41
44
  raw_options[:offset_rows] += 1
42
45
  when :none
43
- header = (0..(csv_data.each_line.first.split(raw_options[:col_sep]).count)).to_a
46
+ header = (0..field_count).to_a
44
47
  end
45
48
 
46
49
  raw_options[:row_as_hash] = options[:row_as_hash] # Setting after header parsing
@@ -1,3 +1,3 @@
1
1
  module DataKit
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
@@ -9,6 +9,10 @@ describe DataKit::CSV::Parser do
9
9
  data_path('carriage_returns.csv')
10
10
  }
11
11
 
12
+ let(:vc_companies_path) {
13
+ data_path('vc_backed_companies.csv')
14
+ }
15
+
12
16
  it "should initialize" do
13
17
  csv = DataKit::CSV::Parser.new(path)
14
18
 
@@ -47,4 +51,15 @@ describe DataKit::CSV::Parser do
47
51
 
48
52
  count.should == 10
49
53
  end
54
+
55
+ it "should parse CSVs with unknown encodings" do
56
+ csv = DataKit::CSV::Parser.new(File.open(vc_companies_path))
57
+
58
+ count = 0
59
+ csv.each_row do |row|
60
+ count += 1
61
+ end
62
+
63
+ count.should == 2
64
+ end
50
65
  end
@@ -0,0 +1 @@
1
+ Company Name,LOCATION,Still operating? Yes/No,Employees ,Phone,Email,LinkDin Profile,Website Url
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mode Analytics
@@ -155,6 +155,7 @@ files:
155
155
  - spec/fixtures/carriage_returns.csv
156
156
  - spec/fixtures/standard.csv
157
157
  - spec/fixtures/utf8.csv
158
+ - spec/fixtures/vc_backed_companies.csv
158
159
  - spec/spec_helper.rb
159
160
  homepage: http://www.modeanalytics.com/
160
161
  licenses:
@@ -197,4 +198,5 @@ test_files:
197
198
  - spec/fixtures/carriage_returns.csv
198
199
  - spec/fixtures/standard.csv
199
200
  - spec/fixtures/utf8.csv
201
+ - spec/fixtures/vc_backed_companies.csv
200
202
  - spec/spec_helper.rb