data_kit 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/data_kit/csv/parser.rb +5 -1
- data/lib/data_kit/patches/rcsv.rb +6 -3
- data/lib/data_kit/version.rb +1 -1
- data/spec/csv/parser_spec.rb +15 -0
- data/spec/fixtures/vc_backed_companies.csv +1 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a079b89a65b3f20db63058e070d6b12d19db436f
|
4
|
+
data.tar.gz: 78868c7945412096389eca3d05e15a495d73f08b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f1a80f34c222c89ee9af223796c06d50087dfddd9def99d4d1b6edd4c650ec6fd0f1dc8770dbf5c8f6c62433a8987e4f157e945ac9d0b188507a6e51feb67562
|
7
|
+
data.tar.gz: 8f4c7dac994fe348c69eba8b3b2a62105c33f8f4565efdc2effd3a4f41b3582f34b0b8ed8d86b89fa9a1986e1f152147563967f43e32426d255c5fb910728ec5
|
data/Gemfile.lock
CHANGED
data/lib/data_kit/csv/parser.rb
CHANGED
@@ -17,6 +17,7 @@ module DataKit
|
|
17
17
|
def each_row(&block)
|
18
18
|
handle.rewind
|
19
19
|
Rcsv.parse(handle, :header => :skip, :columns => columns, :row_as_hash => true) do |row|
|
20
|
+
puts row.inspect
|
20
21
|
yield row
|
21
22
|
end
|
22
23
|
end
|
@@ -39,7 +40,10 @@ module DataKit
|
|
39
40
|
@handle = File.open(path)
|
40
41
|
end
|
41
42
|
|
42
|
-
@handle.set_encoding(
|
43
|
+
@handle.set_encoding(
|
44
|
+
Encoding.find("BINARY"), Encoding.find("UTF-8"),
|
45
|
+
{:invalid => :replace, :undef => :replace, :replace => ''}
|
46
|
+
)
|
43
47
|
end
|
44
48
|
|
45
49
|
def set_headers
|
@@ -32,15 +32,18 @@ class Rcsv
|
|
32
32
|
|
33
33
|
initial_position = csv_data.pos
|
34
34
|
|
35
|
+
first_line = csv_data.each_line.first
|
36
|
+
field_count = first_line.split(raw_options[:col_sep]).length
|
37
|
+
|
35
38
|
case options[:header]
|
36
39
|
when :use
|
37
|
-
header = self.raw_parse(StringIO.new(
|
40
|
+
header = self.raw_parse(StringIO.new(first_line), raw_options).first
|
38
41
|
raw_options[:offset_rows] += 1
|
39
42
|
when :skip
|
40
|
-
header = (0..
|
43
|
+
header = (0..field_count).to_a
|
41
44
|
raw_options[:offset_rows] += 1
|
42
45
|
when :none
|
43
|
-
header = (0..
|
46
|
+
header = (0..field_count).to_a
|
44
47
|
end
|
45
48
|
|
46
49
|
raw_options[:row_as_hash] = options[:row_as_hash] # Setting after header parsing
|
data/lib/data_kit/version.rb
CHANGED
data/spec/csv/parser_spec.rb
CHANGED
@@ -9,6 +9,10 @@ describe DataKit::CSV::Parser do
|
|
9
9
|
data_path('carriage_returns.csv')
|
10
10
|
}
|
11
11
|
|
12
|
+
let(:vc_companies_path) {
|
13
|
+
data_path('vc_backed_companies.csv')
|
14
|
+
}
|
15
|
+
|
12
16
|
it "should initialize" do
|
13
17
|
csv = DataKit::CSV::Parser.new(path)
|
14
18
|
|
@@ -47,4 +51,15 @@ describe DataKit::CSV::Parser do
|
|
47
51
|
|
48
52
|
count.should == 10
|
49
53
|
end
|
54
|
+
|
55
|
+
it "should parse CSVs with unknown encodings" do
|
56
|
+
csv = DataKit::CSV::Parser.new(File.open(vc_companies_path))
|
57
|
+
|
58
|
+
count = 0
|
59
|
+
csv.each_row do |row|
|
60
|
+
count += 1
|
61
|
+
end
|
62
|
+
|
63
|
+
count.should == 2
|
64
|
+
end
|
50
65
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
Company Name,LOCATION,Still operating? Yes/No,Employees ,Phone,Email,LinkDin Profile,Website Url
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
@@ -155,6 +155,7 @@ files:
|
|
155
155
|
- spec/fixtures/carriage_returns.csv
|
156
156
|
- spec/fixtures/standard.csv
|
157
157
|
- spec/fixtures/utf8.csv
|
158
|
+
- spec/fixtures/vc_backed_companies.csv
|
158
159
|
- spec/spec_helper.rb
|
159
160
|
homepage: http://www.modeanalytics.com/
|
160
161
|
licenses:
|
@@ -197,4 +198,5 @@ test_files:
|
|
197
198
|
- spec/fixtures/carriage_returns.csv
|
198
199
|
- spec/fixtures/standard.csv
|
199
200
|
- spec/fixtures/utf8.csv
|
201
|
+
- spec/fixtures/vc_backed_companies.csv
|
200
202
|
- spec/spec_helper.rb
|