data_kit 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/data_kit/csv/parser.rb +1 -5
- data/lib/data_kit/csv/schema_analyzer.rb +1 -0
- data/lib/data_kit/version.rb +1 -1
- data/spec/csv/converter_spec.rb +16 -1
- data/spec/fixtures/utf8.csv +100 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd32e91aed4ac35a8dbb104e577d9d7454025733
|
4
|
+
data.tar.gz: 31e8c8ef04ba50e94bffb8a29104362a14e56e08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77effe4f14b155f04c3f7d177a4ba0072cb02d558a2916bb66d8709484422e28e34153090b467a828dfc3ebf14d967d97f0b66cf5ba9a6c9ccbca14cc62952ab
|
7
|
+
data.tar.gz: e94f0b6657c0b7a105a6dd324df4ce8824c21d6acce11eee50a55973ef108873641e340597ef989cedda71043a10466e28f05540fa8a23ad6cb2176c8e2a002e
|
data/Gemfile.lock
CHANGED
data/lib/data_kit/csv/parser.rb
CHANGED
@@ -3,10 +3,6 @@ require 'rcsv'
|
|
3
3
|
module DataKit
|
4
4
|
module CSV
|
5
5
|
class Parser
|
6
|
-
# Encode streams from BINARY into UTF-8
|
7
|
-
InternalEnc = Encoding.find("UTF-8")
|
8
|
-
ExternalEnc = Encoding.find("BINARY")
|
9
|
-
|
10
6
|
attr_reader :path
|
11
7
|
attr_reader :handle
|
12
8
|
attr_reader :headers
|
@@ -43,7 +39,7 @@ module DataKit
|
|
43
39
|
@handle = File.open(path)
|
44
40
|
end
|
45
41
|
|
46
|
-
@handle.set_encoding(
|
42
|
+
@handle.set_encoding(Encoding.find("UTF-8"))
|
47
43
|
end
|
48
44
|
|
49
45
|
def set_headers
|
data/lib/data_kit/version.rb
CHANGED
data/spec/csv/converter_spec.rb
CHANGED
@@ -13,6 +13,10 @@ describe DataKit::CSV::Converter do
|
|
13
13
|
DataKit::CSV::Parser.new(data_path('standard.csv'))
|
14
14
|
}
|
15
15
|
|
16
|
+
let(:utf8csv) {
|
17
|
+
DataKit::CSV::Parser.new(data_path('utf8.csv'))
|
18
|
+
}
|
19
|
+
|
16
20
|
it "should initialize and execute" do
|
17
21
|
analysis = DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => 1)
|
18
22
|
converter = DataKit::CSV::Converter.new(csv, analysis, target)
|
@@ -24,7 +28,7 @@ describe DataKit::CSV::Converter do
|
|
24
28
|
row_count.should == 11
|
25
29
|
end
|
26
30
|
|
27
|
-
it "should convert using the
|
31
|
+
it "should convert using the convenience method" do
|
28
32
|
analysis = DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => 1)
|
29
33
|
converter = DataKit::CSV::Converter.convert(csv, analysis, target)
|
30
34
|
|
@@ -32,4 +36,15 @@ describe DataKit::CSV::Converter do
|
|
32
36
|
CSV.open(target).each { |row| row_count += 1 }
|
33
37
|
row_count.should == 11
|
34
38
|
end
|
39
|
+
|
40
|
+
it "should convert rows with utf8 characters" do
|
41
|
+
analysis = DataKit::CSV::SchemaAnalyzer.analyze(utf8csv, :sampling_rate => 1)
|
42
|
+
converter = DataKit::CSV::Converter.new(csv, analysis, target)
|
43
|
+
|
44
|
+
converter.execute
|
45
|
+
|
46
|
+
row_count = 0
|
47
|
+
CSV.open(target).each { |row| row_count += 1 }
|
48
|
+
row_count.should == 11
|
49
|
+
end
|
35
50
|
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
id,first_name,last_name,email,country,ip_address
|
2
|
+
1,Juan,Powell,jpowell@gabtune.mil,Belize,174.115.161.126
|
3
|
+
2,Elizabeth,Thomas,ethomas@vidoo.biz,San Marino,237.239.51.102
|
4
|
+
3,Andrea,Nelson,anelson@yakijo.org,Macedonia,5.158.57.111
|
5
|
+
4,Matthew,Jones,mjones@photofeed.com,Chad,183.112.197.55
|
6
|
+
5,Patrick,Wright,pwright@oyoyo.edu,Oman,164.108.233.48
|
7
|
+
6,Sharon,Riley,sriley@rhybox.net,Czech Republic,166.230.65.220
|
8
|
+
7,Albert,Arnold,aarnold@gigashots.net,Cook Islands,114.85.247.243
|
9
|
+
8,John,Scott,jscott@yambee.name,Georgia,94.199.125.106
|
10
|
+
9,Philip,Reynolds,preynolds@skipstorm.mil,Comoros,10.215.98.115
|
11
|
+
10,Mark,Anderson,manderson@browsebug.info,Spain,42.191.208.43
|
12
|
+
11,Paula,Gomez,pgomez@edgeify.net,Montserrat,19.144.148.158
|
13
|
+
12,Antonio,Cook,acook@jaxspan.info,Saint Martin,120.195.173.63
|
14
|
+
13,Terry,Duncan,tduncan@izio.name,Bangladesh,91.238.167.163
|
15
|
+
14,Anthony,Russell,arussell@zoomcast.com,Albania,34.18.207.137
|
16
|
+
15,Ruby,Lane,rlane@pixope.org,"Palestinian Territory, Occupied",195.14.7.172
|
17
|
+
16,Robin,Spencer,rspencer@leenti.mil,Panama,184.7.185.174
|
18
|
+
17,Melissa,Washington,mwashington@katz.com,"Korea, North",224.91.93.188
|
19
|
+
18,Barbara,Nelson,bnelson@dablist.gov,Cambodia,192.247.213.41
|
20
|
+
19,Janet,Mason,jmason@wikizz.mil,Guinea-Bissau,30.17.150.124
|
21
|
+
20,Michelle,Green,mgreen@twiyo.biz,United States Virgin Islands,22.139.120.215
|
22
|
+
21,David,Smith,dsmith@quinu.net,United States Virgin Islands,148.157.14.156
|
23
|
+
22,Anna,Martin,amartin@zoonoodle.gov,Cameroon,123.80.1.226
|
24
|
+
23,Judith,Holmes,jholmes@mudo.gov,Jamaica,110.154.217.110
|
25
|
+
24,Carolyn,Riley,criley@meemm.gov,Ethiopia,246.252.115.105
|
26
|
+
25,Carolyn,Lynch,clynch@voonyx.org,South Africa,133.159.182.203
|
27
|
+
26,Kimberly,Torres,ktorres@bluezoom.biz,Armenia,3.201.239.96
|
28
|
+
27,Paula,Andrews,pandrews@brainbox.com,Turks and Caicos Islands,161.2.154.206
|
29
|
+
28,Joe,Ramos,jramos@kimia.info,Australia,189.102.57.100
|
30
|
+
29,Shirley,Fowler,sfowler@ntag.gov,Mexico,144.132.244.41
|
31
|
+
30,Louise,Morales,lmorales@flashspan.org,Mali,177.85.64.237
|
32
|
+
31,Patrick,Vasquez,pvasquez@oozz.com,Jordan,52.197.37.130
|
33
|
+
32,Gloria,Carpenter,gcarpenter@jabbercube.net,Seychelles,74.26.218.240
|
34
|
+
33,Bruce,Anderson,banderson@yabox.com,Mali,2.51.248.44
|
35
|
+
34,Richard,Hunter,rhunter@brightbean.name,Guam,57.38.118.134
|
36
|
+
35,Keith,Kennedy,kkennedy@pixoboo.mil,Vietnam,253.240.67.38
|
37
|
+
36,Charles,Ramirez,cramirez@meejo.org,Malawi,148.110.48.106
|
38
|
+
37,Lawrence,Willis,lwillis@devpulse.net,Åland,226.179.18.113
|
39
|
+
38,Arthur,Frazier,afrazier@jabbertype.gov,Jamaica,191.8.127.69
|
40
|
+
39,Joseph,Jordan,jjordan@brainverse.edu,Belarus,35.133.77.128
|
41
|
+
40,Steve,Robertson,srobertson@yata.gov,Guinea-Bissau,214.196.72.59
|
42
|
+
41,Christine,Williams,cwilliams@dablist.name,Grenada,207.37.134.135
|
43
|
+
42,Brandon,Payne,bpayne@oyondu.net,Seychelles,103.172.50.228
|
44
|
+
43,Charles,Fernandez,cfernandez@zava.edu,Palau,156.40.43.254
|
45
|
+
44,Lisa,James,ljames@dazzlesphere.org,Mayotte,88.191.96.192
|
46
|
+
45,Helen,Stephens,hstephens@devbug.edu,Nauru,65.240.201.247
|
47
|
+
46,Donna,Barnes,dbarnes@npath.edu,Guyana,53.79.21.160
|
48
|
+
47,Willie,Berry,wberry@chatterbridge.edu,Ascension Island,27.173.100.171
|
49
|
+
48,Jerry,Fernandez,jfernandez@realcube.edu,China,54.147.218.106
|
50
|
+
49,Jessica,Stewart,jstewart@gabvine.biz,Djibouti,220.164.122.4
|
51
|
+
50,Paul,Stevens,pstevens@skalith.com,Qatar,181.118.119.183
|
52
|
+
51,Jeffrey,Frazier,jfrazier@oyoba.info,Saint Martin,247.167.19.66
|
53
|
+
52,Emily,Bell,ebell@lazz.info,Venezuela,56.23.2.77
|
54
|
+
53,Wayne,Banks,wbanks@dabshots.org,Denmark,210.58.240.119
|
55
|
+
54,Helen,Stone,hstone@mita.gov,Belgium,25.191.200.47
|
56
|
+
55,Laura,Stone,lstone@oloo.com,Yugoslavia,98.117.184.246
|
57
|
+
56,Amanda,Ward,award@yotz.com,Estonia,197.191.111.153
|
58
|
+
57,Carlos,Duncan,cduncan@einti.edu,Moldova,88.215.57.115
|
59
|
+
58,Catherine,Holmes,cholmes@photojam.gov,Rwanda,171.27.114.159
|
60
|
+
59,Janice,Hernandez,jhernandez@kwimbee.biz,Qatar,180.13.57.100
|
61
|
+
60,Judith,Vasquez,jvasquez@janyx.biz,Niue,151.169.187.79
|
62
|
+
61,Dennis,Henderson,dhenderson@browsedrive.gov,Mozambique,220.48.212.254
|
63
|
+
62,Jeremy,Ford,jford@meezzy.biz,Sudan,248.135.67.205
|
64
|
+
63,Dennis,Hunter,dhunter@rooxo.net,Latvia,176.187.214.230
|
65
|
+
64,Stephanie,Montgomery,smontgomery@babbleblab.net,France,137.214.192.40
|
66
|
+
65,Henry,Olson,holson@tazz.info,Puerto Rico,176.207.8.142
|
67
|
+
66,Jesse,Edwards,jedwards@dabfeed.mil,Argentina,147.16.35.23
|
68
|
+
67,Randy,Peterson,rpeterson@wikizz.biz,Antigua and Barbuda,179.119.53.133
|
69
|
+
68,Joan,Holmes,jholmes@mydo.name,United States Virgin Islands,0.59.105.147
|
70
|
+
69,Jane,Vasquez,jvasquez@edgepulse.name,Bosnia and Herzegovina,179.67.30.193
|
71
|
+
70,Heather,Hernandez,hhernandez@feedfish.com,Falkland Islands (Malvinas),154.132.147.163
|
72
|
+
71,Linda,Webb,lwebb@feedfire.name,Gabon,118.24.235.210
|
73
|
+
72,Richard,Daniels,rdaniels@jetwire.org,Jordan,42.10.7.222
|
74
|
+
73,Terry,Fernandez,tfernandez@twitterbeat.org,Cook Islands,173.98.100.195
|
75
|
+
74,Virginia,Willis,vwillis@jabberbean.biz,Argentina,130.235.152.57
|
76
|
+
75,Richard,Austin,raustin@livefish.com,Zambia,167.130.14.74
|
77
|
+
76,Carol,Griffin,cgriffin@fatz.mil,Grenada,1.189.205.12
|
78
|
+
77,Brandon,Gutierrez,bgutierrez@zoomcast.edu,Vatican City State (Holy See),37.247.25.100
|
79
|
+
78,Samuel,Ellis,sellis@yodo.net,Pakistan,10.183.35.75
|
80
|
+
79,Stephanie,Fields,sfields@podcat.gov,Finland,133.98.9.101
|
81
|
+
80,Rebecca,Kim,rkim@brainlounge.mil,Guadeloupe,155.122.101.233
|
82
|
+
81,Debra,Warren,dwarren@dazzlesphere.org,Cameroon,41.110.64.145
|
83
|
+
82,Brenda,Henderson,bhenderson@jetpulse.mil,Swaziland,216.4.186.151
|
84
|
+
83,Victor,Hughes,vhughes@vipe.name,Macedonia,144.105.122.32
|
85
|
+
84,Gerald,Romero,gromero@quinu.mil,France,114.177.65.254
|
86
|
+
85,Raymond,Gomez,rgomez@realbridge.edu,Egypt,26.255.191.232
|
87
|
+
86,Anne,Chavez,achavez@roodel.biz,Croatia,193.55.29.211
|
88
|
+
87,Patrick,Willis,pwillis@dynabox.com,Georgia,156.207.126.24
|
89
|
+
88,Alice,Gardner,agardner@aivee.info,Solomon Islands,71.46.147.126
|
90
|
+
89,Jane,Harvey,jharvey@cogibox.edu,Svalbard and Jan Mayen Islands,25.33.240.168
|
91
|
+
90,Rachel,Brown,rbrown@rhyloo.com,Equatorial Guinea,220.113.103.62
|
92
|
+
91,Brenda,Hudson,bhudson@wikizz.biz,Brazil,67.187.212.52
|
93
|
+
92,Lisa,Johnston,ljohnston@devshare.info,Qatar,13.43.69.191
|
94
|
+
93,Shawn,Gonzales,sgonzales@fatz.gov,Sri Lanka,162.236.113.63
|
95
|
+
94,Christine,Stanley,cstanley@voonyx.biz,American Samoa,77.123.162.28
|
96
|
+
95,Annie,Thomas,athomas@ntags.mil,Guadeloupe,91.226.131.120
|
97
|
+
96,Wayne,Rodriguez,wrodriguez@topicshots.com,Antarctica,238.217.34.126
|
98
|
+
97,Daniel,Hall,dhall@skippad.edu,Gibraltar,231.242.152.83
|
99
|
+
98,Virginia,Watson,vwatson@chatterpoint.mil,Northern Mariana Islands,188.211.155.117
|
100
|
+
99,Benjamin,Henderson,bhenderson@zoovu.com,Netherlands Antilles,192.19.128.146
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
@@ -154,6 +154,7 @@ files:
|
|
154
154
|
- spec/dataset/schema_spec.rb
|
155
155
|
- spec/fixtures/carriage_returns.csv
|
156
156
|
- spec/fixtures/standard.csv
|
157
|
+
- spec/fixtures/utf8.csv
|
157
158
|
- spec/spec_helper.rb
|
158
159
|
homepage: http://www.modeanalytics.com/
|
159
160
|
licenses:
|
@@ -195,5 +196,5 @@ test_files:
|
|
195
196
|
- spec/dataset/schema_spec.rb
|
196
197
|
- spec/fixtures/carriage_returns.csv
|
197
198
|
- spec/fixtures/standard.csv
|
199
|
+
- spec/fixtures/utf8.csv
|
198
200
|
- spec/spec_helper.rb
|
199
|
-
has_rdoc:
|