data_kit 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/data_kit/csv/parser.rb +1 -5
- data/lib/data_kit/csv/schema_analyzer.rb +1 -0
- data/lib/data_kit/version.rb +1 -1
- data/spec/csv/converter_spec.rb +16 -1
- data/spec/fixtures/utf8.csv +100 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd32e91aed4ac35a8dbb104e577d9d7454025733
|
4
|
+
data.tar.gz: 31e8c8ef04ba50e94bffb8a29104362a14e56e08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77effe4f14b155f04c3f7d177a4ba0072cb02d558a2916bb66d8709484422e28e34153090b467a828dfc3ebf14d967d97f0b66cf5ba9a6c9ccbca14cc62952ab
|
7
|
+
data.tar.gz: e94f0b6657c0b7a105a6dd324df4ce8824c21d6acce11eee50a55973ef108873641e340597ef989cedda71043a10466e28f05540fa8a23ad6cb2176c8e2a002e
|
data/Gemfile.lock
CHANGED
data/lib/data_kit/csv/parser.rb
CHANGED
@@ -3,10 +3,6 @@ require 'rcsv'
|
|
3
3
|
module DataKit
|
4
4
|
module CSV
|
5
5
|
class Parser
|
6
|
-
# Encode streams from BINARY into UTF-8
|
7
|
-
InternalEnc = Encoding.find("UTF-8")
|
8
|
-
ExternalEnc = Encoding.find("BINARY")
|
9
|
-
|
10
6
|
attr_reader :path
|
11
7
|
attr_reader :handle
|
12
8
|
attr_reader :headers
|
@@ -43,7 +39,7 @@ module DataKit
|
|
43
39
|
@handle = File.open(path)
|
44
40
|
end
|
45
41
|
|
46
|
-
@handle.set_encoding(ExternalEnc, InternalEnc)
|
42
|
+
@handle.set_encoding(Encoding.find("UTF-8"))
|
47
43
|
end
|
48
44
|
|
49
45
|
def set_headers
|
data/lib/data_kit/version.rb
CHANGED
data/spec/csv/converter_spec.rb
CHANGED
@@ -13,6 +13,10 @@ describe DataKit::CSV::Converter do
|
|
13
13
|
DataKit::CSV::Parser.new(data_path('standard.csv'))
|
14
14
|
}
|
15
15
|
|
16
|
+
let(:utf8csv) {
|
17
|
+
DataKit::CSV::Parser.new(data_path('utf8.csv'))
|
18
|
+
}
|
19
|
+
|
16
20
|
it "should initialize and execute" do
|
17
21
|
analysis = DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => 1)
|
18
22
|
converter = DataKit::CSV::Converter.new(csv, analysis, target)
|
@@ -24,7 +28,7 @@ describe DataKit::CSV::Converter do
|
|
24
28
|
row_count.should == 11
|
25
29
|
end
|
26
30
|
|
27
|
-
it "should convert using the
|
31
|
+
it "should convert using the convenience method" do
|
28
32
|
analysis = DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => 1)
|
29
33
|
converter = DataKit::CSV::Converter.convert(csv, analysis, target)
|
30
34
|
|
@@ -32,4 +36,15 @@ describe DataKit::CSV::Converter do
|
|
32
36
|
CSV.open(target).each { |row| row_count += 1 }
|
33
37
|
row_count.should == 11
|
34
38
|
end
|
39
|
+
|
40
|
+
it "should convert rows with utf8 characters" do
|
41
|
+
analysis = DataKit::CSV::SchemaAnalyzer.analyze(utf8csv, :sampling_rate => 1)
|
42
|
+
converter = DataKit::CSV::Converter.new(csv, analysis, target)
|
43
|
+
|
44
|
+
converter.execute
|
45
|
+
|
46
|
+
row_count = 0
|
47
|
+
CSV.open(target).each { |row| row_count += 1 }
|
48
|
+
row_count.should == 11
|
49
|
+
end
|
35
50
|
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
id,first_name,last_name,email,country,ip_address
|
2
|
+
1,Juan,Powell,jpowell@gabtune.mil,Belize,174.115.161.126
|
3
|
+
2,Elizabeth,Thomas,ethomas@vidoo.biz,San Marino,237.239.51.102
|
4
|
+
3,Andrea,Nelson,anelson@yakijo.org,Macedonia,5.158.57.111
|
5
|
+
4,Matthew,Jones,mjones@photofeed.com,Chad,183.112.197.55
|
6
|
+
5,Patrick,Wright,pwright@oyoyo.edu,Oman,164.108.233.48
|
7
|
+
6,Sharon,Riley,sriley@rhybox.net,Czech Republic,166.230.65.220
|
8
|
+
7,Albert,Arnold,aarnold@gigashots.net,Cook Islands,114.85.247.243
|
9
|
+
8,John,Scott,jscott@yambee.name,Georgia,94.199.125.106
|
10
|
+
9,Philip,Reynolds,preynolds@skipstorm.mil,Comoros,10.215.98.115
|
11
|
+
10,Mark,Anderson,manderson@browsebug.info,Spain,42.191.208.43
|
12
|
+
11,Paula,Gomez,pgomez@edgeify.net,Montserrat,19.144.148.158
|
13
|
+
12,Antonio,Cook,acook@jaxspan.info,Saint Martin,120.195.173.63
|
14
|
+
13,Terry,Duncan,tduncan@izio.name,Bangladesh,91.238.167.163
|
15
|
+
14,Anthony,Russell,arussell@zoomcast.com,Albania,34.18.207.137
|
16
|
+
15,Ruby,Lane,rlane@pixope.org,"Palestinian Territory, Occupied",195.14.7.172
|
17
|
+
16,Robin,Spencer,rspencer@leenti.mil,Panama,184.7.185.174
|
18
|
+
17,Melissa,Washington,mwashington@katz.com,"Korea, North",224.91.93.188
|
19
|
+
18,Barbara,Nelson,bnelson@dablist.gov,Cambodia,192.247.213.41
|
20
|
+
19,Janet,Mason,jmason@wikizz.mil,Guinea-Bissau,30.17.150.124
|
21
|
+
20,Michelle,Green,mgreen@twiyo.biz,United States Virgin Islands,22.139.120.215
|
22
|
+
21,David,Smith,dsmith@quinu.net,United States Virgin Islands,148.157.14.156
|
23
|
+
22,Anna,Martin,amartin@zoonoodle.gov,Cameroon,123.80.1.226
|
24
|
+
23,Judith,Holmes,jholmes@mudo.gov,Jamaica,110.154.217.110
|
25
|
+
24,Carolyn,Riley,criley@meemm.gov,Ethiopia,246.252.115.105
|
26
|
+
25,Carolyn,Lynch,clynch@voonyx.org,South Africa,133.159.182.203
|
27
|
+
26,Kimberly,Torres,ktorres@bluezoom.biz,Armenia,3.201.239.96
|
28
|
+
27,Paula,Andrews,pandrews@brainbox.com,Turks and Caicos Islands,161.2.154.206
|
29
|
+
28,Joe,Ramos,jramos@kimia.info,Australia,189.102.57.100
|
30
|
+
29,Shirley,Fowler,sfowler@ntag.gov,Mexico,144.132.244.41
|
31
|
+
30,Louise,Morales,lmorales@flashspan.org,Mali,177.85.64.237
|
32
|
+
31,Patrick,Vasquez,pvasquez@oozz.com,Jordan,52.197.37.130
|
33
|
+
32,Gloria,Carpenter,gcarpenter@jabbercube.net,Seychelles,74.26.218.240
|
34
|
+
33,Bruce,Anderson,banderson@yabox.com,Mali,2.51.248.44
|
35
|
+
34,Richard,Hunter,rhunter@brightbean.name,Guam,57.38.118.134
|
36
|
+
35,Keith,Kennedy,kkennedy@pixoboo.mil,Vietnam,253.240.67.38
|
37
|
+
36,Charles,Ramirez,cramirez@meejo.org,Malawi,148.110.48.106
|
38
|
+
37,Lawrence,Willis,lwillis@devpulse.net,Åland,226.179.18.113
|
39
|
+
38,Arthur,Frazier,afrazier@jabbertype.gov,Jamaica,191.8.127.69
|
40
|
+
39,Joseph,Jordan,jjordan@brainverse.edu,Belarus,35.133.77.128
|
41
|
+
40,Steve,Robertson,srobertson@yata.gov,Guinea-Bissau,214.196.72.59
|
42
|
+
41,Christine,Williams,cwilliams@dablist.name,Grenada,207.37.134.135
|
43
|
+
42,Brandon,Payne,bpayne@oyondu.net,Seychelles,103.172.50.228
|
44
|
+
43,Charles,Fernandez,cfernandez@zava.edu,Palau,156.40.43.254
|
45
|
+
44,Lisa,James,ljames@dazzlesphere.org,Mayotte,88.191.96.192
|
46
|
+
45,Helen,Stephens,hstephens@devbug.edu,Nauru,65.240.201.247
|
47
|
+
46,Donna,Barnes,dbarnes@npath.edu,Guyana,53.79.21.160
|
48
|
+
47,Willie,Berry,wberry@chatterbridge.edu,Ascension Island,27.173.100.171
|
49
|
+
48,Jerry,Fernandez,jfernandez@realcube.edu,China,54.147.218.106
|
50
|
+
49,Jessica,Stewart,jstewart@gabvine.biz,Djibouti,220.164.122.4
|
51
|
+
50,Paul,Stevens,pstevens@skalith.com,Qatar,181.118.119.183
|
52
|
+
51,Jeffrey,Frazier,jfrazier@oyoba.info,Saint Martin,247.167.19.66
|
53
|
+
52,Emily,Bell,ebell@lazz.info,Venezuela,56.23.2.77
|
54
|
+
53,Wayne,Banks,wbanks@dabshots.org,Denmark,210.58.240.119
|
55
|
+
54,Helen,Stone,hstone@mita.gov,Belgium,25.191.200.47
|
56
|
+
55,Laura,Stone,lstone@oloo.com,Yugoslavia,98.117.184.246
|
57
|
+
56,Amanda,Ward,award@yotz.com,Estonia,197.191.111.153
|
58
|
+
57,Carlos,Duncan,cduncan@einti.edu,Moldova,88.215.57.115
|
59
|
+
58,Catherine,Holmes,cholmes@photojam.gov,Rwanda,171.27.114.159
|
60
|
+
59,Janice,Hernandez,jhernandez@kwimbee.biz,Qatar,180.13.57.100
|
61
|
+
60,Judith,Vasquez,jvasquez@janyx.biz,Niue,151.169.187.79
|
62
|
+
61,Dennis,Henderson,dhenderson@browsedrive.gov,Mozambique,220.48.212.254
|
63
|
+
62,Jeremy,Ford,jford@meezzy.biz,Sudan,248.135.67.205
|
64
|
+
63,Dennis,Hunter,dhunter@rooxo.net,Latvia,176.187.214.230
|
65
|
+
64,Stephanie,Montgomery,smontgomery@babbleblab.net,France,137.214.192.40
|
66
|
+
65,Henry,Olson,holson@tazz.info,Puerto Rico,176.207.8.142
|
67
|
+
66,Jesse,Edwards,jedwards@dabfeed.mil,Argentina,147.16.35.23
|
68
|
+
67,Randy,Peterson,rpeterson@wikizz.biz,Antigua and Barbuda,179.119.53.133
|
69
|
+
68,Joan,Holmes,jholmes@mydo.name,United States Virgin Islands,0.59.105.147
|
70
|
+
69,Jane,Vasquez,jvasquez@edgepulse.name,Bosnia and Herzegovina,179.67.30.193
|
71
|
+
70,Heather,Hernandez,hhernandez@feedfish.com,Falkland Islands (Malvinas),154.132.147.163
|
72
|
+
71,Linda,Webb,lwebb@feedfire.name,Gabon,118.24.235.210
|
73
|
+
72,Richard,Daniels,rdaniels@jetwire.org,Jordan,42.10.7.222
|
74
|
+
73,Terry,Fernandez,tfernandez@twitterbeat.org,Cook Islands,173.98.100.195
|
75
|
+
74,Virginia,Willis,vwillis@jabberbean.biz,Argentina,130.235.152.57
|
76
|
+
75,Richard,Austin,raustin@livefish.com,Zambia,167.130.14.74
|
77
|
+
76,Carol,Griffin,cgriffin@fatz.mil,Grenada,1.189.205.12
|
78
|
+
77,Brandon,Gutierrez,bgutierrez@zoomcast.edu,Vatican City State (Holy See),37.247.25.100
|
79
|
+
78,Samuel,Ellis,sellis@yodo.net,Pakistan,10.183.35.75
|
80
|
+
79,Stephanie,Fields,sfields@podcat.gov,Finland,133.98.9.101
|
81
|
+
80,Rebecca,Kim,rkim@brainlounge.mil,Guadeloupe,155.122.101.233
|
82
|
+
81,Debra,Warren,dwarren@dazzlesphere.org,Cameroon,41.110.64.145
|
83
|
+
82,Brenda,Henderson,bhenderson@jetpulse.mil,Swaziland,216.4.186.151
|
84
|
+
83,Victor,Hughes,vhughes@vipe.name,Macedonia,144.105.122.32
|
85
|
+
84,Gerald,Romero,gromero@quinu.mil,France,114.177.65.254
|
86
|
+
85,Raymond,Gomez,rgomez@realbridge.edu,Egypt,26.255.191.232
|
87
|
+
86,Anne,Chavez,achavez@roodel.biz,Croatia,193.55.29.211
|
88
|
+
87,Patrick,Willis,pwillis@dynabox.com,Georgia,156.207.126.24
|
89
|
+
88,Alice,Gardner,agardner@aivee.info,Solomon Islands,71.46.147.126
|
90
|
+
89,Jane,Harvey,jharvey@cogibox.edu,Svalbard and Jan Mayen Islands,25.33.240.168
|
91
|
+
90,Rachel,Brown,rbrown@rhyloo.com,Equatorial Guinea,220.113.103.62
|
92
|
+
91,Brenda,Hudson,bhudson@wikizz.biz,Brazil,67.187.212.52
|
93
|
+
92,Lisa,Johnston,ljohnston@devshare.info,Qatar,13.43.69.191
|
94
|
+
93,Shawn,Gonzales,sgonzales@fatz.gov,Sri Lanka,162.236.113.63
|
95
|
+
94,Christine,Stanley,cstanley@voonyx.biz,American Samoa,77.123.162.28
|
96
|
+
95,Annie,Thomas,athomas@ntags.mil,Guadeloupe,91.226.131.120
|
97
|
+
96,Wayne,Rodriguez,wrodriguez@topicshots.com,Antarctica,238.217.34.126
|
98
|
+
97,Daniel,Hall,dhall@skippad.edu,Gibraltar,231.242.152.83
|
99
|
+
98,Virginia,Watson,vwatson@chatterpoint.mil,Northern Mariana Islands,188.211.155.117
|
100
|
+
99,Benjamin,Henderson,bhenderson@zoovu.com,Netherlands Antilles,192.19.128.146
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
@@ -154,6 +154,7 @@ files:
|
|
154
154
|
- spec/dataset/schema_spec.rb
|
155
155
|
- spec/fixtures/carriage_returns.csv
|
156
156
|
- spec/fixtures/standard.csv
|
157
|
+
- spec/fixtures/utf8.csv
|
157
158
|
- spec/spec_helper.rb
|
158
159
|
homepage: http://www.modeanalytics.com/
|
159
160
|
licenses:
|
@@ -195,5 +196,5 @@ test_files:
|
|
195
196
|
- spec/dataset/schema_spec.rb
|
196
197
|
- spec/fixtures/carriage_returns.csv
|
197
198
|
- spec/fixtures/standard.csv
|
199
|
+
- spec/fixtures/utf8.csv
|
198
200
|
- spec/spec_helper.rb
|
199
|
-
has_rdoc:
|