data_kit 0.0.3 → 0.0.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eed0243b61997177b645f36dd755c121dd36c177
4
- data.tar.gz: a9631c21ab9bf41820e8f4d28bec5bcf7a0c43b5
3
+ metadata.gz: dd32e91aed4ac35a8dbb104e577d9d7454025733
4
+ data.tar.gz: 31e8c8ef04ba50e94bffb8a29104362a14e56e08
5
5
  SHA512:
6
- metadata.gz: b8468fc7bda9be6701312139e1d2cd3ad5933c0d1ddd0d38f4e9e373ba1dd27b176e1a1928c4a1790cd8d8e1873fdbecae9adc86266478f438595d0f8d32f1a0
7
- data.tar.gz: 4f5dcdab6b2a42d05ba8a71bf38391b8109ba4adb6a46e5077f73dbfad796a9fd5706b5d07343a22e56aa438982bac5b27e889a964e4444b7327e3958a0340ed
6
+ metadata.gz: 77effe4f14b155f04c3f7d177a4ba0072cb02d558a2916bb66d8709484422e28e34153090b467a828dfc3ebf14d967d97f0b66cf5ba9a6c9ccbca14cc62952ab
7
+ data.tar.gz: e94f0b6657c0b7a105a6dd324df4ce8824c21d6acce11eee50a55973ef108873641e340597ef989cedda71043a10466e28f05540fa8a23ad6cb2176c8e2a002e
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- data_kit (0.0.2)
4
+ data_kit (0.0.4)
5
5
  rcsv
6
6
  timeliness
7
7
 
@@ -3,10 +3,6 @@ require 'rcsv'
3
3
  module DataKit
4
4
  module CSV
5
5
  class Parser
6
- # Encode streams from BINARY into UTF-8
7
- InternalEnc = Encoding.find("UTF-8")
8
- ExternalEnc = Encoding.find("BINARY")
9
-
10
6
  attr_reader :path
11
7
  attr_reader :handle
12
8
  attr_reader :headers
@@ -43,7 +39,7 @@ module DataKit
43
39
  @handle = File.open(path)
44
40
  end
45
41
 
46
- @handle.set_encoding(ExternalEnc, InternalEnc)
42
+ @handle.set_encoding(Encoding.find("UTF-8"))
47
43
  end
48
44
 
49
45
  def set_headers
@@ -20,6 +20,7 @@ module DataKit
20
20
  if random.rand <= sampling_rate
21
21
  analysis.increment_sample
22
22
  row.keys.each do |field_name|
23
+ row[field_name].force_encoding('UTF-8')
23
24
  analysis.insert(field_name.to_s, row[field_name])
24
25
  end
25
26
  end
@@ -1,3 +1,3 @@
1
1
  module DataKit
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -13,6 +13,10 @@ describe DataKit::CSV::Converter do
13
13
  DataKit::CSV::Parser.new(data_path('standard.csv'))
14
14
  }
15
15
 
16
+ let(:utf8csv) {
17
+ DataKit::CSV::Parser.new(data_path('utf8.csv'))
18
+ }
19
+
16
20
  it "should initialize and execute" do
17
21
  analysis = DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => 1)
18
22
  converter = DataKit::CSV::Converter.new(csv, analysis, target)
@@ -24,7 +28,7 @@ describe DataKit::CSV::Converter do
24
28
  row_count.should == 11
25
29
  end
26
30
 
27
- it "should convert using the convience method" do
31
+ it "should convert using the convenience method" do
28
32
  analysis = DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => 1)
29
33
  converter = DataKit::CSV::Converter.convert(csv, analysis, target)
30
34
 
@@ -32,4 +36,15 @@ describe DataKit::CSV::Converter do
32
36
  CSV.open(target).each { |row| row_count += 1 }
33
37
  row_count.should == 11
34
38
  end
39
+
40
+ it "should convert rows with utf8 characters" do
41
+ analysis = DataKit::CSV::SchemaAnalyzer.analyze(utf8csv, :sampling_rate => 1)
42
+ converter = DataKit::CSV::Converter.new(csv, analysis, target)
43
+
44
+ converter.execute
45
+
46
+ row_count = 0
47
+ CSV.open(target).each { |row| row_count += 1 }
48
+ row_count.should == 11
49
+ end
35
50
  end
@@ -0,0 +1,100 @@
1
+ id,first_name,last_name,email,country,ip_address
2
+ 1,Juan,Powell,jpowell@gabtune.mil,Belize,174.115.161.126
3
+ 2,Elizabeth,Thomas,ethomas@vidoo.biz,San Marino,237.239.51.102
4
+ 3,Andrea,Nelson,anelson@yakijo.org,Macedonia,5.158.57.111
5
+ 4,Matthew,Jones,mjones@photofeed.com,Chad,183.112.197.55
6
+ 5,Patrick,Wright,pwright@oyoyo.edu,Oman,164.108.233.48
7
+ 6,Sharon,Riley,sriley@rhybox.net,Czech Republic,166.230.65.220
8
+ 7,Albert,Arnold,aarnold@gigashots.net,Cook Islands,114.85.247.243
9
+ 8,John,Scott,jscott@yambee.name,Georgia,94.199.125.106
10
+ 9,Philip,Reynolds,preynolds@skipstorm.mil,Comoros,10.215.98.115
11
+ 10,Mark,Anderson,manderson@browsebug.info,Spain,42.191.208.43
12
+ 11,Paula,Gomez,pgomez@edgeify.net,Montserrat,19.144.148.158
13
+ 12,Antonio,Cook,acook@jaxspan.info,Saint Martin,120.195.173.63
14
+ 13,Terry,Duncan,tduncan@izio.name,Bangladesh,91.238.167.163
15
+ 14,Anthony,Russell,arussell@zoomcast.com,Albania,34.18.207.137
16
+ 15,Ruby,Lane,rlane@pixope.org,"Palestinian Territory, Occupied",195.14.7.172
17
+ 16,Robin,Spencer,rspencer@leenti.mil,Panama,184.7.185.174
18
+ 17,Melissa,Washington,mwashington@katz.com,"Korea, North",224.91.93.188
19
+ 18,Barbara,Nelson,bnelson@dablist.gov,Cambodia,192.247.213.41
20
+ 19,Janet,Mason,jmason@wikizz.mil,Guinea-Bissau,30.17.150.124
21
+ 20,Michelle,Green,mgreen@twiyo.biz,United States Virgin Islands,22.139.120.215
22
+ 21,David,Smith,dsmith@quinu.net,United States Virgin Islands,148.157.14.156
23
+ 22,Anna,Martin,amartin@zoonoodle.gov,Cameroon,123.80.1.226
24
+ 23,Judith,Holmes,jholmes@mudo.gov,Jamaica,110.154.217.110
25
+ 24,Carolyn,Riley,criley@meemm.gov,Ethiopia,246.252.115.105
26
+ 25,Carolyn,Lynch,clynch@voonyx.org,South Africa,133.159.182.203
27
+ 26,Kimberly,Torres,ktorres@bluezoom.biz,Armenia,3.201.239.96
28
+ 27,Paula,Andrews,pandrews@brainbox.com,Turks and Caicos Islands,161.2.154.206
29
+ 28,Joe,Ramos,jramos@kimia.info,Australia,189.102.57.100
30
+ 29,Shirley,Fowler,sfowler@ntag.gov,Mexico,144.132.244.41
31
+ 30,Louise,Morales,lmorales@flashspan.org,Mali,177.85.64.237
32
+ 31,Patrick,Vasquez,pvasquez@oozz.com,Jordan,52.197.37.130
33
+ 32,Gloria,Carpenter,gcarpenter@jabbercube.net,Seychelles,74.26.218.240
34
+ 33,Bruce,Anderson,banderson@yabox.com,Mali,2.51.248.44
35
+ 34,Richard,Hunter,rhunter@brightbean.name,Guam,57.38.118.134
36
+ 35,Keith,Kennedy,kkennedy@pixoboo.mil,Vietnam,253.240.67.38
37
+ 36,Charles,Ramirez,cramirez@meejo.org,Malawi,148.110.48.106
38
+ 37,Lawrence,Willis,lwillis@devpulse.net,Åland,226.179.18.113
39
+ 38,Arthur,Frazier,afrazier@jabbertype.gov,Jamaica,191.8.127.69
40
+ 39,Joseph,Jordan,jjordan@brainverse.edu,Belarus,35.133.77.128
41
+ 40,Steve,Robertson,srobertson@yata.gov,Guinea-Bissau,214.196.72.59
42
+ 41,Christine,Williams,cwilliams@dablist.name,Grenada,207.37.134.135
43
+ 42,Brandon,Payne,bpayne@oyondu.net,Seychelles,103.172.50.228
44
+ 43,Charles,Fernandez,cfernandez@zava.edu,Palau,156.40.43.254
45
+ 44,Lisa,James,ljames@dazzlesphere.org,Mayotte,88.191.96.192
46
+ 45,Helen,Stephens,hstephens@devbug.edu,Nauru,65.240.201.247
47
+ 46,Donna,Barnes,dbarnes@npath.edu,Guyana,53.79.21.160
48
+ 47,Willie,Berry,wberry@chatterbridge.edu,Ascension Island,27.173.100.171
49
+ 48,Jerry,Fernandez,jfernandez@realcube.edu,China,54.147.218.106
50
+ 49,Jessica,Stewart,jstewart@gabvine.biz,Djibouti,220.164.122.4
51
+ 50,Paul,Stevens,pstevens@skalith.com,Qatar,181.118.119.183
52
+ 51,Jeffrey,Frazier,jfrazier@oyoba.info,Saint Martin,247.167.19.66
53
+ 52,Emily,Bell,ebell@lazz.info,Venezuela,56.23.2.77
54
+ 53,Wayne,Banks,wbanks@dabshots.org,Denmark,210.58.240.119
55
+ 54,Helen,Stone,hstone@mita.gov,Belgium,25.191.200.47
56
+ 55,Laura,Stone,lstone@oloo.com,Yugoslavia,98.117.184.246
57
+ 56,Amanda,Ward,award@yotz.com,Estonia,197.191.111.153
58
+ 57,Carlos,Duncan,cduncan@einti.edu,Moldova,88.215.57.115
59
+ 58,Catherine,Holmes,cholmes@photojam.gov,Rwanda,171.27.114.159
60
+ 59,Janice,Hernandez,jhernandez@kwimbee.biz,Qatar,180.13.57.100
61
+ 60,Judith,Vasquez,jvasquez@janyx.biz,Niue,151.169.187.79
62
+ 61,Dennis,Henderson,dhenderson@browsedrive.gov,Mozambique,220.48.212.254
63
+ 62,Jeremy,Ford,jford@meezzy.biz,Sudan,248.135.67.205
64
+ 63,Dennis,Hunter,dhunter@rooxo.net,Latvia,176.187.214.230
65
+ 64,Stephanie,Montgomery,smontgomery@babbleblab.net,France,137.214.192.40
66
+ 65,Henry,Olson,holson@tazz.info,Puerto Rico,176.207.8.142
67
+ 66,Jesse,Edwards,jedwards@dabfeed.mil,Argentina,147.16.35.23
68
+ 67,Randy,Peterson,rpeterson@wikizz.biz,Antigua and Barbuda,179.119.53.133
69
+ 68,Joan,Holmes,jholmes@mydo.name,United States Virgin Islands,0.59.105.147
70
+ 69,Jane,Vasquez,jvasquez@edgepulse.name,Bosnia and Herzegovina,179.67.30.193
71
+ 70,Heather,Hernandez,hhernandez@feedfish.com,Falkland Islands (Malvinas),154.132.147.163
72
+ 71,Linda,Webb,lwebb@feedfire.name,Gabon,118.24.235.210
73
+ 72,Richard,Daniels,rdaniels@jetwire.org,Jordan,42.10.7.222
74
+ 73,Terry,Fernandez,tfernandez@twitterbeat.org,Cook Islands,173.98.100.195
75
+ 74,Virginia,Willis,vwillis@jabberbean.biz,Argentina,130.235.152.57
76
+ 75,Richard,Austin,raustin@livefish.com,Zambia,167.130.14.74
77
+ 76,Carol,Griffin,cgriffin@fatz.mil,Grenada,1.189.205.12
78
+ 77,Brandon,Gutierrez,bgutierrez@zoomcast.edu,Vatican City State (Holy See),37.247.25.100
79
+ 78,Samuel,Ellis,sellis@yodo.net,Pakistan,10.183.35.75
80
+ 79,Stephanie,Fields,sfields@podcat.gov,Finland,133.98.9.101
81
+ 80,Rebecca,Kim,rkim@brainlounge.mil,Guadeloupe,155.122.101.233
82
+ 81,Debra,Warren,dwarren@dazzlesphere.org,Cameroon,41.110.64.145
83
+ 82,Brenda,Henderson,bhenderson@jetpulse.mil,Swaziland,216.4.186.151
84
+ 83,Victor,Hughes,vhughes@vipe.name,Macedonia,144.105.122.32
85
+ 84,Gerald,Romero,gromero@quinu.mil,France,114.177.65.254
86
+ 85,Raymond,Gomez,rgomez@realbridge.edu,Egypt,26.255.191.232
87
+ 86,Anne,Chavez,achavez@roodel.biz,Croatia,193.55.29.211
88
+ 87,Patrick,Willis,pwillis@dynabox.com,Georgia,156.207.126.24
89
+ 88,Alice,Gardner,agardner@aivee.info,Solomon Islands,71.46.147.126
90
+ 89,Jane,Harvey,jharvey@cogibox.edu,Svalbard and Jan Mayen Islands,25.33.240.168
91
+ 90,Rachel,Brown,rbrown@rhyloo.com,Equatorial Guinea,220.113.103.62
92
+ 91,Brenda,Hudson,bhudson@wikizz.biz,Brazil,67.187.212.52
93
+ 92,Lisa,Johnston,ljohnston@devshare.info,Qatar,13.43.69.191
94
+ 93,Shawn,Gonzales,sgonzales@fatz.gov,Sri Lanka,162.236.113.63
95
+ 94,Christine,Stanley,cstanley@voonyx.biz,American Samoa,77.123.162.28
96
+ 95,Annie,Thomas,athomas@ntags.mil,Guadeloupe,91.226.131.120
97
+ 96,Wayne,Rodriguez,wrodriguez@topicshots.com,Antarctica,238.217.34.126
98
+ 97,Daniel,Hall,dhall@skippad.edu,Gibraltar,231.242.152.83
99
+ 98,Virginia,Watson,vwatson@chatterpoint.mil,Northern Mariana Islands,188.211.155.117
100
+ 99,Benjamin,Henderson,bhenderson@zoovu.com,Netherlands Antilles,192.19.128.146
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mode Analytics
@@ -154,6 +154,7 @@ files:
154
154
  - spec/dataset/schema_spec.rb
155
155
  - spec/fixtures/carriage_returns.csv
156
156
  - spec/fixtures/standard.csv
157
+ - spec/fixtures/utf8.csv
157
158
  - spec/spec_helper.rb
158
159
  homepage: http://www.modeanalytics.com/
159
160
  licenses:
@@ -195,5 +196,5 @@ test_files:
195
196
  - spec/dataset/schema_spec.rb
196
197
  - spec/fixtures/carriage_returns.csv
197
198
  - spec/fixtures/standard.csv
199
+ - spec/fixtures/utf8.csv
198
200
  - spec/spec_helper.rb
199
- has_rdoc: