data_kit 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/data_kit/csv/schema_analysis.rb +3 -3
- data/lib/data_kit/csv/schema_analyzer.rb +9 -2
- data/lib/data_kit/version.rb +1 -1
- data/spec/csv/schema_analyzer_spec.rb +30 -1
- metadata +1 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 11a12efb158e60cac6e71ac09e37799f811a65e4
|
4
|
+
data.tar.gz: 66ed506814c7273bdc980ada09049ebaabaa3a52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2f3eb974bde2f7bdbdcd34fdc4ea57f4ee64c4876768f436a2a5e54f79eba3254f26247b786bbacee869dea17b91de716950fab97ac8ceeb3843f02d51f8a52
|
7
|
+
data.tar.gz: e1f92f4116af3087938dfcc715605c7d805dd87b408f0dfa4c9e9494587c925451e0d749c528df5eaac400b063d54efbbf97fdb02e3f783981f02d09e33cbc3d
|
data/Gemfile.lock
CHANGED
data/lib/data_kit/csv/schema_analysis.rb
CHANGED
@@ -15,10 +15,10 @@ module DataKit
|
|
15
15
|
|
16
16
|
@type_hints = {}
|
17
17
|
|
18
|
-
if options[:use_type_hints].nil? || options[:use_type_hints] == false
|
19
|
-
@use_type_hints = false
|
20
|
-
else
|
18
|
+
if options[:use_type_hints].nil? || options[:use_type_hints] == true
|
21
19
|
@use_type_hints = true
|
20
|
+
else
|
21
|
+
@use_type_hints = false
|
22
22
|
end
|
23
23
|
|
24
24
|
fields.each do |field_name|
|
data/lib/data_kit/csv/schema_analyzer.rb
CHANGED
@@ -4,16 +4,23 @@ module DataKit
|
|
4
4
|
attr_accessor :csv
|
5
5
|
attr_accessor :keys
|
6
6
|
attr_accessor :sampling_rate
|
7
|
-
|
7
|
+
attr_accessor :use_type_hints
|
8
|
+
|
8
9
|
def initialize(csv, options = {})
|
9
10
|
@csv = csv
|
10
11
|
@keys = options[:keys] || []
|
11
12
|
@sampling_rate = options[:sampling_rate] || 0.1
|
13
|
+
|
14
|
+
if options[:use_type_hints].nil? || options[:use_type_hints] == true
|
15
|
+
@use_type_hints = true
|
16
|
+
else
|
17
|
+
@use_type_hints = false
|
18
|
+
end
|
12
19
|
end
|
13
20
|
|
14
21
|
def execute
|
15
22
|
random = Random.new
|
16
|
-
analysis = SchemaAnalysis.new(csv.headers)
|
23
|
+
analysis = SchemaAnalysis.new(csv.headers, :use_type_hints => use_type_hints)
|
17
24
|
|
18
25
|
csv.each_row do |row|
|
19
26
|
analysis.increment_total
|
data/lib/data_kit/version.rb
CHANGED
data/spec/csv/schema_analyzer_spec.rb
CHANGED
@@ -19,6 +19,12 @@ describe DataKit::CSV::SchemaAnalyzer do
|
|
19
19
|
analyzer.csv.should == csv
|
20
20
|
analyzer.keys.should == []
|
21
21
|
analyzer.sampling_rate.should == 0.1
|
22
|
+
analyzer.use_type_hints.should == true
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should initialize with type hints turned off" do
|
26
|
+
analyzer = DataKit::CSV::SchemaAnalyzer.new(csv, :use_type_hints => false)
|
27
|
+
analyzer.use_type_hints.should == false
|
22
28
|
end
|
23
29
|
|
24
30
|
it "should initialize schema with an IO" do
|
@@ -29,7 +35,7 @@ describe DataKit::CSV::SchemaAnalyzer do
|
|
29
35
|
analyzer.sampling_rate.should == 0.1
|
30
36
|
end
|
31
37
|
|
32
|
-
it "should execute an analysis" do
|
38
|
+
it "should execute an analysis with type hints" do
|
33
39
|
analysis = DataKit::CSV::SchemaAnalyzer.new(csv, :sampling_rate => 0.5).execute
|
34
40
|
|
35
41
|
analysis.type?('id').should == :integer
|
@@ -45,6 +51,29 @@ describe DataKit::CSV::SchemaAnalyzer do
|
|
45
51
|
|
46
52
|
analysis.row_count.should == 10
|
47
53
|
analysis.sample_count.should be < 10
|
54
|
+
analysis.use_type_hints.should == true
|
55
|
+
|
56
|
+
puts analysis.type_hints.inspect
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should execute an analysis without type hints" do
|
61
|
+
analysis = DataKit::CSV::SchemaAnalyzer.new(csv, :sampling_rate => 0.5, :use_type_hints => false).execute
|
62
|
+
|
63
|
+
analysis.type?('id').should == :integer
|
64
|
+
analysis.type?('first_name').should == :string
|
65
|
+
analysis.type?('last_name').should == :string
|
66
|
+
analysis.type?('email').should == :string
|
67
|
+
analysis.type?('country').should == :string
|
68
|
+
analysis.type?('ip_address').should == :string
|
69
|
+
analysis.type?('amount').should == :number
|
70
|
+
analysis.type?('active').should == :boolean
|
71
|
+
analysis.type?('activated_at').should == :datetime
|
72
|
+
analysis.type?('address').should == :string
|
73
|
+
|
74
|
+
analysis.row_count.should == 10
|
75
|
+
analysis.sample_count.should be < 10
|
76
|
+
analysis.use_type_hints.should == false
|
48
77
|
end
|
49
78
|
|
50
79
|
it "should calculate a sampling_rate" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
@@ -198,4 +198,3 @@ test_files:
|
|
198
198
|
- spec/fixtures/standard.csv
|
199
199
|
- spec/fixtures/utf8.csv
|
200
200
|
- spec/spec_helper.rb
|
201
|
-
has_rdoc:
|