data_kit 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/data_kit/csv/schema_analysis.rb +3 -3
- data/lib/data_kit/csv/schema_analyzer.rb +9 -2
- data/lib/data_kit/version.rb +1 -1
- data/spec/csv/schema_analyzer_spec.rb +30 -1
- metadata +1 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 11a12efb158e60cac6e71ac09e37799f811a65e4
|
4
|
+
data.tar.gz: 66ed506814c7273bdc980ada09049ebaabaa3a52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2f3eb974bde2f7bdbdcd34fdc4ea57f4ee64c4876768f436a2a5e54f79eba3254f26247b786bbacee869dea17b91de716950fab97ac8ceeb3843f02d51f8a52
|
7
|
+
data.tar.gz: e1f92f4116af3087938dfcc715605c7d805dd87b408f0dfa4c9e9494587c925451e0d749c528df5eaac400b063d54efbbf97fdb02e3f783981f02d09e33cbc3d
|
data/Gemfile.lock
CHANGED
data/lib/data_kit/csv/schema_analysis.rb
CHANGED
@@ -15,10 +15,10 @@ module DataKit
|
|
15
15
|
|
16
16
|
@type_hints = {}
|
17
17
|
|
18
|
-
if options[:use_type_hints].nil? || options[:use_type_hints] == false
|
19
|
-
@use_type_hints = false
|
20
|
-
else
|
18
|
+
if options[:use_type_hints].nil? || options[:use_type_hints] == true
|
21
19
|
@use_type_hints = true
|
20
|
+
else
|
21
|
+
@use_type_hints = false
|
22
22
|
end
|
23
23
|
|
24
24
|
fields.each do |field_name|
|
data/lib/data_kit/csv/schema_analyzer.rb
CHANGED
@@ -4,16 +4,23 @@ module DataKit
|
|
4
4
|
attr_accessor :csv
|
5
5
|
attr_accessor :keys
|
6
6
|
attr_accessor :sampling_rate
|
7
|
-
|
7
|
+
attr_accessor :use_type_hints
|
8
|
+
|
8
9
|
def initialize(csv, options = {})
|
9
10
|
@csv = csv
|
10
11
|
@keys = options[:keys] || []
|
11
12
|
@sampling_rate = options[:sampling_rate] || 0.1
|
13
|
+
|
14
|
+
if options[:use_type_hints].nil? || options[:use_type_hints] == true
|
15
|
+
@use_type_hints = true
|
16
|
+
else
|
17
|
+
@use_type_hints = false
|
18
|
+
end
|
12
19
|
end
|
13
20
|
|
14
21
|
def execute
|
15
22
|
random = Random.new
|
16
|
-
analysis = SchemaAnalysis.new(csv.headers)
|
23
|
+
analysis = SchemaAnalysis.new(csv.headers, :use_type_hints => use_type_hints)
|
17
24
|
|
18
25
|
csv.each_row do |row|
|
19
26
|
analysis.increment_total
|
data/lib/data_kit/version.rb
CHANGED
data/spec/csv/schema_analyzer_spec.rb
CHANGED
@@ -19,6 +19,12 @@ describe DataKit::CSV::SchemaAnalyzer do
|
|
19
19
|
analyzer.csv.should == csv
|
20
20
|
analyzer.keys.should == []
|
21
21
|
analyzer.sampling_rate.should == 0.1
|
22
|
+
analyzer.use_type_hints.should == true
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should initialize with type hints turned off" do
|
26
|
+
analyzer = DataKit::CSV::SchemaAnalyzer.new(csv, :use_type_hints => false)
|
27
|
+
analyzer.use_type_hints.should == false
|
22
28
|
end
|
23
29
|
|
24
30
|
it "should initialize schema with an IO" do
|
@@ -29,7 +35,7 @@ describe DataKit::CSV::SchemaAnalyzer do
|
|
29
35
|
analyzer.sampling_rate.should == 0.1
|
30
36
|
end
|
31
37
|
|
32
|
-
it "should execute an analysis" do
|
38
|
+
it "should execute an analysis with type hints" do
|
33
39
|
analysis = DataKit::CSV::SchemaAnalyzer.new(csv, :sampling_rate => 0.5).execute
|
34
40
|
|
35
41
|
analysis.type?('id').should == :integer
|
@@ -45,6 +51,29 @@ describe DataKit::CSV::SchemaAnalyzer do
|
|
45
51
|
|
46
52
|
analysis.row_count.should == 10
|
47
53
|
analysis.sample_count.should be < 10
|
54
|
+
analysis.use_type_hints.should == true
|
55
|
+
|
56
|
+
puts analysis.type_hints.inspect
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should execute an analysis without type hints" do
|
61
|
+
analysis = DataKit::CSV::SchemaAnalyzer.new(csv, :sampling_rate => 0.5, :use_type_hints => false).execute
|
62
|
+
|
63
|
+
analysis.type?('id').should == :integer
|
64
|
+
analysis.type?('first_name').should == :string
|
65
|
+
analysis.type?('last_name').should == :string
|
66
|
+
analysis.type?('email').should == :string
|
67
|
+
analysis.type?('country').should == :string
|
68
|
+
analysis.type?('ip_address').should == :string
|
69
|
+
analysis.type?('amount').should == :number
|
70
|
+
analysis.type?('active').should == :boolean
|
71
|
+
analysis.type?('activated_at').should == :datetime
|
72
|
+
analysis.type?('address').should == :string
|
73
|
+
|
74
|
+
analysis.row_count.should == 10
|
75
|
+
analysis.sample_count.should be < 10
|
76
|
+
analysis.use_type_hints.should == false
|
48
77
|
end
|
49
78
|
|
50
79
|
it "should calculate a sampling_rate" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
@@ -198,4 +198,3 @@ test_files:
|
|
198
198
|
- spec/fixtures/standard.csv
|
199
199
|
- spec/fixtures/utf8.csv
|
200
200
|
- spec/spec_helper.rb
|
201
|
-
has_rdoc:
|