data_kit 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e93d042b5d65309bf8d99ff311b0a9d9dd3b2899
4
- data.tar.gz: 7ddf9b7d5a11ffc59ef0c093dcd2378270099410
3
+ metadata.gz: 11a12efb158e60cac6e71ac09e37799f811a65e4
4
+ data.tar.gz: 66ed506814c7273bdc980ada09049ebaabaa3a52
5
5
  SHA512:
6
- metadata.gz: f4231fed6d650d0b5ba39771469c014ccfe70f1f360c9a299604ee5a4e616566b5bc7cf8feab9cbb5f89e3ae4bfe5387cf528dfa664a9f0d25151a6bade4f34b
7
- data.tar.gz: 5caa3516fcb3b8aa5eeef7ef57e9a8c27e9108a580ebd77917502e2c4f9bd1d2f2c90459a85663028e081684a8b3dab7845cdda8879f267fe9a2a966e0fe975e
6
+ metadata.gz: f2f3eb974bde2f7bdbdcd34fdc4ea57f4ee64c4876768f436a2a5e54f79eba3254f26247b786bbacee869dea17b91de716950fab97ac8ceeb3843f02d51f8a52
7
+ data.tar.gz: e1f92f4116af3087938dfcc715605c7d805dd87b408f0dfa4c9e9494587c925451e0d749c528df5eaac400b063d54efbbf97fdb02e3f783981f02d09e33cbc3d
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- data_kit (0.0.4)
4
+ data_kit (0.0.6)
5
5
  rcsv
6
6
  timeliness
7
7
 
@@ -15,10 +15,10 @@ module DataKit
15
15
 
16
16
  @type_hints = {}
17
17
 
18
- if options[:use_type_hints].nil? || options[:use_type_hints] == false
19
- @use_type_hints = false
20
- else
18
+ if options[:use_type_hints].nil? || options[:use_type_hints] == true
21
19
  @use_type_hints = true
20
+ else
21
+ @use_type_hints = false
22
22
  end
23
23
 
24
24
  fields.each do |field_name|
@@ -4,16 +4,23 @@ module DataKit
4
4
  attr_accessor :csv
5
5
  attr_accessor :keys
6
6
  attr_accessor :sampling_rate
7
-
7
+ attr_accessor :use_type_hints
8
+
8
9
  def initialize(csv, options = {})
9
10
  @csv = csv
10
11
  @keys = options[:keys] || []
11
12
  @sampling_rate = options[:sampling_rate] || 0.1
13
+
14
+ if options[:use_type_hints].nil? || options[:use_type_hints] == true
15
+ @use_type_hints = true
16
+ else
17
+ @use_type_hints = false
18
+ end
12
19
  end
13
20
 
14
21
  def execute
15
22
  random = Random.new
16
- analysis = SchemaAnalysis.new(csv.headers)
23
+ analysis = SchemaAnalysis.new(csv.headers, :use_type_hints => use_type_hints)
17
24
 
18
25
  csv.each_row do |row|
19
26
  analysis.increment_total
@@ -1,3 +1,3 @@
1
1
  module DataKit
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -19,6 +19,12 @@ describe DataKit::CSV::SchemaAnalyzer do
19
19
  analyzer.csv.should == csv
20
20
  analyzer.keys.should == []
21
21
  analyzer.sampling_rate.should == 0.1
22
+ analyzer.use_type_hints.should == true
23
+ end
24
+
25
+ it "should initialize with type hints turned off" do
26
+ analyzer = DataKit::CSV::SchemaAnalyzer.new(csv, :use_type_hints => false)
27
+ analyzer.use_type_hints.should == false
22
28
  end
23
29
 
24
30
  it "should initialize schema with an IO" do
@@ -29,7 +35,7 @@ describe DataKit::CSV::SchemaAnalyzer do
29
35
  analyzer.sampling_rate.should == 0.1
30
36
  end
31
37
 
32
- it "should execute an analysis" do
38
+ it "should execute an analysis with type hints" do
33
39
  analysis = DataKit::CSV::SchemaAnalyzer.new(csv, :sampling_rate => 0.5).execute
34
40
 
35
41
  analysis.type?('id').should == :integer
@@ -45,6 +51,29 @@ describe DataKit::CSV::SchemaAnalyzer do
45
51
 
46
52
  analysis.row_count.should == 10
47
53
  analysis.sample_count.should be < 10
54
+ analysis.use_type_hints.should == true
55
+
56
+ puts analysis.type_hints.inspect
57
+
58
+ end
59
+
60
+ it "should execute an analysis without type hints" do
61
+ analysis = DataKit::CSV::SchemaAnalyzer.new(csv, :sampling_rate => 0.5, :use_type_hints => false).execute
62
+
63
+ analysis.type?('id').should == :integer
64
+ analysis.type?('first_name').should == :string
65
+ analysis.type?('last_name').should == :string
66
+ analysis.type?('email').should == :string
67
+ analysis.type?('country').should == :string
68
+ analysis.type?('ip_address').should == :string
69
+ analysis.type?('amount').should == :number
70
+ analysis.type?('active').should == :boolean
71
+ analysis.type?('activated_at').should == :datetime
72
+ analysis.type?('address').should == :string
73
+
74
+ analysis.row_count.should == 10
75
+ analysis.sample_count.should be < 10
76
+ analysis.use_type_hints.should == false
48
77
  end
49
78
 
50
79
  it "should calculate a sampling_rate" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mode Analytics
@@ -198,4 +198,3 @@ test_files:
198
198
  - spec/fixtures/standard.csv
199
199
  - spec/fixtures/utf8.csv
200
200
  - spec/spec_helper.rb
201
- has_rdoc: