data_kit 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/data_kit/csv/schema_analysis.rb +21 -2
- data/lib/data_kit/dataset/field.rb +16 -1
- data/lib/data_kit/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e93d042b5d65309bf8d99ff311b0a9d9dd3b2899
|
4
|
+
data.tar.gz: 7ddf9b7d5a11ffc59ef0c093dcd2378270099410
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4231fed6d650d0b5ba39771469c014ccfe70f1f360c9a299604ee5a4e616566b5bc7cf8feab9cbb5f89e3ae4bfe5387cf528dfa664a9f0d25151a6bade4f34b
|
7
|
+
data.tar.gz: 5caa3516fcb3b8aa5eeef7ef57e9a8c27e9108a580ebd77917502e2c4f9bd1d2f2c90459a85663028e081684a8b3dab7845cdda8879f267fe9a2a966e0fe975e
|
data/lib/data_kit/csv/schema_analysis.rb
CHANGED
@@ -6,12 +6,24 @@ module DataKit
|
|
6
6
|
attr_reader :row_count
|
7
7
|
attr_reader :sample_count
|
8
8
|
|
9
|
-
|
9
|
+
attr_reader :type_hints
|
10
|
+
attr_reader :use_type_hints
|
11
|
+
|
12
|
+
def initialize(fields, options = {})
|
10
13
|
@fields, @types = fields, {}
|
11
14
|
@row_count, @sample_count = 0, 0
|
12
15
|
|
16
|
+
@type_hints = {}
|
17
|
+
|
18
|
+
if options[:use_type_hints].nil? || options[:use_type_hints] == false
|
19
|
+
@use_type_hints = false
|
20
|
+
else
|
21
|
+
@use_type_hints = true
|
22
|
+
end
|
23
|
+
|
13
24
|
fields.each do |field_name|
|
14
25
|
@types[field_name] = {}
|
26
|
+
@type_hints[field_name] = :string
|
15
27
|
Dataset::Field::Types.each do |type|
|
16
28
|
@types[field_name][type] = 0
|
17
29
|
end
|
@@ -27,7 +39,14 @@ module DataKit
|
|
27
39
|
end
|
28
40
|
|
29
41
|
def insert(field_name, value)
|
30
|
-
|
42
|
+
if use_type_hints
|
43
|
+
type = Dataset::Field.type?(value, type_hints[field_name])
|
44
|
+
@type_hints[field_name] = type # cache the most recent type
|
45
|
+
else
|
46
|
+
type = Dataset::Field.type?(value)
|
47
|
+
end
|
48
|
+
|
49
|
+
@types[field_name][type] += 1
|
31
50
|
end
|
32
51
|
|
33
52
|
def field_types
|
data/lib/data_kit/dataset/field.rb
CHANGED
@@ -23,8 +23,13 @@ module DataKit
|
|
23
23
|
end
|
24
24
|
|
25
25
|
class << self
|
26
|
-
def type?(value)
|
26
|
+
def type?(value, hint_type = nil)
|
27
27
|
return :null if value.nil?
|
28
|
+
|
29
|
+
if hint_type && is_type?(value, hint_type)
|
30
|
+
return hint_type
|
31
|
+
end
|
32
|
+
|
28
33
|
reformatted = Converters::Number.reformat(value)
|
29
34
|
|
30
35
|
if Converters::Integer.match?(reformatted)
|
@@ -40,6 +45,16 @@ module DataKit
|
|
40
45
|
end
|
41
46
|
end
|
42
47
|
|
48
|
+
def is_type?(value, type)
|
49
|
+
case type
|
50
|
+
when :integer then Converters::Integer.match?(Converters::Number.reformat(value))
|
51
|
+
when :number then Converters::Number.match?(Converters::Number.reformat(value))
|
52
|
+
when :boolean then Converters::Boolean.match?(value)
|
53
|
+
when :datetime then Converters::DateTime.match?(value)
|
54
|
+
when :string then false
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
43
58
|
def convert(value, type)
|
44
59
|
return nil if type == :null || value.nil?
|
45
60
|
reformatted = Converters::Number.reformat(value)
|
data/lib/data_kit/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
@@ -198,3 +198,4 @@ test_files:
|
|
198
198
|
- spec/fixtures/standard.csv
|
199
199
|
- spec/fixtures/utf8.csv
|
200
200
|
- spec/spec_helper.rb
|
201
|
+
has_rdoc:
|