data_kit 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/data_kit/csv/schema_analysis.rb +21 -2
- data/lib/data_kit/dataset/field.rb +16 -1
- data/lib/data_kit/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e93d042b5d65309bf8d99ff311b0a9d9dd3b2899
|
4
|
+
data.tar.gz: 7ddf9b7d5a11ffc59ef0c093dcd2378270099410
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4231fed6d650d0b5ba39771469c014ccfe70f1f360c9a299604ee5a4e616566b5bc7cf8feab9cbb5f89e3ae4bfe5387cf528dfa664a9f0d25151a6bade4f34b
|
7
|
+
data.tar.gz: 5caa3516fcb3b8aa5eeef7ef57e9a8c27e9108a580ebd77917502e2c4f9bd1d2f2c90459a85663028e081684a8b3dab7845cdda8879f267fe9a2a966e0fe975e
|
data/lib/data_kit/csv/schema_analysis.rb
CHANGED
@@ -6,12 +6,24 @@ module DataKit
|
|
6
6
|
attr_reader :row_count
|
7
7
|
attr_reader :sample_count
|
8
8
|
|
9
|
-
|
9
|
+
attr_reader :type_hints
|
10
|
+
attr_reader :use_type_hints
|
11
|
+
|
12
|
+
def initialize(fields, options = {})
|
10
13
|
@fields, @types = fields, {}
|
11
14
|
@row_count, @sample_count = 0, 0
|
12
15
|
|
16
|
+
@type_hints = {}
|
17
|
+
|
18
|
+
if options[:use_type_hints].nil? || options[:use_type_hints] == false
|
19
|
+
@use_type_hints = false
|
20
|
+
else
|
21
|
+
@use_type_hints = true
|
22
|
+
end
|
23
|
+
|
13
24
|
fields.each do |field_name|
|
14
25
|
@types[field_name] = {}
|
26
|
+
@type_hints[field_name] = :string
|
15
27
|
Dataset::Field::Types.each do |type|
|
16
28
|
@types[field_name][type] = 0
|
17
29
|
end
|
@@ -27,7 +39,14 @@ module DataKit
|
|
27
39
|
end
|
28
40
|
|
29
41
|
def insert(field_name, value)
|
30
|
-
|
42
|
+
if use_type_hints
|
43
|
+
type = Dataset::Field.type?(value, type_hints[field_name])
|
44
|
+
@type_hints[field_name] = type # cache the most recent type
|
45
|
+
else
|
46
|
+
type = Dataset::Field.type?(value)
|
47
|
+
end
|
48
|
+
|
49
|
+
@types[field_name][type] += 1
|
31
50
|
end
|
32
51
|
|
33
52
|
def field_types
|
data/lib/data_kit/dataset/field.rb
CHANGED
@@ -23,8 +23,13 @@ module DataKit
|
|
23
23
|
end
|
24
24
|
|
25
25
|
class << self
|
26
|
-
def type?(value)
|
26
|
+
def type?(value, hint_type = nil)
|
27
27
|
return :null if value.nil?
|
28
|
+
|
29
|
+
if hint_type && is_type?(value, hint_type)
|
30
|
+
return hint_type
|
31
|
+
end
|
32
|
+
|
28
33
|
reformatted = Converters::Number.reformat(value)
|
29
34
|
|
30
35
|
if Converters::Integer.match?(reformatted)
|
@@ -40,6 +45,16 @@ module DataKit
|
|
40
45
|
end
|
41
46
|
end
|
42
47
|
|
48
|
+
def is_type?(value, type)
|
49
|
+
case type
|
50
|
+
when :integer then Converters::Integer.match?(Converters::Number.reformat(value))
|
51
|
+
when :number then Converters::Number.match?(Converters::Number.reformat(value))
|
52
|
+
when :boolean then Converters::Boolean.match?(value)
|
53
|
+
when :datetime then Converters::DateTime.match?(value)
|
54
|
+
when :string then false
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
43
58
|
def convert(value, type)
|
44
59
|
return nil if type == :null || value.nil?
|
45
60
|
reformatted = Converters::Number.reformat(value)
|
data/lib/data_kit/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
@@ -198,3 +198,4 @@ test_files:
|
|
198
198
|
- spec/fixtures/standard.csv
|
199
199
|
- spec/fixtures/utf8.csv
|
200
200
|
- spec/spec_helper.rb
|
201
|
+
has_rdoc:
|