data_kit 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dd32e91aed4ac35a8dbb104e577d9d7454025733
4
- data.tar.gz: 31e8c8ef04ba50e94bffb8a29104362a14e56e08
3
+ metadata.gz: e93d042b5d65309bf8d99ff311b0a9d9dd3b2899
4
+ data.tar.gz: 7ddf9b7d5a11ffc59ef0c093dcd2378270099410
5
5
  SHA512:
6
- metadata.gz: 77effe4f14b155f04c3f7d177a4ba0072cb02d558a2916bb66d8709484422e28e34153090b467a828dfc3ebf14d967d97f0b66cf5ba9a6c9ccbca14cc62952ab
7
- data.tar.gz: e94f0b6657c0b7a105a6dd324df4ce8824c21d6acce11eee50a55973ef108873641e340597ef989cedda71043a10466e28f05540fa8a23ad6cb2176c8e2a002e
6
+ metadata.gz: f4231fed6d650d0b5ba39771469c014ccfe70f1f360c9a299604ee5a4e616566b5bc7cf8feab9cbb5f89e3ae4bfe5387cf528dfa664a9f0d25151a6bade4f34b
7
+ data.tar.gz: 5caa3516fcb3b8aa5eeef7ef57e9a8c27e9108a580ebd77917502e2c4f9bd1d2f2c90459a85663028e081684a8b3dab7845cdda8879f267fe9a2a966e0fe975e
@@ -6,12 +6,24 @@ module DataKit
6
6
  attr_reader :row_count
7
7
  attr_reader :sample_count
8
8
 
9
- def initialize(fields)
9
+ attr_reader :type_hints
10
+ attr_reader :use_type_hints
11
+
12
+ def initialize(fields, options = {})
10
13
  @fields, @types = fields, {}
11
14
  @row_count, @sample_count = 0, 0
12
15
 
16
+ @type_hints = {}
17
+
18
+ if options[:use_type_hints].nil? || options[:use_type_hints] == false
19
+ @use_type_hints = false
20
+ else
21
+ @use_type_hints = true
22
+ end
23
+
13
24
  fields.each do |field_name|
14
25
  @types[field_name] = {}
26
+ @type_hints[field_name] = :string
15
27
  Dataset::Field::Types.each do |type|
16
28
  @types[field_name][type] = 0
17
29
  end
@@ -27,7 +39,14 @@ module DataKit
27
39
  end
28
40
 
29
41
  def insert(field_name, value)
30
- @types[field_name][Dataset::Field.type?(value)] += 1
42
+ if use_type_hints
43
+ type = Dataset::Field.type?(value, type_hints[field_name])
44
+ @type_hints[field_name] = type # cache the most recent type
45
+ else
46
+ type = Dataset::Field.type?(value)
47
+ end
48
+
49
+ @types[field_name][type] += 1
31
50
  end
32
51
 
33
52
  def field_types
@@ -23,8 +23,13 @@ module DataKit
23
23
  end
24
24
 
25
25
  class << self
26
- def type?(value)
26
+ def type?(value, hint_type = nil)
27
27
  return :null if value.nil?
28
+
29
+ if hint_type && is_type?(value, hint_type)
30
+ return hint_type
31
+ end
32
+
28
33
  reformatted = Converters::Number.reformat(value)
29
34
 
30
35
  if Converters::Integer.match?(reformatted)
@@ -40,6 +45,16 @@ module DataKit
40
45
  end
41
46
  end
42
47
 
48
+ def is_type?(value, type)
49
+ case type
50
+ when :integer then Converters::Integer.match?(Converters::Number.reformat(value))
51
+ when :number then Converters::Number.match?(Converters::Number.reformat(value))
52
+ when :boolean then Converters::Boolean.match?(value)
53
+ when :datetime then Converters::DateTime.match?(value)
54
+ when :string then false
55
+ end
56
+ end
57
+
43
58
  def convert(value, type)
44
59
  return nil if type == :null || value.nil?
45
60
  reformatted = Converters::Number.reformat(value)
@@ -1,3 +1,3 @@
1
1
  module DataKit
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mode Analytics
@@ -198,3 +198,4 @@ test_files:
198
198
  - spec/fixtures/standard.csv
199
199
  - spec/fixtures/utf8.csv
200
200
  - spec/spec_helper.rb
201
+ has_rdoc: