schema-inference 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b5da2e518ffc5bd291ce69ba382f20a3afbc2c06
4
- data.tar.gz: 44f330a78d095a596841020bcf8a5e983d12921b
3
+ metadata.gz: c1d7af131c500e6f39e2215da26ad17f9baf961c
4
+ data.tar.gz: 6d9055d8b1ba6a50342c819fab15b20a2ac2192d
5
5
  SHA512:
6
- metadata.gz: d030451925707394967f79dcd0e7ff287d735c5176e02bd68803aeff05800bc91273fe49157a868be8b4c21f82998c1e4619d372e20d42bd19c8d895c6156c47
7
- data.tar.gz: 0ac460013951cb20a47db9d0e4d0ae0f25f61b421255603f64c206280c5b22fd0d55a8f5df2d97f97a7c4a54a2b8a1ab3b25639fbe7f67a8579dea3272139163
6
+ metadata.gz: 7355e4693fde74f238ef0d9e26ff469b327c61e4bdb7fb9d4fd19155e7db1dffc6221c46009c59b0eae3cb3ca6c65121cbf753f7bdc3a764ae8ea21355be3fcb
7
+ data.tar.gz: 01d7e5023340336bbc2d27dcf6b754dc66ad4d09ad34d3d5f2024decbbd7178ebd05f59c2d03079b7c1e88ef09bce5dbfd8077c5967e07742b53c25ac683eed2
data/README.md CHANGED
@@ -1,3 +1,6 @@
1
+ [![Build
2
+ Status](https://travis-ci.org/Phybbit/schema-inference.svg?branch=master)](https://travis-ci.org/Phybbit/schema-inference)
3
+
1
4
  # Schema::Inference
2
5
 
3
6
  Supports inferring tabular schemas from deep nested data structures.
@@ -50,7 +50,7 @@ module Schema
50
50
  raise ArgumentError, 'dataset must be an array or a hash'
51
51
  end
52
52
 
53
- FIXNUM_MAX = (2**(0.size * 8 -2) -1)
53
+ INT_MAX = 2_147_483_648
54
54
 
55
55
  def data_schema(data)
56
56
  table_schema = {}
@@ -76,8 +76,8 @@ module Schema
76
76
  field_schema[:types][field[:type]][:count] += 1
77
77
 
78
78
  if type_has_min_max?(field[:type])
79
- field_size = value_length(field[:inferred_value])
80
- field_schema[:types][field[:type]][:min] = [field_schema[:types][field[:type]][:min] || FIXNUM_MAX, field_size].min
79
+ field_size = value_length(field[:inferred_value], field[:type])
80
+ field_schema[:types][field[:type]][:min] = [field_schema[:types][field[:type]][:min] || INT_MAX, field_size].min
81
81
  field_schema[:types][field[:type]][:max] = [field_schema[:types][field[:type]][:max] || 0, field_size].max
82
82
  end
83
83
 
@@ -92,8 +92,8 @@ module Schema
92
92
  type == String || NumericTypes.include?(type)
93
93
  end
94
94
 
95
- def value_length(value)
96
- return value.length if value.is_a?(String)
95
+ def value_length(value, type)
96
+ return value.to_s.length if type == String
97
97
  value # leave as-is otherwise
98
98
  end
99
99
 
@@ -109,7 +109,7 @@ module Schema
109
109
  table_schema[k][:types][type] ||= { count: 0 }
110
110
  table_schema[k][:types][type][:count] += info[:count]
111
111
  if type_has_min_max?(type)
112
- table_schema[k][:types][type][:min] = [table_schema[k][:types][type][:min] || FIXNUM_MAX, info[:min]].min
112
+ table_schema[k][:types][type][:min] = [table_schema[k][:types][type][:min] || INT_MAX, info[:min]].min
113
113
  table_schema[k][:types][type][:max] = [table_schema[k][:types][type][:max] || 0, info[:max]].max
114
114
  end
115
115
  }
@@ -207,14 +207,20 @@ module Schema
207
207
 
208
208
  def detect_type_of(value)
209
209
  return Boolean if value.is_a?(TrueClass) || value.is_a?(FalseClass)
210
- return Integer if value.is_a? Integer
210
+
211
+ if value.is_a? Integer
212
+ return Integer if value.abs <= INT_MAX
213
+ return String
214
+ end
215
+
211
216
  return Numeric if value.is_a? Numeric
212
217
  return Time if value.is_a? Time
213
218
  return NilClass if value.is_a? NilClass
214
219
 
220
+
215
221
  if value.is_a? String
216
- return Integer if value =~ /^[-+]?[0-9]+$/
217
- return Numeric if value =~ /^[-+]?[0-9]*\.?[0-9]+$/
222
+ return Integer if value =~ /^[-+]?[0-9]+$/ && value.to_i.abs <= INT_MAX
223
+ return Numeric if value =~ /^[-+]?[0-9]*\.[0-9]+$/
218
224
  return Boolean if %w(false true).include?(value.downcase)
219
225
  return Time if Timeliness.parse(value) != nil
220
226
  return String
@@ -1,5 +1,5 @@
1
1
  module Schema
2
2
  module Inference
3
- VERSION = '1.2.1'
3
+ VERSION = '1.3.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: schema-inference
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eurico Doirado
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-31 00:00:00.000000000 Z
11
+ date: 2017-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -150,9 +150,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
150
150
  version: '0'
151
151
  requirements: []
152
152
  rubyforge_project:
153
- rubygems_version: 2.5.1
153
+ rubygems_version: 2.6.11
154
154
  signing_key:
155
155
  specification_version: 4
156
156
  summary: Supports inferring tabular schemas from deep nested structures.
157
157
  test_files: []
158
- has_rdoc: