schema-inference 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/schema/inference/schema_inferrer.rb +15 -9
- data/lib/schema/inference/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1d7af131c500e6f39e2215da26ad17f9baf961c
|
4
|
+
data.tar.gz: 6d9055d8b1ba6a50342c819fab15b20a2ac2192d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7355e4693fde74f238ef0d9e26ff469b327c61e4bdb7fb9d4fd19155e7db1dffc6221c46009c59b0eae3cb3ca6c65121cbf753f7bdc3a764ae8ea21355be3fcb
|
7
|
+
data.tar.gz: 01d7e5023340336bbc2d27dcf6b754dc66ad4d09ad34d3d5f2024decbbd7178ebd05f59c2d03079b7c1e88ef09bce5dbfd8077c5967e07742b53c25ac683eed2
|
data/README.md
CHANGED
(diff for this file is not shown)
data/lib/schema/inference/schema_inferrer.rb
CHANGED
@@ -50,7 +50,7 @@ module Schema
|
|
50
50
|
raise ArgumentError, 'dataset must be an array or a hash'
|
51
51
|
end
|
52
52
|
|
53
|
-
|
53
|
+
INT_MAX = 2_147_483_648
|
54
54
|
|
55
55
|
def data_schema(data)
|
56
56
|
table_schema = {}
|
@@ -76,8 +76,8 @@ module Schema
|
|
76
76
|
field_schema[:types][field[:type]][:count] += 1
|
77
77
|
|
78
78
|
if type_has_min_max?(field[:type])
|
79
|
-
field_size = value_length(field[:inferred_value])
|
80
|
-
field_schema[:types][field[:type]][:min] = [field_schema[:types][field[:type]][:min] ||
|
79
|
+
field_size = value_length(field[:inferred_value], field[:type])
|
80
|
+
field_schema[:types][field[:type]][:min] = [field_schema[:types][field[:type]][:min] || INT_MAX, field_size].min
|
81
81
|
field_schema[:types][field[:type]][:max] = [field_schema[:types][field[:type]][:max] || 0, field_size].max
|
82
82
|
end
|
83
83
|
|
@@ -92,8 +92,8 @@ module Schema
|
|
92
92
|
type == String || NumericTypes.include?(type)
|
93
93
|
end
|
94
94
|
|
95
|
-
def value_length(value)
|
96
|
-
return value.length if
|
95
|
+
def value_length(value, type)
|
96
|
+
return value.to_s.length if type == String
|
97
97
|
value # leave as-is otherwise
|
98
98
|
end
|
99
99
|
|
@@ -109,7 +109,7 @@ module Schema
|
|
109
109
|
table_schema[k][:types][type] ||= { count: 0 }
|
110
110
|
table_schema[k][:types][type][:count] += info[:count]
|
111
111
|
if type_has_min_max?(type)
|
112
|
-
table_schema[k][:types][type][:min] = [table_schema[k][:types][type][:min] ||
|
112
|
+
table_schema[k][:types][type][:min] = [table_schema[k][:types][type][:min] || INT_MAX, info[:min]].min
|
113
113
|
table_schema[k][:types][type][:max] = [table_schema[k][:types][type][:max] || 0, info[:max]].max
|
114
114
|
end
|
115
115
|
}
|
@@ -207,14 +207,20 @@ module Schema
|
|
207
207
|
|
208
208
|
def detect_type_of(value)
|
209
209
|
return Boolean if value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
210
|
-
|
210
|
+
|
211
|
+
if value.is_a? Integer
|
212
|
+
return Integer if value.abs <= INT_MAX
|
213
|
+
return String
|
214
|
+
end
|
215
|
+
|
211
216
|
return Numeric if value.is_a? Numeric
|
212
217
|
return Time if value.is_a? Time
|
213
218
|
return NilClass if value.is_a? NilClass
|
214
219
|
|
220
|
+
|
215
221
|
if value.is_a? String
|
216
|
-
return Integer if value =~ /^[-+]?[0-9]+$/
|
217
|
-
return Numeric if value =~ /^[-+]?[0-9]
|
222
|
+
return Integer if value =~ /^[-+]?[0-9]+$/ && value.to_i.abs <= INT_MAX
|
223
|
+
return Numeric if value =~ /^[-+]?[0-9]*\.[0-9]+$/
|
218
224
|
return Boolean if %w(false true).include?(value.downcase)
|
219
225
|
return Time if Timeliness.parse(value) != nil
|
220
226
|
return String
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: schema-inference
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eurico Doirado
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-08-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,9 +150,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
150
150
|
version: '0'
|
151
151
|
requirements: []
|
152
152
|
rubyforge_project:
|
153
|
-
rubygems_version: 2.
|
153
|
+
rubygems_version: 2.6.11
|
154
154
|
signing_key:
|
155
155
|
specification_version: 4
|
156
156
|
summary: Supports inferring tabular schemas from deep nested structures.
|
157
157
|
test_files: []
|
158
|
-
has_rdoc:
|