schema-inference 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/schema/inference/schema_inferrer.rb +15 -9
- data/lib/schema/inference/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c1d7af131c500e6f39e2215da26ad17f9baf961c
|
|
4
|
+
data.tar.gz: 6d9055d8b1ba6a50342c819fab15b20a2ac2192d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7355e4693fde74f238ef0d9e26ff469b327c61e4bdb7fb9d4fd19155e7db1dffc6221c46009c59b0eae3cb3ca6c65121cbf753f7bdc3a764ae8ea21355be3fcb
|
|
7
|
+
data.tar.gz: 01d7e5023340336bbc2d27dcf6b754dc66ad4d09ad34d3d5f2024decbbd7178ebd05f59c2d03079b7c1e88ef09bce5dbfd8077c5967e07742b53c25ac683eed2
|
data/README.md
CHANGED
data/lib/schema/inference/schema_inferrer.rb
CHANGED
|
@@ -50,7 +50,7 @@ module Schema
|
|
|
50
50
|
raise ArgumentError, 'dataset must be an array or a hash'
|
|
51
51
|
end
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
INT_MAX = 2_147_483_648
|
|
54
54
|
|
|
55
55
|
def data_schema(data)
|
|
56
56
|
table_schema = {}
|
|
@@ -76,8 +76,8 @@ module Schema
|
|
|
76
76
|
field_schema[:types][field[:type]][:count] += 1
|
|
77
77
|
|
|
78
78
|
if type_has_min_max?(field[:type])
|
|
79
|
-
field_size = value_length(field[:inferred_value])
|
|
80
|
-
field_schema[:types][field[:type]][:min] = [field_schema[:types][field[:type]][:min] ||
|
|
79
|
+
field_size = value_length(field[:inferred_value], field[:type])
|
|
80
|
+
field_schema[:types][field[:type]][:min] = [field_schema[:types][field[:type]][:min] || INT_MAX, field_size].min
|
|
81
81
|
field_schema[:types][field[:type]][:max] = [field_schema[:types][field[:type]][:max] || 0, field_size].max
|
|
82
82
|
end
|
|
83
83
|
|
|
@@ -92,8 +92,8 @@ module Schema
|
|
|
92
92
|
type == String || NumericTypes.include?(type)
|
|
93
93
|
end
|
|
94
94
|
|
|
95
|
-
def value_length(value)
|
|
96
|
-
return value.length if
|
|
95
|
+
def value_length(value, type)
|
|
96
|
+
return value.to_s.length if type == String
|
|
97
97
|
value # leave as-is otherwise
|
|
98
98
|
end
|
|
99
99
|
|
|
@@ -109,7 +109,7 @@ module Schema
|
|
|
109
109
|
table_schema[k][:types][type] ||= { count: 0 }
|
|
110
110
|
table_schema[k][:types][type][:count] += info[:count]
|
|
111
111
|
if type_has_min_max?(type)
|
|
112
|
-
table_schema[k][:types][type][:min] = [table_schema[k][:types][type][:min] ||
|
|
112
|
+
table_schema[k][:types][type][:min] = [table_schema[k][:types][type][:min] || INT_MAX, info[:min]].min
|
|
113
113
|
table_schema[k][:types][type][:max] = [table_schema[k][:types][type][:max] || 0, info[:max]].max
|
|
114
114
|
end
|
|
115
115
|
}
|
|
@@ -207,14 +207,20 @@ module Schema
|
|
|
207
207
|
|
|
208
208
|
def detect_type_of(value)
|
|
209
209
|
return Boolean if value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
|
210
|
-
|
|
210
|
+
|
|
211
|
+
if value.is_a? Integer
|
|
212
|
+
return Integer if value.abs <= INT_MAX
|
|
213
|
+
return String
|
|
214
|
+
end
|
|
215
|
+
|
|
211
216
|
return Numeric if value.is_a? Numeric
|
|
212
217
|
return Time if value.is_a? Time
|
|
213
218
|
return NilClass if value.is_a? NilClass
|
|
214
219
|
|
|
220
|
+
|
|
215
221
|
if value.is_a? String
|
|
216
|
-
return Integer if value =~ /^[-+]?[0-9]+$/
|
|
217
|
-
return Numeric if value =~ /^[-+]?[0-9]
|
|
222
|
+
return Integer if value =~ /^[-+]?[0-9]+$/ && value.to_i.abs <= INT_MAX
|
|
223
|
+
return Numeric if value =~ /^[-+]?[0-9]*\.[0-9]+$/
|
|
218
224
|
return Boolean if %w(false true).include?(value.downcase)
|
|
219
225
|
return Time if Timeliness.parse(value) != nil
|
|
220
226
|
return String
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: schema-inference
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.2.1
|
|
4
|
+
version: 1.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Eurico Doirado
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2017-08-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -150,9 +150,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
150
150
|
version: '0'
|
|
151
151
|
requirements: []
|
|
152
152
|
rubyforge_project:
|
|
153
|
-
rubygems_version: 2.
|
|
153
|
+
rubygems_version: 2.6.11
|
|
154
154
|
signing_key:
|
|
155
155
|
specification_version: 4
|
|
156
156
|
summary: Supports inferring tabular schemas from deep nested structures.
|
|
157
157
|
test_files: []
|
|
158
|
-
has_rdoc:
|