jsontableschema 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +13 -0
- data/CHANGELOG.md +17 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +233 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/etc/schemas/geojson.json +209 -0
- data/etc/schemas/json-table-schema.json +102 -0
- data/jsontableschema.gemspec +32 -0
- data/lib/jsontableschema.rb +41 -0
- data/lib/jsontableschema/constraints/constraints.rb +76 -0
- data/lib/jsontableschema/constraints/enum.rb +14 -0
- data/lib/jsontableschema/constraints/max_length.rb +15 -0
- data/lib/jsontableschema/constraints/maximum.rb +14 -0
- data/lib/jsontableschema/constraints/min_length.rb +15 -0
- data/lib/jsontableschema/constraints/minimum.rb +14 -0
- data/lib/jsontableschema/constraints/pattern.rb +14 -0
- data/lib/jsontableschema/constraints/required.rb +32 -0
- data/lib/jsontableschema/data.rb +57 -0
- data/lib/jsontableschema/exceptions.rb +28 -0
- data/lib/jsontableschema/helpers.rb +48 -0
- data/lib/jsontableschema/infer.rb +142 -0
- data/lib/jsontableschema/model.rb +73 -0
- data/lib/jsontableschema/schema.rb +35 -0
- data/lib/jsontableschema/table.rb +50 -0
- data/lib/jsontableschema/types/any.rb +23 -0
- data/lib/jsontableschema/types/array.rb +37 -0
- data/lib/jsontableschema/types/base.rb +54 -0
- data/lib/jsontableschema/types/boolean.rb +35 -0
- data/lib/jsontableschema/types/date.rb +56 -0
- data/lib/jsontableschema/types/datetime.rb +63 -0
- data/lib/jsontableschema/types/geojson.rb +38 -0
- data/lib/jsontableschema/types/geopoint.rb +56 -0
- data/lib/jsontableschema/types/integer.rb +35 -0
- data/lib/jsontableschema/types/null.rb +37 -0
- data/lib/jsontableschema/types/number.rb +60 -0
- data/lib/jsontableschema/types/object.rb +37 -0
- data/lib/jsontableschema/types/string.rb +64 -0
- data/lib/jsontableschema/types/time.rb +55 -0
- data/lib/jsontableschema/validate.rb +54 -0
- data/lib/jsontableschema/version.rb +3 -0
- metadata +230 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
module JsonTableSchema
  # Small helpers mixed into the classes of this gem: boolean coercion and
  # the mapping from schema type names to type class names.
  module Helpers

    # Coerces a value to a boolean.
    #
    # @param value [Object] a real boolean, or any object whose `to_s`
    #   (compared case-insensitively) is one of `true_values`/`false_values`
    # @return [true, false, nil] the boolean, or nil when the value is not
    #   a recognised spelling
    def convert_to_boolean(value)
      # Ruby has no `Boolean` class, so test for the two singletons directly.
      return value if true.equal?(value) || false.equal?(value)

      normalised = value.to_s.downcase
      if true_values.include?(normalised)
        true
      elsif false_values.include?(normalised)
        false
      end
    end

    # @return [Array<String>] lower-case spellings treated as true
    def true_values
      ['yes', 'y', 'true', 't', '1']
    end

    # @return [Array<String>] lower-case spellings treated as false
    def false_values
      ['no', 'n', 'false', 'f', '0']
    end

    # Builds the fully-qualified class name for a schema type name.
    #
    # @param type [String] a key of `type_class_lookup`, e.g. 'geojson'
    # @return [String] e.g. 'JsonTableSchema::Types::GeoJSON'; an unknown
    #   type yields a name with an empty final segment
    def get_class_for_type(type)
      "JsonTableSchema::Types::#{type_class_lookup[type]}"
    end

    # @return [Hash{String => String}] schema type name => class name
    def type_class_lookup
      {
        'any' => 'Any',
        'array' => 'Array',
        'base' => 'Base',
        'boolean' => 'Boolean',
        'date' => 'Date',
        'datetime' => 'DateTime',
        'geojson' => 'GeoJSON',
        'geopoint' => 'GeoPoint',
        'integer' => 'Integer',
        'null' => 'Null',
        'number' => 'Number',
        'object' => 'Object',
        'string' => 'String',
        'time' => 'Time',
      }
    end

  end
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
module JsonTableSchema
  # Infers a schema from a header list and sample rows by trying every
  # available type against every cell and keeping, per column, the guess
  # that occurs most often.
  class Infer

    include JsonTableSchema::Helpers

    # @return [JsonTableSchema::Schema] the inferred schema (set by `infer!`)
    attr_reader :schema

    # @param headers [Array<String>] column names, one per field
    # @param rows [Array<Array>] data rows; each row is an array of cells
    # @param opts [Hash] :explicit (emit `required`/`unique` constraints even
    #   when false), :primary_key (header to mark as primary key),
    #   :row_limit (maximum number of rows to sample)
    def initialize(headers, rows, opts = {})
      @headers = headers
      @rows = rows
      @explicit = opts[:explicit]
      @primary_key = opts[:primary_key]
      @row_limit = opts[:row_limit]

      @schema = {
        'fields' => fields
      }
      @schema['primaryKey'] = @primary_key if @primary_key
      infer!
    end

    # Builds one bare field descriptor per header.
    #
    # @return [Array<Hash>] descriptors with 'name', 'title', 'description'
    #   and, when any apply, 'constraints'
    def fields
      @headers.map do |header|
        descriptor = {
          'name' => header,
          'title' => '',
          'description' => '',
        }

        constraints = {}
        constraints['required'] = @explicit === true
        constraints['unique'] = (header == @primary_key)
        # Outside explicit mode only constraints that are actually set are kept.
        constraints.delete_if { |_name, enabled| enabled == false } unless @explicit === true
        descriptor['constraints'] = constraints if constraints.count > 0
        descriptor
      end
    end

    # Samples the rows, records a type guess for every cell, resolves one
    # type per column and replaces `@schema` with a Schema instance.
    def infer!
      type_matches = []
      @rows.each_with_index do |row, row_index|
        # `>=` so that exactly `row_limit` rows are sampled (indices are 0-based).
        break if @row_limit && row_index >= @row_limit

        fit_to_headers(row).each_with_index do |col, col_index|
          type_matches[col_index] ||= []
          type_matches[col_index] << guess_type(col, col_index)
        end
      end
      resolve_types(type_matches)
      @schema = JsonTableSchema::Schema.new(@schema)
    end

    # Guesses the type and format of a single cell.
    #
    # Types are tried from the end of `available_types` towards the start;
    # the first one whose converter accepts the value wins. Falls back to
    # 'string'/'default' when none does.
    #
    # @param col [Object] the cell value
    # @param index [Integer] column index, used to look up the field descriptor
    # @return [Hash] { 'type' => String, 'format' => String }
    def guess_type(col, index)
      guessed_type = 'string'
      guessed_format = 'default'

      available_types.reverse_each do |type|
        klass = get_class_for_type(type)
        converter = Kernel.const_get(klass).new(@schema['fields'][index])
        if converter.test(col) === true
          guessed_type = type
          guessed_format = guess_format(converter, col)
          break
        end
      end

      {
        'type' => guessed_type,
        'format' => guessed_format
      }
    end

    # Finds the first non-default `cast_*` method of the converter that
    # accepts the value and returns its suffix as the format name.
    #
    # @param converter [Object] a type instance exposing `cast_*` methods
    # @param col [Object] the cell value
    # @return [String] the format name, or 'default' when none matched
    def guess_format(converter, col)
      guessed_format = 'default'
      converter.class.instance_methods.grep(/cast_/).each do |method|
        begin
          format_name = method.to_s
          format_name.slice!('cast_')
          next if format_name == 'default'
          converter.send(method, col)
          guessed_format = format_name
          break
        rescue JsonTableSchema::Exception
          # This format rejected the value; try the next one.
        end
      end
      guessed_format
    end

    # Merges the most frequent guess of every column into its field
    # descriptor. Ties go to the guess that was seen first.
    #
    # @param results [Array<Array<Hash>>] per column, the guesses per row
    def resolve_types(results)
      results.each_with_index do |guesses, index|
        # Count every occurrence; de-duplicating first would make all
        # counts equal and the vote meaningless.
        counts = Hash.new(0)
        guesses.each { |guess| counts[guess] += 1 }

        winner, _occurrences = counts.max_by { |_guess, occurrences| occurrences }
        @schema['fields'][index].merge!(winner)
      end
    end

    # @return [Array<String>] type names, tried in reverse order by `guess_type`
    def available_types
      [
        'any',
        'string',
        'boolean',
        'number',
        'integer',
        'null',
        'date',
        'time',
        'datetime',
        'array',
        'object',
        'geopoint',
        'geojson'
      ]
    end

    private

    # Returns a copy of the row with exactly one cell per header: surplus
    # cells are dropped, missing cells are filled with ''. The caller's row
    # is left untouched.
    def fit_to_headers(row)
      width = @headers.count
      cells = row[0...width]
      cells + [''] * (width - cells.count)
    end

  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module JsonTableSchema
  # Read accessors over a schema descriptor. Expects to be mixed into a
  # Hash-like object (it reads `self['fields']` etc.) that also sets `@opts`.
  module Model

    # Values applied by `expand!` to fields that omit them.
    DEFAULTS = {
      'format' => 'default',
      'type' => 'string'
    }

    # @return [Array<String>] field names (lower-cased when the
    #   :case_insensitive_headers option is set); [] when the descriptor has
    #   no usable fields
    def headers
      fields.map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # @return [Array<Hash>, nil] the raw field descriptors
    def fields
      self['fields']
    end

    # @return [Array] the primary key(s) as a flat array; [] when unset
    def primary_keys
      [self['primaryKey']].flatten.compact
    end

    # @return [Array] the 'foreignKeys' entry, or [] when unset
    def foreign_keys
      self['foreignKeys'] || []
    end

    # @param key [String] field name
    # @return [String] the field's type
    # @raise [NoMethodError] when no field has that name
    def get_type(key)
      get_field(key)['type']
    end

    # @param key [String] field name
    # @return [Hash] the field's constraints, or {} when it has none
    # @raise [NoMethodError] when no field has that name
    def get_constraints(key)
      get_field(key)['constraints'] || {}
    end

    # @return [Array<String>] names of fields whose 'required' constraint is
    #   true; [] when the descriptor has no usable fields
    def required_headers
      fields.select { |f| !f['constraints'].nil? && f['constraints']['required'] == true }
            .map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # @param key [String] field name
    # @return [Boolean] whether a field with that exact name exists
    def has_field?(key)
      !get_field(key).nil?
    end

    # @param key [String] field name (exact match)
    # @return [Hash, nil] the field descriptor, or nil when absent
    def get_field(key)
      (fields || []).find { |f| f['name'] == key }
    end

    # @param type [String] type name
    # @return [Array<Hash>] descriptors of all fields with that type
    def get_fields_by_type(type)
      (fields || []).select { |f| f['type'] == type }
    end

    private

    # Lower-cases the name when :case_insensitive_headers is set.
    # NOTE: `downcase!` modifies the string stored in the field descriptor
    # itself, so the descriptor's 'name' changes too; kept as-is because
    # other lookups may rely on it.
    def transform(name)
      name.downcase! if @opts[:case_insensitive_headers]
      name
    end

    # Fills in the default 'type' and 'format' on every field lacking them.
    def expand!
      (self['fields'] || []).each do |f|
        f['type'] = DEFAULTS['type'] if f['type'].nil?
        f['format'] = DEFAULTS['format'] if f['format'].nil?
      end
    end

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module JsonTableSchema
  # A schema descriptor held as a Hash, loaded from a hash, a local JSON
  # file or a URL.
  class Schema < Hash
    include JsonTableSchema::Validate
    include JsonTableSchema::Model
    include JsonTableSchema::Data
    include JsonTableSchema::Helpers

    # @param schema [Hash, String] a descriptor hash, or a path/URL pointing
    #   at a JSON descriptor
    # @param opts [Hash] options stored in `@opts` for the included modules
    # @raise [SchemaException] when the descriptor cannot be loaded
    def initialize(schema, opts = {})
      self.merge! parse_schema(schema)
      @messages = []
      @opts = opts
      # NOTE(review): load_validator! is not defined in this class;
      # presumably provided by an included module - confirm.
      load_validator!
      expand!
    end

    # Turns the constructor argument into a descriptor hash.
    #
    # @param schema [Hash, String] see `initialize`
    # @return [Hash] the descriptor
    # @raise [SchemaException] for a missing file, an HTTP error, invalid
    #   JSON, or an argument that is neither a Hash nor a String
    def parse_schema(schema)
      # is_a? rather than an exact class match, so Hash subclasses
      # (including Schema itself) are accepted.
      if schema.is_a?(Hash)
        schema
      elsif schema.is_a?(String)
        begin
          JSON.parse read_descriptor(schema)
        rescue Errno::ENOENT
          raise SchemaException.new("File not found at `#{schema}`")
        rescue OpenURI::HTTPError => e
          raise SchemaException.new("URL `#{schema}` returned #{e.message}")
        rescue JSON::ParserError
          raise SchemaException.new("File at `#{schema}` is not valid JSON")
        end
      else
        raise SchemaException.new("A schema must be a hash, path or URL")
      end
    end

    private

    # Reads the raw descriptor text from a URL or a local path.
    #
    # Avoids `Kernel#open`, which leaves the handle open, runs `|command`
    # strings, and no longer fetches URLs on Ruby 3+. URLs are read through
    # the `read` method open-uri adds to URI objects (open-uri is assumed
    # loaded, as the OpenURI::HTTPError rescue above implies); anything
    # else is read as a file.
    def read_descriptor(location)
      if location =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
        uri = ::URI.parse(location)
        return uri.read if uri.respond_to?(:read)
      end
      File.read(location)
    end

  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module JsonTableSchema
  # Pairs CSV data with a schema, either supplied or inferred from the data.
  class Table

    # @return [Object] the schema in use (supplied or inferred)
    attr_reader :schema

    # Builds a table whose schema is inferred from the data.
    #
    # @param csv [Array<Array>, String] see `initialize`
    # @param opts [Hash] see `initialize`
    # @return [JsonTableSchema::Table]
    def self.infer_schema(csv, opts = {})
      JsonTableSchema::Table.new(csv, nil, opts)
    end

    # @param csv [Array<Array>, String] rows as arrays (header row first),
    #   or a path/URL of a CSV file
    # @param schema [Hash, String, nil] passed to Schema.new; nil means infer
    # @param opts [Hash] :csv_options is forwarded to the CSV parser
    def initialize(csv, schema, opts = {})
      @opts = opts
      @csv = parse_csv(csv)
      @schema = schema.nil? ? infer_schema(@csv) : JsonTableSchema::Schema.new(schema)
    end

    # Parses the input with headers enabled.
    #
    # @param csv [Array<Array>, String] see `initialize`
    # @return [CSV::Table]
    def parse_csv(csv)
      csv_string = csv.is_a?(Array) ? array_to_csv(csv) : read_source(csv)
      # Options are splatted: CSV.parse takes keyword arguments, and a
      # positional hash is rejected on Ruby 3.
      CSV.parse(csv_string, **csv_options)
    end

    # @return [Hash] the caller's :csv_options with `headers: true` forced
    def csv_options
      (@opts[:csv_options] || {}).merge(headers: true)
    end

    # Returns the data rows run through the schema's `convert`.
    #
    # @param opts [Hash] :fail_fast (default true), :limit (max rows),
    #   :keyed (return hashes keyed by header instead of arrays)
    # @return [Array] whatever `@schema.convert` returns, optionally keyed
    def rows(opts = {})
      fail_fast = opts[:fail_fast] || opts[:fail_fast].nil?
      # CSV::Table#to_a puts the header row first; drop it.
      data_rows = @csv.to_a.drop(1)
      data_rows = data_rows.take(opts[:limit]) if opts[:limit]
      converted = @schema.convert(data_rows, fail_fast)
      opts[:keyed] ? converted_to_hash(@csv.headers, converted) : converted
    end

    private

    # Serialises rows to CSV text, one "\r\n"-separated line per row.
    def array_to_csv(array)
      array.map { |row| row.to_csv(row_sep: nil) }.join("\r\n")
    end

    # Reads CSV text from a URL or a local path.
    #
    # Avoids `Kernel#open`, which leaves the handle open, runs `|command`
    # strings, and no longer fetches URLs on Ruby 3+. URLs are read through
    # the `read` method open-uri adds to URI objects when it is loaded;
    # anything else is read as a file.
    def read_source(source)
      if source.is_a?(String) && source =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://} && defined?(::URI)
        uri = ::URI.parse(source)
        return uri.read if uri.respond_to?(:read)
      end
      File.read(source)
    end

    # Zips every converted row with the headers into a Hash.
    def converted_to_hash(headers, array)
      array.map do |row|
        Hash[row.map.with_index { |col, i| [headers[i], col] }]
      end
    end

    # Infers a schema from the parsed table. The header row is dropped
    # first (as in `rows`) so it is not sampled as data.
    def infer_schema(csv)
      inferer = JsonTableSchema::Infer.new(csv.headers, csv.to_a.drop(1))
      inferer.schema
    end

  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module JsonTableSchema
  module Types
    # The 'any' type: values are passed through without conversion.
    class Any < Base

      # @return [Array<String>] constraint names this type supports
      def self.supported_constraints
        %w[required pattern enum]
      end

      # @return [String] the schema name of this type
      def name
        'any'
      end

      # Default format: returns the value exactly as given.
      #
      # @param value [Object]
      # @return [Object] the same value
      def cast_default(value)
        value
      end

    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module JsonTableSchema
  module Types
    # The 'array' type: accepts Ruby arrays and JSON text that decodes to one.
    class Array < Base

      # @return [Array<String>] constraint names this type supports
      def self.supported_constraints
        %w[required pattern enum minLength maxLength]
      end

      # @return [String] the schema name of this type
      def name
        'array'
      end

      # @return [Class] the core Ruby class a cast value must be
      def type
        ::Array
      end

      # Default format: passes arrays through and decodes JSON strings.
      #
      # @param value [Object] an array, or JSON text
      # @return [::Array] the array
      # @raise [JsonTableSchema::InvalidArrayType] when the value is not an
      #   array and cannot be decoded into one
      def cast_default(value)
        return value if value.is_a?(type)

        decoded = JSON.parse(value)
        unless decoded.is_a?(type)
          raise JsonTableSchema::InvalidArrayType.new("#{value} is not a valid array")
        end
        decoded
      rescue
        # Any failure (bad JSON, non-string input, wrong decoded type) is
        # reported as an invalid array.
        raise JsonTableSchema::InvalidArrayType.new("#{value} is not a valid array")
      end

    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module JsonTableSchema
  module Types
    # Common behaviour of all field types: constraint checking, null
    # handling and dispatch to the `cast_<format>` method of the subclass.
    class Base
      include JsonTableSchema::Helpers


      # @param field [Hash] field descriptor; reads 'constraints', 'type'
      #   and 'format'
      def initialize(field)
        @field = field
        @constraints = field['constraints'] || {}
        @required = ['true', true].include?(@constraints['required'])
        @type = @field['type']
        set_format
      end

      # Validates the value against the field's constraints (unless
      # skipped) and converts it using the field's format.
      #
      # @param value [Object] the raw value
      # @param skip_constraints [Boolean] when true, constraints are not checked
      # @return [Object, nil] the converted value; nil for a null value on a
      #   non-required field
      # @raise [JsonTableSchema::InvalidFormat] when this type has no
      #   `cast_<format>` method for the field's format
      def cast(value, skip_constraints = false)
        JsonTableSchema::Constraints.new(@field, value).validate! unless skip_constraints
        return nil if is_null?(value)
        send("cast_#{@format}", value)
      rescue NoMethodError => e
        # Look at the missing method's name, not the message text: the
        # wording and quoting of NoMethodError messages differ between Ruby
        # versions.
        if e.name.to_s.start_with?('cast_')
          raise(JsonTableSchema::InvalidFormat.new("The format `#{@format}` is not supported by the type `#{@type}`"))
        else
          raise e
        end
      end

      # Checks whether the value can be cast, ignoring constraints.
      #
      # @param value [Object]
      # @return [Boolean] false when casting raises a JsonTableSchema::Exception
      def test(value)
        cast(value, true)
        true
      rescue JsonTableSchema::Exception
        false
      end

      # Derives `@format` (and, for 'fmt:<pattern>' formats, `@format_string`)
      # from the field descriptor; the format defaults to 'default'.
      def set_format
        if (@field['format'] || '').start_with?('fmt:')
          @format, @format_string = *@field['format'].split(':', 2)
        else
          @format = @field['format'] || 'default'
        end
      end

      private

      # A value counts as null only when the field is not required.
      def is_null?(value)
        null_values.include?(value) && @required == false
      end

      # @return [Array<String>] exact spellings treated as null
      def null_values
        ['null', 'none', 'nil', 'nan', '-', '']
      end

    end
  end
end
|