jsontableschema 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +13 -0
- data/CHANGELOG.md +17 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +233 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/etc/schemas/geojson.json +209 -0
- data/etc/schemas/json-table-schema.json +102 -0
- data/jsontableschema.gemspec +32 -0
- data/lib/jsontableschema.rb +41 -0
- data/lib/jsontableschema/constraints/constraints.rb +76 -0
- data/lib/jsontableschema/constraints/enum.rb +14 -0
- data/lib/jsontableschema/constraints/max_length.rb +15 -0
- data/lib/jsontableschema/constraints/maximum.rb +14 -0
- data/lib/jsontableschema/constraints/min_length.rb +15 -0
- data/lib/jsontableschema/constraints/minimum.rb +14 -0
- data/lib/jsontableschema/constraints/pattern.rb +14 -0
- data/lib/jsontableschema/constraints/required.rb +32 -0
- data/lib/jsontableschema/data.rb +57 -0
- data/lib/jsontableschema/exceptions.rb +28 -0
- data/lib/jsontableschema/helpers.rb +48 -0
- data/lib/jsontableschema/infer.rb +142 -0
- data/lib/jsontableschema/model.rb +73 -0
- data/lib/jsontableschema/schema.rb +35 -0
- data/lib/jsontableschema/table.rb +50 -0
- data/lib/jsontableschema/types/any.rb +23 -0
- data/lib/jsontableschema/types/array.rb +37 -0
- data/lib/jsontableschema/types/base.rb +54 -0
- data/lib/jsontableschema/types/boolean.rb +35 -0
- data/lib/jsontableschema/types/date.rb +56 -0
- data/lib/jsontableschema/types/datetime.rb +63 -0
- data/lib/jsontableschema/types/geojson.rb +38 -0
- data/lib/jsontableschema/types/geopoint.rb +56 -0
- data/lib/jsontableschema/types/integer.rb +35 -0
- data/lib/jsontableschema/types/null.rb +37 -0
- data/lib/jsontableschema/types/number.rb +60 -0
- data/lib/jsontableschema/types/object.rb +37 -0
- data/lib/jsontableschema/types/string.rb +64 -0
- data/lib/jsontableschema/types/time.rb +55 -0
- data/lib/jsontableschema/validate.rb +54 -0
- data/lib/jsontableschema/version.rb +3 -0
- metadata +230 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
module JsonTableSchema
  # Small utilities shared by the classes that mix this module in.
  module Helpers

    # Interpret +value+ as a boolean.
    #
    # Real booleans are returned untouched. Any other value is stringified,
    # downcased and looked up in #true_values and #false_values.
    #
    # @param value [Object] the value to interpret
    # @return [true, false, nil] nil when the value matches neither list
    def convert_to_boolean(value)
      # Core Ruby has no `Boolean` constant (true and false are instances of
      # TrueClass and FalseClass), so compare against the two singletons
      # instead of calling `is_a?(Boolean)`.
      return value if true.equal?(value) || false.equal?(value)

      normalized = value.to_s.downcase
      return true if true_values.include?(normalized)
      return false if false_values.include?(normalized)

      nil
    end

    # @return [Array<String>] lower-case spellings that count as true
    def true_values
      ['yes', 'y', 'true', 't', '1']
    end

    # @return [Array<String>] lower-case spellings that count as false
    def false_values
      ['no', 'n', 'false', 'f', '0']
    end

    # Build the fully qualified class name for a schema type name.
    #
    # @param type [String] a key of #type_class_lookup
    # @return [String] the class name as a string (not the class itself)
    def get_class_for_type(type)
      "JsonTableSchema::Types::#{type_class_lookup[type]}"
    end

    # @return [Hash{String => String}] schema type name => class name under
    #   JsonTableSchema::Types
    def type_class_lookup
      {
        'any' => 'Any',
        'array' => 'Array',
        'base' => 'Base',
        'boolean' => 'Boolean',
        'date' => 'Date',
        'datetime' => 'DateTime',
        'geojson' => 'GeoJSON',
        'geopoint' => 'GeoPoint',
        'integer' => 'Integer',
        'null' => 'Null',
        'number' => 'Number',
        'object' => 'Object',
        'string' => 'String',
        'time' => 'Time',
      }
    end

  end
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
module JsonTableSchema
  # Builds a schema description from a list of header names and sample rows
  # by guessing a type and a format for every column.
  class Infer

    include JsonTableSchema::Helpers

    # The inferred schema. It is a plain Hash while inference runs and is
    # replaced by a JsonTableSchema::Schema at the end of #infer!.
    attr_reader :schema

    # @param headers [Array] column names, one per field
    # @param rows [Array<Array>] sample rows used for guessing
    # @param opts [Hash] reads :explicit, :primary_key and :row_limit
    def initialize(headers, rows, opts = {})
      @headers = headers
      @rows = rows
      @explicit = opts[:explicit]
      @primary_key = opts[:primary_key]
      @row_limit = opts[:row_limit]

      @schema = { 'fields' => fields }
      @schema['primaryKey'] = @primary_key if @primary_key
      infer!
    end

    # Build one field descriptor per header.
    #
    # A 'constraints' entry is only attached when it is non-empty; unless
    # :explicit is exactly true, constraints whose value is false are dropped.
    #
    # @return [Array<Hash>]
    def fields
      @headers.map do |header|
        descriptor = {
          'name' => header,
          'title' => '',
          'description' => '',
        }

        constraints = {
          'required' => @explicit == true,
          'unique' => (header == @primary_key)
        }
        constraints.delete_if { |_key, flag| flag == false } unless @explicit == true
        descriptor['constraints'] = constraints unless constraints.empty?
        descriptor
      end
    end

    # Guess a type for every cell, merge the winning guess per column into the
    # field descriptors and wrap the result in a JsonTableSchema::Schema.
    def infer!
      type_matches = []
      headers_length = @headers.count

      @rows.each_with_index do |row, row_index|
        # NOTE(review): rows with index 0..@row_limit are inspected, i.e. up to
        # @row_limit + 1 rows. Left unchanged; confirm whether that is intended.
        break if @row_limit && row_index > @row_limit

        fit_row(row, headers_length).each_with_index do |col, col_index|
          type_matches[col_index] ||= []
          type_matches[col_index] << guess_type(col, col_index)
        end
      end

      resolve_types(type_matches)
      @schema = JsonTableSchema::Schema.new(@schema)
    end

    # Try the available types from last to first and keep the first one whose
    # converter accepts the cell; falls back to string/default.
    #
    # @param col [Object] the cell value
    # @param index [Integer] column index, used to pick the field descriptor
    # @return [Hash] with 'type' and 'format' keys
    def guess_type(col, index)
      guessed_type = 'string'
      guessed_format = 'default'

      available_types.reverse_each do |type|
        converter = Kernel.const_get(get_class_for_type(type)).new(@schema['fields'][index])
        next unless converter.test(col) == true

        guessed_type = type
        guessed_format = guess_format(converter, col)
        break
      end

      { 'type' => guessed_type, 'format' => guessed_format }
    end

    # Call each non-default `cast_*` method of the converter on the cell and
    # return the format name of the first one that does not raise.
    #
    # @return [String] the format name, or 'default' when none fits
    def guess_format(converter, col)
      guessed_format = 'default'
      converter.class.instance_methods.grep(/cast_/).each do |method|
        begin
          # Non-mutating equivalent of slicing 'cast_' out of the method name.
          format = method.to_s.sub('cast_', '')
          next if format == 'default'
          converter.send(method, col)
          guessed_format = format
          break
        rescue JsonTableSchema::Exception
          # This format rejected the value; try the next cast method.
        end
      end
      guessed_format
    end

    # Merge the most frequent guess of every column into its field descriptor.
    #
    # Occurrences are counted before any de-duplication so the majority really
    # wins. On a tie the guess seen first is kept, because max_by returns the
    # first maximal entry and the hash preserves insertion order.
    #
    # @param results [Array<Array<Hash>>] per column, the guesses of every row
    def resolve_types(results)
      results.each_with_index do |result, index|
        counts = Hash.new(0)
        result.each { |guess| counts[guess] += 1 }

        winner, _count = counts.max_by { |_guess, count| count }
        @schema['fields'][index].merge!(winner)
      end
    end

    # @return [Array<String>] type names; #guess_type walks this list in
    #   reverse order
    def available_types
      [
        'any',
        'string',
        'boolean',
        'number',
        'integer',
        'null',
        'date',
        'time',
        'datetime',
        'array',
        'object',
        'geopoint',
        'geojson'
      ]
    end

    private

    # Return a copy of +row+ with exactly +width+ cells: surplus cells are
    # dropped, missing cells are filled with empty strings. The caller's row
    # is never modified.
    def fit_row(row, width)
      return row[0...width] if row.count > width
      return row + [''] * (width - row.count) if row.count < width

      row
    end

  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module JsonTableSchema
  # Read accessors over a schema hash.
  #
  # The including object must respond to #[] with string keys ('fields',
  # 'primaryKey', 'foreignKeys') and must set @opts before #headers or
  # #required_headers is called.
  module Model

    # Values applied by #expand! to fields that do not declare them.
    DEFAULTS = {
      'format' => 'default',
      'type' => 'string'
    }

    # @return [Array] the field names, or [] when a NoMethodError is raised
    #   while collecting them (for example when there is no 'fields' entry)
    def headers
      fields.map { |field| transform(field['name']) }
    rescue NoMethodError
      []
    end

    # Public accessor for the raw field descriptors. The original file
    # defined this method a second time below `private`, which silently
    # turned it private; that duplicate is removed.
    #
    # @return [Array<Hash>, nil]
    def fields
      self['fields']
    end

    # @return [Array] the primary key(s) as a flat list without nils
    def primary_keys
      [self['primaryKey']].flatten.compact
    end

    # @return [Array] the 'foreignKeys' entry, or [] when absent
    def foreign_keys
      self['foreignKeys'] || []
    end

    # @param key [String] field name
    # @return [Object] the field's 'type'; raises NoMethodError when no field
    #   has that name
    def get_type(key)
      get_field(key)['type']
    end

    # @param key [String] field name
    # @return [Hash] the field's constraints, or {} when it declares none
    def get_constraints(key)
      get_field(key)['constraints'] || {}
    end

    # @return [Array] names of the fields whose 'required' constraint is
    #   exactly true, or [] when a NoMethodError is raised while collecting
    def required_headers
      required = fields.select do |field|
        !field['constraints'].nil? && field['constraints']['required'] == true
      end
      required.map { |field| transform(field['name']) }
    rescue NoMethodError
      []
    end

    # @param key [String] field name
    # @return [Boolean]
    def has_field?(key)
      !get_field(key).nil?
    end

    # @param key [String] field name
    # @return [Hash, nil] the first field descriptor with that name
    def get_field(key)
      fields.find { |field| field['name'] == key }
    end

    # @param type [String]
    # @return [Array<Hash>] all field descriptors of that type
    def get_fields_by_type(type)
      fields.select { |field| field['type'] == type }
    end

    private

    # Downcase the name when the :case_insensitive_headers option is set.
    # NOTE(review): downcase! edits the string stored in the field descriptor
    # in place; kept as is because other code may depend on it.
    def transform(name)
      name.downcase! if @opts[:case_insensitive_headers]
      name
    end

    # Fill in the default 'type' and 'format' on every field lacking them.
    def expand!
      (self['fields'] || []).each do |field|
        field['type'] = DEFAULTS['type'] if field['type'].nil?
        field['format'] = DEFAULTS['format'] if field['format'].nil?
      end
    end

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module JsonTableSchema
  # A schema held as a Hash, built from a hash or from JSON read from a path
  # or URL.
  class Schema < Hash
    include JsonTableSchema::Validate
    include JsonTableSchema::Model
    include JsonTableSchema::Data
    include JsonTableSchema::Helpers

    # @param schema [Hash, String] a schema hash, or a location to read JSON from
    # @param opts [Hash] stored in @opts for the mixed-in modules
    # @raise [SchemaException] when the schema cannot be loaded
    def initialize(schema, opts = {})
      self.merge! parse_schema(schema)
      @messages = []
      @opts = opts
      load_validator!
      expand!
    end

    # Turn the constructor argument into a Hash.
    #
    # Subclasses of Hash and String are accepted as well; the previous exact
    # class comparison rejected them.
    #
    # @param schema [Hash, String]
    # @return [Hash]
    # @raise [SchemaException] for a missing file, an HTTP error, invalid JSON
    #   or an unsupported argument type
    def parse_schema(schema)
      case schema
      when Hash
        schema
      when String
        begin
          # The block form closes the handle once the content has been read.
          # NOTE(review): Kernel#open runs a command when the string starts
          # with "|"; do not pass untrusted locations here.
          JSON.parse(open(schema) { |io| io.read })
        rescue Errno::ENOENT
          raise SchemaException.new("File not found at `#{schema}`")
        rescue OpenURI::HTTPError => e
          raise SchemaException.new("URL `#{schema}` returned #{e.message}")
        rescue JSON::ParserError
          raise SchemaException.new("File at `#{schema}` is not valid JSON")
        end
      else
        raise SchemaException.new("A schema must be a hash, path or URL")
      end
    end

  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module JsonTableSchema
  # Couples parsed CSV data with a schema, which is either supplied or
  # inferred from the data.
  class Table

    attr_reader :schema

    # Build a Table whose schema is inferred from the CSV data.
    #
    # @return [JsonTableSchema::Table]
    def self.infer_schema(csv, opts = {})
      JsonTableSchema::Table.new(csv, nil, opts)
    end

    # @param csv [Array<Array>, String] rows including the header row, or a
    #   location to read CSV text from
    # @param schema [Object, nil] passed to JsonTableSchema::Schema.new; nil
    #   triggers inference
    # @param opts [Hash] reads :csv_options
    def initialize(csv, schema, opts = {})
      @opts = opts
      @csv = parse_csv(csv)
      @schema = schema.nil? ? infer_schema(@csv) : JsonTableSchema::Schema.new(schema)
    end

    # Parse the input with headers enabled.
    #
    # @return [CSV::Table]
    def parse_csv(csv)
      # The block form closes the handle once the content has been read.
      # NOTE(review): Kernel#open runs a command when the string starts with
      # "|"; do not pass untrusted locations here.
      csv_string = csv.is_a?(Array) ? array_to_csv(csv) : open(csv) { |io| io.read }
      # CSV.parse takes its options as keywords on Ruby 3; a positional Hash
      # raises ArgumentError there, so splat it.
      CSV.parse(csv_string, **csv_options)
    end

    # @return [Hash] the caller's :csv_options with headers forced on
    def csv_options
      (@opts[:csv_options] || {}).merge(headers: true)
    end

    # Convert the data rows through the schema.
    #
    # @param opts [Hash] :fail_fast (true when absent or nil), :limit
    #   (maximum number of rows) and :keyed (return hashes keyed by header)
    # @return [Array] whatever the schema's #convert returns, or an array of
    #   hashes when :keyed is set
    def rows(opts = {})
      fail_fast = opts[:fail_fast].nil? || opts[:fail_fast]
      data = @csv.to_a.drop(1) # CSV::Table#to_a puts the header row first
      data = data.take(opts[:limit]) if opts[:limit]
      converted = @schema.convert(data, fail_fast)
      opts[:keyed] ? converted_to_hash(@csv.headers, converted) : converted
    end

    private

    # Serialise rows to CSV text, joined with CRLF.
    def array_to_csv(array)
      array.map { |row| row.to_csv(row_sep: nil) }.join("\r\n")
    end

    # Pair every cell with the header at the same position.
    def converted_to_hash(headers, array)
      array.map do |row|
        Hash[row.each_with_index.map { |col, i| [headers[i], col] }]
      end
    end

    # Infer a schema from the data rows only. The header row that
    # CSV::Table#to_a puts first is dropped so that column names are not
    # type-guessed as if they were data.
    def infer_schema(csv)
      JsonTableSchema::Infer.new(csv.headers, csv.to_a.drop(1)).schema
    end

  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module JsonTableSchema
  module Types
    # Field type whose default cast returns the value unchanged.
    class Any < Base

      class << self
        # @return [Array<String>] names of the constraints listed for this type
        def supported_constraints
          %w[required pattern enum]
        end
      end

      # @return [String] the schema name of this type
      def name
        'any'
      end

      # @param value [Object]
      # @return [Object] the same value, untouched
      def cast_default(value)
        value
      end

    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module JsonTableSchema
  module Types
    # Field type for arrays, given either as Ruby arrays or as JSON text.
    class Array < Base

      class << self
        # @return [Array<String>] names of the constraints listed for this type
        def supported_constraints
          %w[required pattern enum minLength maxLength]
        end
      end

      # @return [String] the schema name of this type
      def name
        'array'
      end

      # @return [Class] the Ruby class a cast value must be an instance of
      def type
        ::Array
      end

      # Return arrays as they are; parse anything else as JSON and accept the
      # result only when it is an array.
      #
      # @param value [Object]
      # @return [::Array]
      # @raise [JsonTableSchema::InvalidArrayType] when parsing fails or the
      #   parsed value is not an array
      def cast_default(value)
        return value if value.is_a?(type)

        decoded = JSON.parse(value)
        return decoded if decoded.is_a?(type)

        raise JsonTableSchema::InvalidArrayType.new("#{value} is not a valid array")
      rescue
        # Any StandardError raised above is reported as an invalid array.
        raise JsonTableSchema::InvalidArrayType.new("#{value} is not a valid array")
      end

    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module JsonTableSchema
  module Types
    # Common behaviour of the field types: stores the field descriptor and
    # dispatches #cast to the `cast_<format>` method of the concrete type.
    class Base
      include JsonTableSchema::Helpers

      # @param field [Hash] field descriptor; reads 'constraints', 'type' and
      #   'format'
      def initialize(field)
        @field = field
        @constraints = field['constraints'] || {}
        # 'required' counts when it is true or the string 'true'.
        @required = ['true', true].include?(@constraints['required'])
        @type = @field['type']
        set_format
      end

      # Cast a value according to the field's format.
      #
      # @param value [Object] the raw value
      # @param skip_constraints [Boolean] when true, constraint validation is
      #   not run
      # @return [Object, nil] nil for null-like values of non-required fields,
      #   otherwise the result of the `cast_<format>` method
      # @raise [JsonTableSchema::InvalidFormat] when no `cast_<format>` method
      #   exists for this type
      def cast(value, skip_constraints = false)
        JsonTableSchema::Constraints.new(@field, value).validate! unless skip_constraints
        return nil if is_null?(value)
        send("cast_#{@format}", value)
      rescue NoMethodError => e
        # Identify the missing method through NameError#name rather than the
        # message text: Ruby 3.4 changed the opening quote in the message from
        # a backtick to a single quote, so a prefix match on the message no
        # longer recognises a missing cast method.
        raise unless e.name.to_s.start_with?('cast_')

        raise(JsonTableSchema::InvalidFormat.new("The format `#{@format}` is not supported by the type `#{@type}`"))
      end

      # @param value [Object]
      # @return [Boolean] true when #cast (without constraint checks) succeeds,
      #   false when it raises a JsonTableSchema::Exception
      def test(value)
        cast(value, true)
        true
      rescue JsonTableSchema::Exception
        false
      end

      # Set @format from the field's 'format'. A value starting with 'fmt:' is
      # split at the first colon into @format ('fmt') and @format_string (the
      # remainder); a missing format becomes 'default'.
      def set_format
        if (@field['format'] || '').start_with?('fmt:')
          @format, @format_string = *@field['format'].split(':', 2)
        else
          @format = @field['format'] || 'default'
        end
      end

      private

      # A value is null when it is one of #null_values and the field is not
      # required.
      def is_null?(value)
        null_values.include?(value) && @required == false
      end

      # @return [Array<String>] exact strings treated as null
      def null_values
        ['null', 'none', 'nil', 'nan', '-', '']
      end

    end
  end
end
|