tableschema 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +14 -0
- data/CHANGELOG.md +31 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +274 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/etc/schemas/geojson.json +209 -0
- data/etc/schemas/json-table-schema.json +102 -0
- data/lib/tableschema.rb +42 -0
- data/lib/tableschema/constraints/constraints.rb +76 -0
- data/lib/tableschema/constraints/enum.rb +14 -0
- data/lib/tableschema/constraints/max_length.rb +15 -0
- data/lib/tableschema/constraints/maximum.rb +14 -0
- data/lib/tableschema/constraints/min_length.rb +15 -0
- data/lib/tableschema/constraints/minimum.rb +14 -0
- data/lib/tableschema/constraints/pattern.rb +14 -0
- data/lib/tableschema/constraints/required.rb +32 -0
- data/lib/tableschema/data.rb +60 -0
- data/lib/tableschema/exceptions.rb +28 -0
- data/lib/tableschema/field.rb +41 -0
- data/lib/tableschema/helpers.rb +48 -0
- data/lib/tableschema/infer.rb +143 -0
- data/lib/tableschema/model.rb +73 -0
- data/lib/tableschema/schema.rb +36 -0
- data/lib/tableschema/table.rb +51 -0
- data/lib/tableschema/types/any.rb +23 -0
- data/lib/tableschema/types/array.rb +37 -0
- data/lib/tableschema/types/base.rb +54 -0
- data/lib/tableschema/types/boolean.rb +35 -0
- data/lib/tableschema/types/date.rb +56 -0
- data/lib/tableschema/types/datetime.rb +63 -0
- data/lib/tableschema/types/geojson.rb +38 -0
- data/lib/tableschema/types/geopoint.rb +56 -0
- data/lib/tableschema/types/integer.rb +35 -0
- data/lib/tableschema/types/null.rb +37 -0
- data/lib/tableschema/types/number.rb +60 -0
- data/lib/tableschema/types/object.rb +37 -0
- data/lib/tableschema/types/string.rb +64 -0
- data/lib/tableschema/types/time.rb +55 -0
- data/lib/tableschema/validate.rb +54 -0
- data/lib/tableschema/version.rb +3 -0
- data/tableschema.gemspec +32 -0
- metadata +231 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin implementing the `maxLength` constraint.
    #
    # Reads @value, @constraints and @field from the object it is mixed into.
    module MaxLength

      # Checks that @value is no longer than the `maxLength` constraint.
      #
      # A nil value has no length and is treated as satisfying the constraint;
      # it returns true like every other passing path.
      #
      # @return [true] when the value is nil or within the limit
      # @raise [TableSchema::ConstraintError] when @value.length exceeds the limit
      def check_max_length
        return true if @value.nil?

        limit = @constraints['maxLength']
        # The limit may be given as a string, hence the to_i coercion.
        if @value.length > limit.to_i
          raise TableSchema::ConstraintError, "The field `#{@field['name']}` must have a maximum length of #{limit}"
        end

        true
      end

    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin implementing the `maximum` constraint.
    #
    # Reads @value, @constraints and @field from the object it is mixed into
    # and calls `parse_constraint`, which is not defined in this module and
    # must be supplied by the including class.
    module Maximum

      # Checks that @value does not exceed the `maximum` constraint.
      #
      # A nil value cannot be compared and is treated as satisfying the
      # constraint, matching the nil handling of the length checks.
      #
      # @return [true] when the value is nil or not greater than the limit
      # @raise [TableSchema::ConstraintError] when @value is greater than the limit
      def check_maximum
        return true if @value.nil?

        limit = @constraints['maximum']
        if @value > parse_constraint(limit)
          raise TableSchema::ConstraintError, "The field `#{@field['name']}` must not be more than #{limit}"
        end

        true
      end

    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin implementing the `minLength` constraint.
    #
    # Reads @value, @constraints and @field from the object it is mixed into.
    module MinLength

      # Checks that @value is at least as long as the `minLength` constraint.
      #
      # A nil value has no length and is treated as satisfying the constraint;
      # it returns true like every other passing path.
      #
      # @return [true] when the value is nil or long enough
      # @raise [TableSchema::ConstraintError] when @value.length is below the limit
      def check_min_length
        return true if @value.nil?

        limit = @constraints['minLength']
        # The limit may be given as a string, hence the to_i coercion.
        if @value.length < limit.to_i
          raise TableSchema::ConstraintError, "The field `#{@field['name']}` must have a minimum length of #{limit}"
        end

        true
      end

    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin implementing the `minimum` constraint.
    #
    # Reads @value, @constraints and @field from the object it is mixed into
    # and calls `parse_constraint`, which is not defined in this module and
    # must be supplied by the including class.
    module Minimum

      # Checks that @value is not below the `minimum` constraint.
      #
      # A nil value cannot be compared and is treated as satisfying the
      # constraint, matching the nil handling of the length checks.
      #
      # @return [true] when the value is nil or not less than the limit
      # @raise [TableSchema::ConstraintError] when @value is less than the limit
      def check_minimum
        return true if @value.nil?

        limit = @constraints['minimum']
        if @value < parse_constraint(limit)
          raise TableSchema::ConstraintError, "The field `#{@field['name']}` must not be less than #{limit}"
        end

        true
      end

    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin implementing the `pattern` constraint.
    #
    # Reads @value, @constraints and @field from the object it is mixed into.
    module Pattern

      # Checks that the JSON encoding of @value matches the `pattern`
      # constraint, which is interpolated into a regular expression.
      #
      # NOTE(review): the match runs against `@value.to_json`, so a String
      # value is matched together with its surrounding double quotes. A pattern
      # anchored with ^...$ therefore has to account for the quotes. Kept as-is
      # to preserve behaviour - confirm whether this is intended.
      #
      # @return [true] when the pattern matches
      # @raise [TableSchema::ConstraintError] when the pattern does not match
      def check_pattern
        # Parenthesised call: the bare `match /.../` form is parsed as an
        # ambiguous first argument and warns under -w.
        unless @value.to_json.match(/#{@constraints['pattern']}/)
          raise TableSchema::ConstraintError, "The value for the field `#{@field['name']}` must match the pattern"
        end

        true
      end

    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin implementing the `required` constraint.
    #
    # Reads @value, @constraints and @field from the object it is mixed into.
    module Required

      # String markers that count as "no value" (compared exactly, case-sensitive).
      NULL_VALUES = ['null', 'none', 'nil', 'nan', '-', ''].freeze

      # Checks that a required field actually carries a value.
      #
      # @return [true] when the field is not required or has a value
      # @raise [TableSchema::ConstraintError] when a required field is empty
      def check_required
        if required? && is_empty?
          raise TableSchema::ConstraintError, "The field `#{@field['name']}` requires a value"
        end

        true
      end

      private

      # The constraint only applies when `required` is set and the field is not
      # of type 'null' (such a field is expected to be empty).
      def required?
        required && @field['type'] != 'null'
      end

      # True when @value is nil or one of the NULL_VALUES markers.
      # nil is checked explicitly because it is not among the string markers.
      def is_empty?
        @value.nil? || null_values.include?(@value)
      end

      # True when the `required` constraint is true or the string 'true'.
      def required
        @constraints['required'].to_s == 'true'
      end

      # @return [Array<String>] the markers treated as empty
      def null_values
        NULL_VALUES
      end

    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module TableSchema
  # Row-casting mixin.
  #
  # The including object must provide `fields`, a collection whose items
  # respond to `cast_value`. Errors collected when not failing fast are exposed
  # through `errors`.
  #
  # NOTE(review): @errors is only ever initialised with `||=` and is never
  # cleared here, so errors from an earlier call stay in the list and make
  # later calls raise MultipleInvalid as well. Left unchanged because callers
  # may read the accumulated list - confirm the intended lifetime.
  module Data

    attr_reader :errors

    # Casts every row with cast_row.
    #
    # @param rows [Enumerable] rows as arrays (CSV::Row objects are unwrapped
    #   through `fields` when the csv library is loaded)
    # @param fail_fast [Boolean] when exactly true, re-raise the first error;
    #   otherwise collect errors and raise MultipleInvalid at the end
    # @param limit [Integer, nil] maximum number of rows to cast
    # @return [Array] the cast rows
    # @raise [TableSchema::MultipleInvalid] when any error has been collected
    def cast_rows(rows, fail_fast = true, limit = nil)
      @errors ||= []
      parsed_rows = []
      rows.each_with_index do |r, i|
        break if limit && (limit <= i)

        begin
          # Guarded so that plain arrays work even when csv was never required.
          r = r.fields if defined?(::CSV::Row) && r.is_a?(::CSV::Row)
          parsed_rows << cast_row(r, fail_fast)
        rescue MultipleInvalid, ConversionError => e
          raise e if fail_fast == true
          # MultipleInvalid only signals that cell errors were already recorded.
          @errors << e if e.is_a?(ConversionError)
        end
      end
      check_for_errors
      parsed_rows
    end

    alias_method :convert, :cast_rows

    # Casts one row in place, cell by cell, against `fields`.
    #
    # @param row [Array] raw cell values; this array is modified and returned
    # @param fail_fast [Boolean] see cast_rows
    # @return [Array] the same row object holding the cast values
    # @raise [TableSchema::ConversionError] when the cell count differs from
    #   the field count
    # @raise [TableSchema::MultipleInvalid] when any error has been collected
    def cast_row(row, fail_fast = true)
      @errors ||= []
      raise_header_error(row) if row.count != fields.count
      fields.each_with_index do |field, i|
        row[i] = cast_column(field, row[i], fail_fast)
      end
      check_for_errors
      row
    end

    alias_method :convert_row, :cast_row

    private

    def raise_header_error(row)
      raise TableSchema::ConversionError, "The number of items to convert (#{row.count}) does not match the number of headers in the schema (#{fields.count})"
    end

    def check_for_errors
      raise TableSchema::MultipleInvalid, "There were errors parsing the data" if @errors.count > 0
    end

    # Casts a single cell. On failure it either re-raises (fail fast) or
    # records the error and hands back the untouched input, so the caller's
    # cell is not replaced by the error list.
    def cast_column(field, col, fail_fast)
      field.cast_value(col)
    rescue TableSchema::Exception => e
      # Written out in full: a bare `Exception` in this namespace already
      # resolves to TableSchema::Exception once that class is loaded.
      raise e if fail_fast == true

      @errors << e
      col
    end

    alias_method :convert_column, :cast_column

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module TableSchema
  # Root of every error raised by this library.
  #
  # NOTE(review): this derives from ::Exception rather than StandardError, so
  # a bare `rescue` (or `rescue StandardError`) does NOT catch these errors.
  # Left unchanged because other files may depend on that - confirm before
  # re-parenting.
  class Exception < ::Exception ; end

  # Error carrying a caller-supplied message object.
  class SchemaException < Exception
    # @return [Object] the message exactly as passed to the constructor
    attr_reader :message

    # @param message [Object] description of the problem
    def initialize(message)
      @message = message
      # Forward to Exception#initialize so to_s / inspect / full_message show
      # the message instead of the bare class name.
      super
    end
  end

  class InvalidFormat < Exception ; end
  class InvalidCast < Exception ; end
  class InvalidEmail < Exception ; end
  class InvalidURI < Exception ; end
  class InvalidUUID < Exception ; end
  class InvalidObjectType < Exception ; end
  class InvalidArrayType < Exception ; end
  class InvalidDateType < Exception ; end
  class InvalidTimeType < Exception ; end
  class InvalidDateTimeType < Exception ; end
  class InvalidGeoJSONType < Exception ; end
  class InvalidGeoPointType < Exception ; end
  class ConstraintError < Exception ; end
  class ConstraintNotSupported < Exception ; end
  class ConversionError < Exception ; end
  class MultipleInvalid < Exception ; end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module TableSchema
  # A field descriptor stored as a Hash, with convenience readers and the
  # ability to cast a raw value through the type class matching its `type`.
  class Field < Hash
    include TableSchema::Helpers

    # @return [Class] the type class resolved when the field was constructed
    attr_reader :type_class

    # @param descriptor [Hash] field descriptor copied into this hash
    def initialize(descriptor)
      merge!(descriptor)
      @type_class = get_type
    end

    # @return [Object, nil] the descriptor's 'name' entry
    def name
      self['name']
    end

    # @return [String] the descriptor's 'type', or 'string' when absent
    def type
      declared = self['type']
      declared || 'string'
    end

    # @return [String] the descriptor's 'format', or 'default' when absent
    def format
      declared = self['format']
      declared || 'default'
    end

    # @return [Hash] the descriptor's 'constraints', or an empty hash when absent
    def constraints
      declared = self['constraints']
      declared || {}
    end

    # Casts a raw value using a fresh instance of the class for the current
    # `type` (looked up again on every call).
    #
    # @param col [Object] raw cell value
    # @return [Object] whatever the type's `cast` returns
    def cast_value(col)
      get_type.new(self).cast(col)
    end

    private

    # Resolves the class named by get_class_for_type for the current type.
    def get_type
      class_name = get_class_for_type(type)
      Object.const_get(class_name)
    end

  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module TableSchema
  # Shared helpers for boolean parsing and for mapping schema type names to
  # the names of the classes under TableSchema::Types.
  module Helpers

    # Lower-case spellings read as true.
    TRUE_VALUES = ['yes', 'y', 'true', 't', '1'].freeze

    # Lower-case spellings read as false.
    FALSE_VALUES = ['no', 'n', 'false', 'f', '0'].freeze

    # Schema type name => class name inside TableSchema::Types.
    TYPE_CLASS_LOOKUP = {
      'any' => 'Any',
      'array' => 'Array',
      'base' => 'Base',
      'boolean' => 'Boolean',
      'date' => 'Date',
      'datetime' => 'DateTime',
      'geojson' => 'GeoJSON',
      'geopoint' => 'GeoPoint',
      'integer' => 'Integer',
      'null' => 'Null',
      'number' => 'Number',
      'object' => 'Object',
      'string' => 'String',
      'time' => 'Time',
    }.freeze

    # Interprets a value as a boolean.
    #
    # true and false are returned unchanged; anything else is compared,
    # case-insensitively via to_s, against the true/false spellings.
    # The check is done against the true/false singletons directly because
    # Ruby has no core `Boolean` class to test with is_a?.
    #
    # @param value [Object]
    # @return [true, false, nil] nil when the value is not recognised
    def convert_to_boolean(value)
      return value if value.equal?(true) || value.equal?(false)

      normalized = value.to_s.downcase
      return true if true_values.include?(normalized)
      return false if false_values.include?(normalized)

      nil
    end

    # @return [Array<String>] spellings read as true
    def true_values
      TRUE_VALUES
    end

    # @return [Array<String>] spellings read as false
    def false_values
      FALSE_VALUES
    end

    # @param type [String] schema type name
    # @return [String] fully qualified class name; unknown types map to String
    def get_class_for_type(type)
      "TableSchema::Types::#{type_class_lookup[type] || 'String'}"
    end

    # @return [Hash{String => String}] schema type name => class name
    def type_class_lookup
      TYPE_CLASS_LOOKUP
    end

  end
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
module TableSchema
  # Infers a schema (field types and formats) from headers and sample rows.
  #
  # After construction, `schema` holds a TableSchema::Schema built from the
  # inferred descriptor.
  class Infer

    include TableSchema::Helpers

    attr_reader :schema

    # @param headers [Array] column names
    # @param rows [Enumerable] sample rows (arrays, or CSV::Row objects)
    # @param opts [Hash] :explicit (emit required/unique constraints even when
    #   false), :primary_key (header marked unique and set as primaryKey),
    #   :row_limit (maximum number of rows to inspect)
    def initialize(headers, rows, opts = {})
      @headers = headers
      @rows = rows
      @explicit = opts[:explicit]
      @primary_key = opts[:primary_key]
      @row_limit = opts[:row_limit]

      @schema = {
        'fields' => fields
      }
      @schema['primaryKey'] = @primary_key if @primary_key
      infer!
    end

    # Builds one untyped descriptor per header.
    #
    # @return [Array<Hash>]
    def fields
      explicit = (@explicit == true)

      @headers.map do |header|
        descriptor = {
          'name' => header,
          'title' => '',
          'description' => '',
        }

        constraints = {
          'required' => explicit,
          'unique' => (header == @primary_key)
        }
        # Unless explicit output was requested, false constraints are omitted.
        constraints.delete_if { |_name, flag| flag == false } unless explicit
        descriptor['constraints'] = constraints if constraints.count > 0
        descriptor
      end
    end

    # Guesses a type for every cell of the inspected rows, resolves one type
    # per column and replaces @schema with a TableSchema::Schema.
    def infer!
      type_matches = []
      width = @headers.count

      @rows.each_with_index do |row, row_index|
        # Inspect exactly @row_limit rows (indices 0...@row_limit).
        break if @row_limit && row_index >= @row_limit

        row = row.fields if defined?(::CSV::Row) && row.is_a?(::CSV::Row)

        fit_row_to_headers(row, width).each_with_index do |col, col_index|
          type_matches[col_index] ||= []
          type_matches[col_index] << guess_type(col, col_index)
        end
      end

      resolve_types(type_matches)
      @schema = TableSchema::Schema.new(@schema)
    end

    # Finds the first type, walking available_types from last to first, whose
    # converter accepts the value.
    #
    # @param col [Object] cell value
    # @param index [Integer] column index into @schema['fields']
    # @return [Hash] { 'type' => ..., 'format' => ... }; string/default when
    #   nothing matches
    def guess_type(col, index)
      guessed_type = 'string'
      guessed_format = 'default'

      available_types.reverse_each do |type|
        converter = Object.const_get(get_class_for_type(type)).new(@schema['fields'][index])
        next unless converter.test(col) == true

        guessed_type = type
        guessed_format = guess_format(converter, col)
        break
      end

      {
        'type' => guessed_type,
        'format' => guessed_format
      }
    end

    # Tries each `cast_*` method of the converter (other than cast_default)
    # and returns the name of the first format that casts without raising a
    # TableSchema::Exception.
    #
    # @return [String] the format name, or 'default'
    def guess_format(converter, col)
      converter.class.instance_methods.grep(/cast_/).each do |method|
        candidate = method.to_s.sub('cast_', '')
        next if candidate == 'default'

        begin
          converter.send(method, col)
          return candidate
        rescue TableSchema::Exception
          # This format does not fit; try the next one.
          next
        end
      end

      'default'
    end

    # Merges, for every column, the most frequent guess into the matching
    # field descriptor. Ties go to the guess seen first.
    #
    # @param results [Array<Array<Hash>>] guesses per column
    def resolve_types(results)
      results.each_with_index do |guesses, column|
        votes = guesses.each_with_object(Hash.new(0)) { |guess, acc| acc[guess] += 1 }
        winner, _count = votes.max_by { |_guess, count| count }
        @schema['fields'][column].merge!(winner)
      end
    end

    # Candidate types, ordered from least to most specific; guess_type walks
    # this list in reverse.
    def available_types
      [
        'any',
        'string',
        'boolean',
        'number',
        'integer',
        'null',
        'date',
        'time',
        'datetime',
        'array',
        'object',
        'geopoint',
        'geojson'
      ]
    end

    private

    # Returns a copy of the row trimmed or padded with '' to exactly `width`
    # cells; the caller's row is left untouched.
    def fit_row_to_headers(row, width)
      return row.first(width) if row.count > width

      row + Array.new(width - row.count, '')
    end

  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module TableSchema
  # Read helpers over a schema descriptor.
  #
  # Meant to be mixed into a Hash-like object holding the descriptor; reads
  # @opts[:case_insensitive_headers] from that object.
  module Model

    # Values filled in by expand! for fields lacking them.
    DEFAULTS = {
      'format' => 'default',
      'type' => 'string'
    }.freeze

    # @return [Array] field names (lower-cased when case-insensitive headers
    #   are enabled); [] when a NoMethodError occurs, e.g. no 'fields' entry
    def headers
      fields.map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # @return [Array, nil] the descriptor's 'fields' entry
    def fields
      self['fields']
    end

    # @return [Array] the 'primaryKey' entry as a flat list without nils
    def primary_keys
      [self['primaryKey']].flatten.compact
    end

    # @return [Array] the 'foreignKeys' entry, or [] when absent
    def foreign_keys
      self['foreignKeys'] || []
    end

    # @param key [String] field name
    # @return [Object] the 'type' of the named field
    def get_type(key)
      get_field(key)['type']
    end

    # @param key [String] field name
    # @return [Hash] the 'constraints' of the named field, or {} when absent
    def get_constraints(key)
      get_field(key)['constraints'] || {}
    end

    # @return [Array] names of fields whose `required` constraint is exactly
    #   true; [] when a NoMethodError occurs, e.g. no 'fields' entry
    def required_headers
      required = fields.select { |f| !f['constraints'].nil? && f['constraints']['required'] == true }
      required.map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # @param key [String] field name
    # @return [Boolean] whether a field with that name exists
    def has_field?(key)
      !get_field(key).nil?
    end

    # @param key [String] field name
    # @return [Hash, nil] the first field whose 'name' equals key
    def get_field(key)
      fields.find { |f| f['name'] == key }
    end

    # @param type [String]
    # @return [Array] the fields whose 'type' equals type
    def get_fields_by_type(type)
      fields.select { |f| f['type'] == type }
    end

    private

    # Returns the header name, lower-cased when case-insensitive headers are
    # enabled. Uses the non-destructive `downcase` so the name stored in the
    # field descriptor is left as written (and frozen names do not raise).
    def transform(name)
      return name unless @opts[:case_insensitive_headers]

      name.downcase
    end

    # Fills in the default 'type' and 'format' on fields that lack them.
    def expand!
      (self['fields'] || []).each do |f|
        f['type'] = DEFAULTS['type'] if f['type'].nil?
        f['format'] = DEFAULTS['format'] if f['format'].nil?
      end
    end

    # Replaces each raw field descriptor with a TableSchema::Field.
    def load_fields!
      self['fields'] = (self['fields'] || []).map { |f| TableSchema::Field.new(f) }
    end

  end
end
|