tableschema 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +14 -0
- data/CHANGELOG.md +31 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +274 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/etc/schemas/geojson.json +209 -0
- data/etc/schemas/json-table-schema.json +102 -0
- data/lib/tableschema.rb +42 -0
- data/lib/tableschema/constraints/constraints.rb +76 -0
- data/lib/tableschema/constraints/enum.rb +14 -0
- data/lib/tableschema/constraints/max_length.rb +15 -0
- data/lib/tableschema/constraints/maximum.rb +14 -0
- data/lib/tableschema/constraints/min_length.rb +15 -0
- data/lib/tableschema/constraints/minimum.rb +14 -0
- data/lib/tableschema/constraints/pattern.rb +14 -0
- data/lib/tableschema/constraints/required.rb +32 -0
- data/lib/tableschema/data.rb +60 -0
- data/lib/tableschema/exceptions.rb +28 -0
- data/lib/tableschema/field.rb +41 -0
- data/lib/tableschema/helpers.rb +48 -0
- data/lib/tableschema/infer.rb +143 -0
- data/lib/tableschema/model.rb +73 -0
- data/lib/tableschema/schema.rb +36 -0
- data/lib/tableschema/table.rb +51 -0
- data/lib/tableschema/types/any.rb +23 -0
- data/lib/tableschema/types/array.rb +37 -0
- data/lib/tableschema/types/base.rb +54 -0
- data/lib/tableschema/types/boolean.rb +35 -0
- data/lib/tableschema/types/date.rb +56 -0
- data/lib/tableschema/types/datetime.rb +63 -0
- data/lib/tableschema/types/geojson.rb +38 -0
- data/lib/tableschema/types/geopoint.rb +56 -0
- data/lib/tableschema/types/integer.rb +35 -0
- data/lib/tableschema/types/null.rb +37 -0
- data/lib/tableschema/types/number.rb +60 -0
- data/lib/tableschema/types/object.rb +37 -0
- data/lib/tableschema/types/string.rb +64 -0
- data/lib/tableschema/types/time.rb +55 -0
- data/lib/tableschema/validate.rb +54 -0
- data/lib/tableschema/version.rb +3 -0
- data/tableschema.gemspec +32 -0
- metadata +231 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin providing the `maxLength` constraint check.
    #
    # Reads the `@value`, `@constraints` and `@field` instance variables of
    # the object it is mixed into.
    module MaxLength
      # Verifies that `@value.length` does not exceed the `maxLength`
      # constraint (converted with `to_i`).
      #
      # @return [true, nil] nil when `@value` is nil (nothing to check),
      #   true when the value is within the limit
      # @raise [TableSchema::ConstraintError] when the value is too long
      def check_max_length
        return if @value.nil?

        limit = @constraints['maxLength']
        return true unless @value.length > limit.to_i

        raise TableSchema::ConstraintError, "The field `#{@field['name']}` must have a maximum length of #{limit}"
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin providing the `maximum` constraint check.
    #
    # Reads the `@value`, `@constraints` and `@field` instance variables of
    # the object it is mixed into, and calls `parse_constraint`, which is not
    # defined in this module and must be supplied by the host object.
    module Maximum
      # Verifies that `@value` is not greater than the parsed `maximum`
      # constraint.
      #
      # A nil value is skipped (returning nil), mirroring the length checks,
      # instead of failing with NoMethodError on `nil > ...`.
      #
      # @return [true, nil] nil when `@value` is nil, true when within bounds
      # @raise [TableSchema::ConstraintError] when the value exceeds the maximum
      def check_maximum
        return if @value.nil?

        if @value > parse_constraint(@constraints['maximum'])
          raise TableSchema::ConstraintError, "The field `#{@field['name']}` must not be more than #{@constraints['maximum']}"
        end
        true
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin providing the `minLength` constraint check.
    #
    # Reads the `@value`, `@constraints` and `@field` instance variables of
    # the object it is mixed into.
    module MinLength
      # Verifies that `@value.length` is not below the `minLength`
      # constraint (converted with `to_i`).
      #
      # @return [true, nil] nil when `@value` is nil (nothing to check),
      #   true when the value is long enough
      # @raise [TableSchema::ConstraintError] when the value is too short
      def check_min_length
        return if @value.nil?

        limit = @constraints['minLength']
        return true unless @value.length < limit.to_i

        raise TableSchema::ConstraintError, "The field `#{@field['name']}` must have a minimum length of #{limit}"
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin providing the `minimum` constraint check.
    #
    # Reads the `@value`, `@constraints` and `@field` instance variables of
    # the object it is mixed into, and calls `parse_constraint`, which is not
    # defined in this module and must be supplied by the host object.
    module Minimum
      # Verifies that `@value` is not less than the parsed `minimum`
      # constraint.
      #
      # A nil value is skipped (returning nil), mirroring the length checks,
      # instead of failing with NoMethodError on `nil < ...`.
      #
      # @return [true, nil] nil when `@value` is nil, true when within bounds
      # @raise [TableSchema::ConstraintError] when the value is below the minimum
      def check_minimum
        return if @value.nil?

        if @value < parse_constraint(@constraints['minimum'])
          raise TableSchema::ConstraintError, "The field `#{@field['name']}` must not be less than #{@constraints['minimum']}"
        end
        true
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin providing the `pattern` constraint check.
    #
    # Reads the `@value`, `@constraints` and `@field` instance variables of
    # the object it is mixed into.
    module Pattern
      # Matches the JSON encoding of `@value` (`@value.to_json`) against a
      # regular expression built from the `pattern` constraint.
      #
      # Note that the JSON text is what gets matched, not the raw value; with
      # the standard json library a String is therefore tested together with
      # its surrounding double quotes.
      #
      # @return [true] when the encoded value matches
      # @raise [TableSchema::ConstraintError] when it does not match
      def check_pattern
        encoded = @value.to_json
        matcher = /#{@constraints['pattern']}/
        return true if encoded.match(matcher)

        raise TableSchema::ConstraintError, "The value for the field `#{@field['name']}` must match the pattern"
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module TableSchema
  class Constraints
    # Mixin providing the `required` constraint check.
    #
    # Reads the `@value`, `@constraints` and `@field` instance variables of
    # the object it is mixed into.
    module Required
      # Fails when the field is required and its value is one of the
      # recognised "empty" markers.
      #
      # @return [true] when the constraint is satisfied or does not apply
      # @raise [TableSchema::ConstraintError] when a required value is missing
      def check_required
        missing = required? && is_empty?
        raise TableSchema::ConstraintError, "The field `#{@field['name']}` requires a value" if missing

        true
      end

      private

      # The constraint only applies when it is switched on and the field's
      # declared type is not 'null'.
      def required?
        return false unless required == true

        @field['type'] != 'null'
      end

      # True when `@value` equals one of the `null_values` markers.
      def is_empty?
        null_values.any? { |marker| marker == @value }
      end

      # True when the `required` constraint stringifies to 'true', so both
      # the boolean and the string spelling are accepted.
      def required
        'true' == @constraints['required'].to_s
      end

      # Literal values treated as "no value supplied".
      def null_values
        %w[null none nil nan -] << ''
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module TableSchema
  # Mixin that casts raw rows against the fields of the object it is mixed
  # into. The host must respond to `fields`, returning one object per column
  # that implements `cast_value(raw)`.
  #
  # Errors collected in non-fail-fast mode are kept in `@errors` (exposed via
  # `errors`). The list is never reset here, so it accumulates across calls.
  module Data

    attr_reader :errors

    # Casts every row in `rows`.
    #
    # @param rows [Enumerable] rows to cast; CSV::Row instances are converted
    #   to plain arrays via `fields` first
    # @param fail_fast [Boolean] when exactly `true`, the first error is
    #   re-raised; otherwise errors are collected and MultipleInvalid is
    #   raised once all rows have been visited
    # @param limit [Integer, nil] maximum number of rows to process
    # @return [Array<Array>] the cast rows
    # @raise [TableSchema::MultipleInvalid] when any error has been collected
    def cast_rows(rows, fail_fast = true, limit = nil)
      @errors ||= []
      parsed_rows = []
      rows.each_with_index do |r, i|
        begin
          break if limit && (limit <= i)
          r = r.fields if r.is_a?(CSV::Row)
          parsed_rows << cast_row(r, fail_fast)
        rescue MultipleInvalid, ConversionError => e
          raise e if fail_fast == true
          # MultipleInvalid only signals that column errors were already
          # recorded by cast_column; only ConversionError is new information.
          @errors << e if e.is_a?(ConversionError)
        end
      end
      check_for_errors
      parsed_rows
    end

    alias_method :convert, :cast_rows

    # Casts one row in place, replacing each cell with its cast value.
    #
    # @param row [Array] raw cells; must have as many items as there are fields
    # @param fail_fast [Boolean] see {#cast_rows}
    # @return [Array] the same `row` object, with cast values
    # @raise [TableSchema::ConversionError] when the cell count differs from
    #   the field count
    # @raise [TableSchema::MultipleInvalid] when any error has been collected
    def cast_row(row, fail_fast = true)
      @errors ||= []
      raise_header_error(row) if row.count != fields.count
      fields.each_with_index do |field, i|
        row[i] = cast_column(field, row[i], fail_fast)
      end
      check_for_errors
      row
    end

    alias_method :convert_row, :cast_row

    private

    def raise_header_error(row)
      raise TableSchema::ConversionError, "The number of items to convert (#{row.count}) does not match the number of headers in the schema (#{fields.count})"
    end

    def check_for_errors
      raise TableSchema::MultipleInvalid, "There were errors parsing the data" if @errors.count > 0
    end

    # Casts a single cell. In fail-fast mode the error is re-raised; otherwise
    # it is recorded and the raw value is returned, so the caller's row keeps
    # its original cell instead of receiving the internal errors array.
    def cast_column(field, col, fail_fast)
      field.cast_value(col)
    # Spelled out on purpose: inside this module a bare `Exception` already
    # resolves to TableSchema::Exception, not ::Exception.
    rescue TableSchema::Exception => e
      raise e if fail_fast == true
      @errors << e
      col
    end

    alias_method :convert_column, :cast_column

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module TableSchema
  # Root of every error raised by this library.
  #
  # NOTE: this inherits from ::Exception rather than StandardError, so a bare
  # `rescue` (which only catches StandardError) will not catch these errors;
  # callers must rescue TableSchema::Exception or a subclass explicitly.
  class Exception < ::Exception ; end

  # Error carrying a schema-related message.
  class SchemaException < Exception
    attr_reader :message

    # @param message [Object] description of the schema problem
    def initialize(message)
      @message = message
      # Hand the message to ::Exception as well so `to_s`, `full_message` and
      # the default uncaught-error report show it instead of the class name.
      super(message)
    end
  end

  class InvalidFormat < Exception ; end
  class InvalidCast < Exception ; end
  class InvalidEmail < Exception ; end
  class InvalidURI < Exception ; end
  class InvalidUUID < Exception ; end
  class InvalidObjectType < Exception ; end
  class InvalidArrayType < Exception ; end
  class InvalidDateType < Exception ; end
  class InvalidTimeType < Exception ; end
  class InvalidDateTimeType < Exception ; end
  class InvalidGeoJSONType < Exception ; end
  class InvalidGeoPointType < Exception ; end
  class ConstraintError < Exception ; end
  class ConstraintNotSupported < Exception ; end
  class ConversionError < Exception ; end
  class MultipleInvalid < Exception ; end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module TableSchema
  # A single field descriptor. It is a Hash holding the descriptor's
  # key/value pairs, with convenience readers that apply defaults.
  class Field < Hash
    include TableSchema::Helpers

    # Type class resolved from the descriptor when the field was created.
    attr_reader :type_class

    # @param descriptor [Hash] field descriptor merged into this hash
    def initialize(descriptor)
      merge!(descriptor)
      @type_class = get_type
    end

    # @return [Object, nil] the descriptor's 'name' entry
    def name
      self['name']
    end

    # @return [String] the descriptor's 'type', or 'string' when absent
    def type
      declared = self['type']
      declared || 'string'
    end

    # @return [String] the descriptor's 'format', or 'default' when absent
    def format
      declared = self['format']
      declared || 'default'
    end

    # @return [Hash] the descriptor's 'constraints', or an empty hash
    def constraints
      declared = self['constraints']
      declared || {}
    end

    # Casts a raw value using a converter built for the current `type`.
    # The converter class is looked up on every call rather than taken from
    # `type_class`.
    #
    # @param col [Object] raw cell value
    # @return [Object] whatever the converter's `cast` returns
    def cast_value(col)
      converter_class = Kernel.const_get(get_class_for_type(type))
      converter_class.new(self).cast(col)
    end

    private

    # Resolves the class named by `get_class_for_type` for the current type.
    def get_type
      Object.const_get(get_class_for_type(type))
    end

  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module TableSchema
  # Shared helper methods mixed into other classes of the library.
  module Helpers

    # Converts a value to a boolean using `true_values` / `false_values`.
    #
    # Actual `true` / `false` objects are returned unchanged. This is checked
    # by identity so the method does not depend on a `Boolean` constant,
    # which core Ruby does not define and this module does not provide.
    # Everything else is compared case-insensitively through its string form.
    #
    # @param value [Object] value to convert
    # @return [true, false, nil] nil when the value is not recognised
    def convert_to_boolean(value)
      return value if true.equal?(value) || false.equal?(value)

      normalized = value.to_s.downcase
      if true_values.include?(normalized)
        true
      elsif false_values.include?(normalized)
        false
      end
    end

    # @return [Array<String>] lowercase spellings accepted as true
    def true_values
      ['yes', 'y', 'true', 't', '1']
    end

    # @return [Array<String>] lowercase spellings accepted as false
    def false_values
      ['no', 'n', 'false', 'f', '0']
    end

    # Builds the fully-qualified name of the type class for `type`,
    # falling back to the String type for unknown names.
    #
    # @param type [String] schema type name, e.g. 'integer'
    # @return [String] e.g. "TableSchema::Types::Integer"
    def get_class_for_type(type)
      "TableSchema::Types::#{type_class_lookup[type] || 'String'}"
    end

    # @return [Hash{String => String}] schema type name => class name
    def type_class_lookup
      {
        'any' => 'Any',
        'array' => 'Array',
        'base' => 'Base',
        'boolean' => 'Boolean',
        'date' => 'Date',
        'datetime' => 'DateTime',
        'geojson' => 'GeoJSON',
        'geopoint' => 'GeoPoint',
        'integer' => 'Integer',
        'null' => 'Null',
        'number' => 'Number',
        'object' => 'Object',
        'string' => 'String',
        'time' => 'Time',
      }
    end

  end
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
module TableSchema
  # Infers a schema (field types and formats) from headers and sample rows.
  #
  # After construction `schema` holds the result of
  # `TableSchema::Schema.new` applied to the inferred descriptor.
  class Infer

    include TableSchema::Helpers

    attr_reader :schema

    # @param headers [Array] column names, one per field
    # @param rows [Enumerable] sample rows (arrays or CSV::Row instances)
    # @param opts [Hash] options read here: :explicit, :primary_key, :row_limit
    def initialize(headers, rows, opts = {})
      @headers = headers
      @rows = rows
      @explicit = opts[:explicit]
      @primary_key = opts[:primary_key]
      @row_limit = opts[:row_limit]

      @schema = {
        'fields' => fields
      }
      @schema['primaryKey'] = @primary_key if @primary_key
      infer!
    end

    # Builds one bare descriptor per header. Constraints are only attached
    # when they are set, unless :explicit is true, in which case the false
    # entries are kept as well.
    #
    # @return [Array<Hash>] field descriptors without type information
    def fields
      @headers.map do |header|
        descriptor = {
          'name' => header,
          'title' => '',
          'description' => '',
        }

        constraints = {}
        constraints['required'] = @explicit === true
        constraints['unique'] = (header == @primary_key)
        constraints.delete_if { |_k, v| v == false } unless @explicit === true
        descriptor['constraints'] = constraints if constraints.count > 0
        descriptor
      end
    end

    # Guesses a type for every cell of the sampled rows, resolves one type
    # per column and replaces `@schema` with a TableSchema::Schema.
    def infer!
      type_matches = []
      headers_length = @headers.count

      @rows.each_with_index do |row, row_index|
        # Stop once exactly @row_limit rows have been sampled.
        break if @row_limit && row_index >= @row_limit
        row = row.fields if row.is_a?(CSV::Row)

        fit_row(row, headers_length).each_with_index do |col, col_index|
          type_matches[col_index] ||= []
          type_matches[col_index] << guess_type(col, col_index)
        end
      end

      resolve_types(type_matches)
      @schema = TableSchema::Schema.new(@schema)
    end

    # Tries the available types from last to first and returns the first one
    # whose converter accepts the value, defaulting to string/default.
    #
    # @param col [Object] cell value
    # @param index [Integer] column index into the schema fields
    # @return [Hash] {'type' => ..., 'format' => ...}
    def guess_type(col, index)
      guessed_type = 'string'
      guessed_format = 'default'

      available_types.reverse_each do |type|
        klass = get_class_for_type(type)
        converter = Kernel.const_get(klass).new(@schema['fields'][index])
        if converter.test(col) === true
          guessed_type = type
          guessed_format = guess_format(converter, col)
          break
        end
      end

      {
        'type' => guessed_type,
        'format' => guessed_format
      }
    end

    # Calls each non-default `cast_*` method of the converter and returns the
    # format name of the first one that does not raise a
    # TableSchema::Exception.
    #
    # @return [String] the matching format name, or 'default'
    def guess_format(converter, col)
      guessed_format = 'default'
      converter.class.instance_methods.grep(/cast_/).each do |method|
        begin
          format = method.to_s.sub('cast_', '')
          next if format == 'default'
          converter.send(method, col)
          guessed_format = format
          break
        rescue TableSchema::Exception
          # This format does not apply to the value; try the next one.
        end
      end
      guessed_format
    end

    # Picks, for every column, the guess that occurred most often (the
    # earliest-seen guess wins a tie) and merges it into the field descriptor.
    #
    # @param results [Array<Array<Hash>>] per-column lists of guesses
    def resolve_types(results)
      results.each_with_index do |result, v|
        counts = result.each_with_object(Hash.new(0)) { |guess, acc| acc[guess] += 1 }
        rv = counts.max_by { |_guess, count| count }.first

        @schema['fields'][v].merge!(rv)
      end
    end

    # @return [Array<String>] type names; `guess_type` walks this list in
    #   reverse order
    def available_types
      [
        'any',
        'string',
        'boolean',
        'number',
        'integer',
        'null',
        'date',
        'time',
        'datetime',
        'array',
        'object',
        'geopoint',
        'geojson'
      ]
    end

    private

    # Returns a row with exactly `width` cells: extra cells are dropped and
    # missing ones are padded with empty strings. The input row is not
    # modified.
    def fit_row(row, width)
      size = row.count
      if size > width
        row.first(width)
      elsif size < width
        row + [''] * (width - size)
      else
        row
      end
    end

  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module TableSchema
  # Read helpers over a schema descriptor. Intended to be mixed into a
  # Hash-like object holding the descriptor ('fields', 'primaryKey',
  # 'foreignKeys'); `@opts` is read for the :case_insensitive_headers flag.
  module Model

    DEFAULTS = {
      'format' => 'default',
      'type' => 'string'
    }

    # @return [Array] field names (lowercased when :case_insensitive_headers
    #   is set); an empty array when a NoMethodError occurs, e.g. because
    #   there are no fields
    def headers
      fields.map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # @return [Array, nil] the descriptor's 'fields' entry
    def fields
      self['fields']
    end

    # @return [Array] the 'primaryKey' entry as a flat array without nils
    def primary_keys
      [self['primaryKey']].flatten.reject { |k| k.nil? }
    end

    # @return [Array] the 'foreignKeys' entry, or an empty array
    def foreign_keys
      self['foreignKeys'] || []
    end

    # @param key [Object] field name
    # @return [Object] the 'type' of the field named `key`
    def get_type(key)
      get_field(key)['type']
    end

    # @param key [Object] field name
    # @return [Hash] the 'constraints' of the field named `key`, or {}
    def get_constraints(key)
      get_field(key)['constraints'] || {}
    end

    # @return [Array] names of fields whose 'required' constraint is `true`;
    #   an empty array when a NoMethodError occurs
    def required_headers
      fields.select { |f| f['constraints'] != nil && f['constraints']['required'] == true }
            .map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # @return [Boolean] whether a field named `key` exists
    def has_field?(key)
      get_field(key) != nil
    end

    # @return [Hash, nil] the first field whose 'name' equals `key`
    def get_field(key)
      fields.find { |f| f['name'] == key }
    end

    # @return [Array] fields whose 'type' equals `type`
    def get_fields_by_type(type)
      fields.select { |f| f['type'] == type }
    end

    private

    # Returns the header name, lowercased when :case_insensitive_headers is
    # set. A new string is returned so the name stored in the field
    # descriptor is left untouched (and frozen names are accepted).
    def transform(name)
      return name.downcase if @opts[:case_insensitive_headers]

      name
    end

    # Fills in the default 'type' and 'format' on fields that lack them.
    def expand!
      (self['fields'] || []).each do |f|
        f['type'] = DEFAULTS['type'] if f['type'] == nil
        f['format'] = DEFAULTS['format'] if f['format'] == nil
      end
    end

    # Wraps every field descriptor in a TableSchema::Field.
    def load_fields!
      self['fields'] = (self['fields'] || []).map { |f| TableSchema::Field.new(f) }
    end

  end
end
|