tableschema 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +21 -0
- data/.travis.yml +15 -1
- data/README.md +164 -129
- data/Rakefile +10 -1
- data/bin/console +2 -6
- data/{etc/schemas → lib/profiles}/geojson.json +0 -1
- data/lib/profiles/table-schema.json +1625 -0
- data/lib/profiles/topojson.json +311 -0
- data/lib/tableschema.rb +5 -3
- data/lib/tableschema/constraints/constraints.rb +12 -24
- data/lib/tableschema/constraints/enum.rb +6 -2
- data/lib/tableschema/constraints/max_length.rb +6 -2
- data/lib/tableschema/constraints/maximum.rb +12 -2
- data/lib/tableschema/constraints/min_length.rb +6 -2
- data/lib/tableschema/constraints/minimum.rb +12 -2
- data/lib/tableschema/constraints/pattern.rb +9 -2
- data/lib/tableschema/constraints/required.rb +6 -15
- data/lib/tableschema/constraints/unique.rb +12 -0
- data/lib/tableschema/defaults.rb +9 -0
- data/lib/tableschema/exceptions.rb +15 -2
- data/lib/tableschema/field.rb +39 -20
- data/lib/tableschema/helpers.rb +32 -15
- data/lib/tableschema/infer.rb +31 -28
- data/lib/tableschema/model.rb +57 -34
- data/lib/tableschema/schema.rb +40 -6
- data/lib/tableschema/table.rb +75 -26
- data/lib/tableschema/types/any.rb +1 -0
- data/lib/tableschema/types/array.rb +2 -1
- data/lib/tableschema/types/base.rb +9 -21
- data/lib/tableschema/types/date.rb +1 -0
- data/lib/tableschema/types/datetime.rb +1 -0
- data/lib/tableschema/types/duration.rb +31 -0
- data/lib/tableschema/types/geojson.rb +27 -5
- data/lib/tableschema/types/geopoint.rb +4 -3
- data/lib/tableschema/types/integer.rb +1 -0
- data/lib/tableschema/types/number.rb +40 -25
- data/lib/tableschema/types/object.rb +2 -1
- data/lib/tableschema/types/string.rb +8 -0
- data/lib/tableschema/types/time.rb +1 -0
- data/lib/tableschema/types/year.rb +34 -0
- data/lib/tableschema/types/yearmonth.rb +52 -0
- data/lib/tableschema/validate.rb +45 -29
- data/lib/tableschema/version.rb +1 -1
- data/tableschema.gemspec +2 -1
- metadata +31 -12
- data/etc/schemas/json-table-schema.json +0 -102
- data/lib/tableschema/data.rb +0 -60
- data/lib/tableschema/types/null.rb +0 -37
@@ -3,12 +3,22 @@ module TableSchema
|
|
3
3
|
module Maximum
|
4
4
|
|
5
5
|
def check_maximum
|
6
|
-
if @
|
7
|
-
|
6
|
+
if @field.type == 'yearmonth'
|
7
|
+
valid = Date.new(@value[:year], @value[:month]) <= Date.new(parsed_maximum[:year], parsed_maximum[:month])
|
8
|
+
else
|
9
|
+
valid = @value <= parsed_maximum
|
10
|
+
end
|
11
|
+
|
12
|
+
unless valid
|
13
|
+
raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must not be more than #{@constraints[:maximum]}")
|
8
14
|
end
|
9
15
|
true
|
10
16
|
end
|
11
17
|
|
18
|
+
def parsed_maximum
|
19
|
+
@field.cast_type(@constraints[:maximum])
|
20
|
+
end
|
21
|
+
|
12
22
|
end
|
13
23
|
end
|
14
24
|
end
|
@@ -4,12 +4,16 @@ module TableSchema
|
|
4
4
|
|
5
5
|
def check_min_length
|
6
6
|
return if @value.nil?
|
7
|
-
if @value.length <
|
8
|
-
raise TableSchema::ConstraintError.new("The field `#{@field[
|
7
|
+
if @value.length < parsed_min_length
|
8
|
+
raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must have a minimum length of #{@constraints[:minLength]}")
|
9
9
|
end
|
10
10
|
true
|
11
11
|
end
|
12
12
|
|
13
|
+
def parsed_min_length
|
14
|
+
@constraints[:minLength].to_i
|
15
|
+
end
|
16
|
+
|
13
17
|
end
|
14
18
|
end
|
15
19
|
end
|
@@ -3,12 +3,22 @@ module TableSchema
|
|
3
3
|
module Minimum
|
4
4
|
|
5
5
|
def check_minimum
|
6
|
-
if @
|
7
|
-
|
6
|
+
if @field.type == 'yearmonth'
|
7
|
+
valid = Date.new(@value[:year], @value[:month]) >= Date.new(parsed_minimum[:year], parsed_minimum[:month])
|
8
|
+
else
|
9
|
+
valid = @value >= parsed_minimum
|
10
|
+
end
|
11
|
+
|
12
|
+
unless valid
|
13
|
+
raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must not be less than #{@constraints[:minimum]}")
|
8
14
|
end
|
9
15
|
true
|
10
16
|
end
|
11
17
|
|
18
|
+
def parsed_minimum
|
19
|
+
@field.cast_type(@constraints[:minimum])
|
20
|
+
end
|
21
|
+
|
12
22
|
end
|
13
23
|
end
|
14
24
|
end
|
@@ -3,8 +3,15 @@ module TableSchema
|
|
3
3
|
module Pattern
|
4
4
|
|
5
5
|
def check_pattern
|
6
|
-
|
7
|
-
|
6
|
+
constraint = lambda { |value| value.match(/#{@constraints[:pattern]}/) }
|
7
|
+
if @field.type == 'yearmonth'
|
8
|
+
valid = constraint.call(Date.new(@value[:year], @value[:month]).strftime('%Y-%m'))
|
9
|
+
else
|
10
|
+
valid = constraint.call(@value.to_json)
|
11
|
+
end
|
12
|
+
|
13
|
+
unless valid
|
14
|
+
raise TableSchema::ConstraintError.new("The value for the field `#{@field[:name]}` must match the pattern")
|
8
15
|
end
|
9
16
|
true
|
10
17
|
end
|
@@ -3,30 +3,21 @@ module TableSchema
|
|
3
3
|
module Required
|
4
4
|
|
5
5
|
def check_required
|
6
|
-
if
|
7
|
-
raise TableSchema::ConstraintError.new("The field `#{@field[
|
6
|
+
if parsed_required == true && value_is_empty?
|
7
|
+
raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` requires a value")
|
8
8
|
end
|
9
9
|
true
|
10
10
|
end
|
11
11
|
|
12
12
|
private
|
13
13
|
|
14
|
-
def
|
15
|
-
|
14
|
+
def value_is_empty?
|
15
|
+
@value.nil? || @value == ''
|
16
16
|
end
|
17
17
|
|
18
|
-
def
|
19
|
-
|
18
|
+
def parsed_required
|
19
|
+
@constraints[:required].to_s == 'true'
|
20
20
|
end
|
21
|
-
|
22
|
-
def required
|
23
|
-
@constraints['required'].to_s == 'true'
|
24
|
-
end
|
25
|
-
|
26
|
-
def null_values
|
27
|
-
['null', 'none', 'nil', 'nan', '-', '']
|
28
|
-
end
|
29
|
-
|
30
21
|
end
|
31
22
|
end
|
32
23
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module TableSchema
|
2
2
|
class Exception < ::Exception ; end
|
3
|
-
|
3
|
+
|
4
4
|
class SchemaException < Exception
|
5
5
|
attr_reader :message
|
6
6
|
|
@@ -9,20 +9,33 @@ module TableSchema
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
+
class MultipleInvalid < Exception
|
13
|
+
attr_reader :message, :errors
|
14
|
+
|
15
|
+
def initialize(message, errors=[])
|
16
|
+
@message = message
|
17
|
+
@errors = errors
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
12
21
|
class InvalidFormat < Exception ; end
|
13
22
|
class InvalidCast < Exception ; end
|
14
23
|
class InvalidEmail < Exception ; end
|
15
24
|
class InvalidURI < Exception ; end
|
16
25
|
class InvalidUUID < Exception ; end
|
26
|
+
class InvalidBinary < Exception ; end
|
17
27
|
class InvalidObjectType < Exception ; end
|
18
28
|
class InvalidArrayType < Exception ; end
|
19
29
|
class InvalidDateType < Exception ; end
|
20
30
|
class InvalidTimeType < Exception ; end
|
21
31
|
class InvalidDateTimeType < Exception ; end
|
32
|
+
class InvalidYearType < Exception; end
|
33
|
+
class InvalidYearMonthType < Exception; end
|
34
|
+
class InvalidDurationType < Exception; end
|
22
35
|
class InvalidGeoJSONType < Exception ; end
|
36
|
+
class InvalidTopoJSONType < Exception ; end
|
23
37
|
class InvalidGeoPointType < Exception ; end
|
24
38
|
class ConstraintError < Exception ; end
|
25
39
|
class ConstraintNotSupported < Exception ; end
|
26
40
|
class ConversionError < Exception ; end
|
27
|
-
class MultipleInvalid < Exception ; end
|
28
41
|
end
|
data/lib/tableschema/field.rb
CHANGED
@@ -1,40 +1,59 @@
|
|
1
|
+
require 'tableschema/defaults'
|
2
|
+
|
1
3
|
module TableSchema
|
2
4
|
class Field < Hash
|
3
5
|
include TableSchema::Helpers
|
4
6
|
|
5
|
-
attr_reader :
|
6
|
-
|
7
|
-
def initialize(descriptor)
|
8
|
-
self.merge! descriptor
|
9
|
-
@type_class = get_type
|
10
|
-
end
|
7
|
+
attr_reader :name, :type, :format, :missing_values, :constraints
|
11
8
|
|
12
|
-
def
|
13
|
-
self
|
9
|
+
def initialize(descriptor, missing_values=nil)
|
10
|
+
self.merge! deep_symbolize_keys(descriptor)
|
11
|
+
@name = self[:name]
|
12
|
+
@type = self[:type] = self.fetch(:type, TableSchema::DEFAULTS[:type])
|
13
|
+
@format = self[:format] = self.fetch(:format, TableSchema::DEFAULTS[:format])
|
14
|
+
@constraints = self[:constraints] = self.fetch(:constraints, {})
|
15
|
+
@missing_values = missing_values || default_missing_values
|
14
16
|
end
|
15
17
|
|
16
|
-
def
|
17
|
-
self
|
18
|
+
def descriptor
|
19
|
+
self.to_h
|
18
20
|
end
|
19
21
|
|
20
|
-
def
|
21
|
-
|
22
|
+
def cast_value(value, check_constraints: true)
|
23
|
+
cast_value = cast_type(value)
|
24
|
+
return cast_value if check_constraints == false
|
25
|
+
TableSchema::Constraints.new(self, cast_value).validate!
|
26
|
+
cast_value
|
22
27
|
end
|
23
28
|
|
24
|
-
def
|
25
|
-
|
29
|
+
def test_value(value, check_constraints: true)
|
30
|
+
cast_value(value, check_constraints: check_constraints)
|
31
|
+
true
|
32
|
+
rescue TableSchema::Exception
|
33
|
+
false
|
26
34
|
end
|
27
35
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
|
36
|
+
def cast_type(value)
|
37
|
+
if is_null?(value)
|
38
|
+
nil
|
39
|
+
else
|
40
|
+
type_class.new(self).cast(value)
|
41
|
+
end
|
32
42
|
end
|
33
43
|
|
34
44
|
private
|
35
45
|
|
36
|
-
def
|
37
|
-
|
46
|
+
def default_missing_values
|
47
|
+
defaults = TableSchema::DEFAULTS[:missing_values]
|
48
|
+
@type == 'string' ? defaults - [''] : defaults
|
49
|
+
end
|
50
|
+
|
51
|
+
def type_class
|
52
|
+
Object.const_get get_class_for_type(@type)
|
53
|
+
end
|
54
|
+
|
55
|
+
def is_null?(value)
|
56
|
+
@missing_values.include?(value)
|
38
57
|
end
|
39
58
|
|
40
59
|
end
|
data/lib/tableschema/helpers.rb
CHANGED
@@ -1,6 +1,21 @@
|
|
1
1
|
module TableSchema
|
2
2
|
module Helpers
|
3
3
|
|
4
|
+
def deep_symbolize_keys(descriptor)
|
5
|
+
case descriptor
|
6
|
+
when Hash
|
7
|
+
descriptor.inject({}) do |new_descriptor, (key, val)|
|
8
|
+
key_sym = key.respond_to?(:to_sym) ? key.to_sym : key
|
9
|
+
new_descriptor[key_sym] = deep_symbolize_keys(val)
|
10
|
+
new_descriptor
|
11
|
+
end
|
12
|
+
when Enumerable
|
13
|
+
descriptor.map{ |el| deep_symbolize_keys(el)}
|
14
|
+
else
|
15
|
+
descriptor
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
4
19
|
def convert_to_boolean(value)
|
5
20
|
if value.is_a?(Boolean)
|
6
21
|
return value
|
@@ -22,25 +37,27 @@ module TableSchema
|
|
22
37
|
end
|
23
38
|
|
24
39
|
def get_class_for_type(type)
|
25
|
-
"TableSchema::Types::#{type_class_lookup[type] || 'String'}"
|
40
|
+
"TableSchema::Types::#{type_class_lookup[type.to_sym] || 'String'}"
|
26
41
|
end
|
27
42
|
|
28
43
|
def type_class_lookup
|
29
44
|
{
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
45
|
+
any: 'Any',
|
46
|
+
array: 'Array',
|
47
|
+
base: 'Base',
|
48
|
+
boolean: 'Boolean',
|
49
|
+
date: 'Date',
|
50
|
+
datetime: 'DateTime',
|
51
|
+
geojson: 'GeoJSON',
|
52
|
+
geopoint: 'GeoPoint',
|
53
|
+
integer: 'Integer',
|
54
|
+
number: 'Number',
|
55
|
+
object: 'Object',
|
56
|
+
string: 'String',
|
57
|
+
time: 'Time',
|
58
|
+
year: 'Year',
|
59
|
+
yearmonth: 'YearMonth',
|
60
|
+
duration: 'Duration',
|
44
61
|
}
|
45
62
|
end
|
46
63
|
|
data/lib/tableschema/infer.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'tableschema/defaults'
|
2
|
+
require 'tableschema/field'
|
3
|
+
|
1
4
|
module TableSchema
|
2
5
|
class Infer
|
3
6
|
|
@@ -5,41 +8,41 @@ module TableSchema
|
|
5
8
|
|
6
9
|
attr_reader :schema
|
7
10
|
|
8
|
-
def initialize(headers, rows,
|
11
|
+
def initialize(headers, rows, explicit: false, primary_key: nil, row_limit: nil)
|
9
12
|
@headers = headers
|
10
13
|
@rows = rows
|
11
|
-
@explicit =
|
12
|
-
@primary_key =
|
13
|
-
@row_limit =
|
14
|
+
@explicit = explicit
|
15
|
+
@primary_key = primary_key
|
16
|
+
@row_limit = row_limit
|
14
17
|
|
15
18
|
@schema = {
|
16
|
-
|
19
|
+
fields: fields
|
17
20
|
}
|
18
|
-
@schema[
|
21
|
+
@schema[:primaryKey] = @primary_key if @primary_key
|
19
22
|
infer!
|
20
23
|
end
|
21
24
|
|
22
25
|
def fields
|
23
26
|
@headers.map do |header|
|
24
27
|
descriptor = {
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
+
name: header,
|
29
|
+
title: '',
|
30
|
+
description: '',
|
28
31
|
}
|
29
32
|
|
30
33
|
constraints = {}
|
31
|
-
constraints[
|
32
|
-
constraints[
|
33
|
-
constraints.delete_if { |
|
34
|
-
descriptor[
|
35
|
-
descriptor
|
34
|
+
constraints[:required] = @explicit === true
|
35
|
+
constraints[:unique] = (header == @primary_key)
|
36
|
+
constraints.delete_if { |_,v| v == false } unless @explicit === true
|
37
|
+
descriptor[:constraints] = constraints if constraints.count > 0
|
38
|
+
TableSchema::Field.new(descriptor)
|
36
39
|
end
|
37
40
|
end
|
38
41
|
|
39
42
|
def infer!
|
40
43
|
type_matches = []
|
41
|
-
@rows.each_with_index do |row,
|
42
|
-
break if @row_limit &&
|
44
|
+
@rows.each_with_index do |row, index|
|
45
|
+
break if @row_limit && index > @row_limit
|
43
46
|
row = row.fields if row.class == CSV::Row
|
44
47
|
|
45
48
|
row_length = row.count
|
@@ -53,9 +56,9 @@ module TableSchema
|
|
53
56
|
row = row.push(fill).flatten
|
54
57
|
end
|
55
58
|
|
56
|
-
row.each_with_index do |col,
|
57
|
-
type_matches[
|
58
|
-
type_matches[
|
59
|
+
row.each_with_index do |col, idx|
|
60
|
+
type_matches[idx] ||= []
|
61
|
+
type_matches[idx] << guess_type(col, idx)
|
59
62
|
end
|
60
63
|
|
61
64
|
end
|
@@ -64,12 +67,12 @@ module TableSchema
|
|
64
67
|
end
|
65
68
|
|
66
69
|
def guess_type(col, index)
|
67
|
-
guessed_type =
|
68
|
-
guessed_format =
|
70
|
+
guessed_type = TableSchema::DEFAULTS[:type]
|
71
|
+
guessed_format = TableSchema::DEFAULTS[:format]
|
69
72
|
|
70
73
|
available_types.reverse_each do |type|
|
71
74
|
klass = get_class_for_type(type)
|
72
|
-
converter = Kernel.const_get(klass).new(@schema[
|
75
|
+
converter = Kernel.const_get(klass).new(@schema[:fields][index])
|
73
76
|
if converter.test(col) === true
|
74
77
|
guessed_type = type
|
75
78
|
guessed_format = guess_format(converter, col)
|
@@ -78,22 +81,23 @@ module TableSchema
|
|
78
81
|
end
|
79
82
|
|
80
83
|
{
|
81
|
-
|
82
|
-
|
84
|
+
type: guessed_type,
|
85
|
+
format: guessed_format
|
83
86
|
}
|
84
87
|
end
|
85
88
|
|
86
89
|
def guess_format(converter, col)
|
87
|
-
guessed_format =
|
90
|
+
guessed_format = TableSchema::DEFAULTS[:format]
|
88
91
|
converter.class.instance_methods.grep(/cast_/).each do |method|
|
89
92
|
begin
|
90
93
|
format = method.to_s
|
91
94
|
format.slice!('cast_')
|
92
|
-
next if format ==
|
95
|
+
next if format == TableSchema::DEFAULTS[:format]
|
93
96
|
converter.send(method, col)
|
94
97
|
guessed_format = format
|
95
98
|
break
|
96
99
|
rescue TableSchema::Exception
|
100
|
+
next
|
97
101
|
end
|
98
102
|
end
|
99
103
|
guessed_format
|
@@ -116,7 +120,7 @@ module TableSchema
|
|
116
120
|
rv = sorted_counts[0][0]
|
117
121
|
end
|
118
122
|
|
119
|
-
@schema[
|
123
|
+
@schema[:fields][v].merge!(rv)
|
120
124
|
end
|
121
125
|
|
122
126
|
end
|
@@ -128,7 +132,6 @@ module TableSchema
|
|
128
132
|
'boolean',
|
129
133
|
'number',
|
130
134
|
'integer',
|
131
|
-
'null',
|
132
135
|
'date',
|
133
136
|
'time',
|
134
137
|
'datetime',
|