tableschema 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +14 -0
- data/CHANGELOG.md +31 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +274 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/etc/schemas/geojson.json +209 -0
- data/etc/schemas/json-table-schema.json +102 -0
- data/lib/tableschema.rb +42 -0
- data/lib/tableschema/constraints/constraints.rb +76 -0
- data/lib/tableschema/constraints/enum.rb +14 -0
- data/lib/tableschema/constraints/max_length.rb +15 -0
- data/lib/tableschema/constraints/maximum.rb +14 -0
- data/lib/tableschema/constraints/min_length.rb +15 -0
- data/lib/tableschema/constraints/minimum.rb +14 -0
- data/lib/tableschema/constraints/pattern.rb +14 -0
- data/lib/tableschema/constraints/required.rb +32 -0
- data/lib/tableschema/data.rb +60 -0
- data/lib/tableschema/exceptions.rb +28 -0
- data/lib/tableschema/field.rb +41 -0
- data/lib/tableschema/helpers.rb +48 -0
- data/lib/tableschema/infer.rb +143 -0
- data/lib/tableschema/model.rb +73 -0
- data/lib/tableschema/schema.rb +36 -0
- data/lib/tableschema/table.rb +51 -0
- data/lib/tableschema/types/any.rb +23 -0
- data/lib/tableschema/types/array.rb +37 -0
- data/lib/tableschema/types/base.rb +54 -0
- data/lib/tableschema/types/boolean.rb +35 -0
- data/lib/tableschema/types/date.rb +56 -0
- data/lib/tableschema/types/datetime.rb +63 -0
- data/lib/tableschema/types/geojson.rb +38 -0
- data/lib/tableschema/types/geopoint.rb +56 -0
- data/lib/tableschema/types/integer.rb +35 -0
- data/lib/tableschema/types/null.rb +37 -0
- data/lib/tableschema/types/number.rb +60 -0
- data/lib/tableschema/types/object.rb +37 -0
- data/lib/tableschema/types/string.rb +64 -0
- data/lib/tableschema/types/time.rb +55 -0
- data/lib/tableschema/validate.rb +54 -0
- data/lib/tableschema/version.rb +3 -0
- data/tableschema.gemspec +32 -0
- metadata +231 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
module TableSchema
  # A Table Schema descriptor stored as a Hash, with validation, model, data
  # and helper behaviour mixed in from the sibling TableSchema modules.
  class Schema < Hash
    include TableSchema::Validate
    include TableSchema::Model
    include TableSchema::Data
    include TableSchema::Helpers

    # Loads the descriptor into this hash and runs the setup hooks.
    #
    # @param descriptor [Hash, String] a descriptor hash, or a path/URL that
    #   points at a JSON descriptor
    # @param opts [Hash] kept in @opts
    # @raise [SchemaException] when the descriptor cannot be loaded
    def initialize(descriptor, opts = {})
      merge!(parse_schema(descriptor))
      @messages = []
      @opts = opts
      # NOTE(review): these hooks are not defined in this class; presumably
      # they come from the included modules - confirm.
      load_fields!
      load_validator!
      expand!
    end

    # Turns the supplied descriptor into a Hash.
    #
    # A Hash (or any Hash subclass) is returned untouched; a String is opened
    # and its content parsed as JSON.
    #
    # @param descriptor [Hash, String]
    # @return [Hash]
    # @raise [SchemaException] for a missing file, an HTTP error, invalid
    #   JSON, or an unsupported descriptor type
    def parse_schema(descriptor)
      case descriptor
      when Hash
        descriptor
      when String
        begin
          # Block form closes the underlying IO as soon as it has been read.
          # NOTE(review): opening URLs through Kernel#open relies on open-uri
          # patching it, which newer Rubies no longer do - confirm the
          # supported Ruby versions before switching to URI.open.
          JSON.parse(open(descriptor, &:read))
        rescue Errno::ENOENT
          raise SchemaException.new("File not found at `#{descriptor}`")
        rescue OpenURI::HTTPError => e
          raise SchemaException.new("URL `#{descriptor}` returned #{e.message}")
        rescue JSON::ParserError
          raise SchemaException.new("File at `#{descriptor}` is not valid JSON")
        end
      else
        raise SchemaException.new("A schema must be a hash, path or URL")
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module TableSchema
  # Couples CSV data with a schema so that rows can be cast through it.
  class Table
    attr_reader :schema

    # Builds a Table whose schema is inferred from the CSV itself.
    #
    # @param csv [Array<Array>, String] rows (header row first) or a path/URL
    # @param opts [Hash] forwarded to #initialize
    # @return [TableSchema::Table]
    def self.infer_schema(csv, opts = {})
      TableSchema::Table.new(csv, nil, opts)
    end

    # @param csv [Array<Array>, String] rows (header row first) or a path/URL
    # @param descriptor [Hash, String, nil] schema descriptor; when nil the
    #   schema is inferred from the CSV
    # @param opts [Hash] options; :csv_options is forwarded to CSV.new
    def initialize(csv, descriptor, opts = {})
      @opts = opts
      @csv = parse_csv(csv)
      @schema = descriptor.nil? ? infer_schema(@csv) : TableSchema::Schema.new(descriptor)
    end

    # Wraps the input in a CSV reader.
    #
    # @param csv [Array<Array>, String] an array of rows is serialised into an
    #   in-memory IO; anything else is handed to Kernel#open
    # @return [CSV]
    def parse_csv(csv)
      io = csv.is_a?(Array) ? StringIO.new(array_to_csv(csv)) : open(csv)
      # The options must be splatted: CSV.new takes keyword arguments, and a
      # positional Hash is rejected since keyword arguments were separated
      # from positional ones.
      CSV.new(io, **csv_options)
    end

    # Options handed to CSV.new; `headers: true` always wins over the caller's
    # :csv_options.
    #
    # @return [Hash]
    def csv_options
      (@opts[:csv_options] || {}).merge(headers: true)
    end

    # Casts the CSV rows through the schema.
    #
    # @param opts [Hash] :fail_fast (defaults to true when absent or nil),
    #   :limit, and :keyed (return hashes keyed by header instead of arrays)
    # @return [Array] whatever the schema's cast_rows returns, or an array of
    #   hashes when :keyed is truthy
    def rows(opts = {})
      fail_fast = opts[:fail_fast] || opts[:fail_fast].nil?
      converted = @schema.cast_rows(@csv, fail_fast, opts[:limit])
      # Headers are read only after cast_rows has consumed the CSV.
      opts[:keyed] ? converted_to_hash(@csv.headers, converted) : converted
    end

    private

    # Serialises an array of rows into CSV text with CRLF between rows.
    def array_to_csv(array)
      array.map { |row| row.to_csv(row_sep: nil) }.join("\r\n")
    end

    # Pairs every cell with the header at the same position.
    def converted_to_hash(headers, array)
      array.map do |row|
        Hash[row.map.with_index { |col, i| [headers[i], col] }]
      end
    end

    # Reads the header names from the first row, rewinds the reader and asks
    # TableSchema::Infer for a schema.
    def infer_schema(csv)
      headers = csv.first.to_h.keys
      csv.rewind
      TableSchema::Infer.new(headers, csv).schema
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module TableSchema
  module Types
    # Catch-all type: values pass through the default cast untouched.
    class Any < Base
      # @return [String] the type name
      def name
        'any'
      end

      # @return [Array<String>] names of the constraints this type accepts
      def self.supported_constraints
        %w[required pattern enum]
      end

      # @param value [Object]
      # @return [Object] the very same value
      def cast_default(value)
        value
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module TableSchema
  module Types
    # Array type: accepts Ruby arrays or JSON text that decodes to an array.
    class Array < Base
      # @return [String] the type name
      def name
        'array'
      end

      # @return [Array<String>] names of the constraints this type accepts
      def self.supported_constraints
        %w[required pattern enum minLength maxLength]
      end

      # @return [Class] the Ruby class a cast value must be
      def type
        ::Array
      end

      # Returns arrays as they are and decodes anything else as JSON.
      #
      # @param value [Array, String]
      # @return [Array]
      # @raise [TableSchema::InvalidArrayType] when the value is not an array
      #   and cannot be decoded into one
      def cast_default(value)
        return value if value.is_a?(type)

        decoded = JSON.parse(value)
        unless decoded.is_a?(type)
          raise TableSchema::InvalidArrayType.new("#{value} is not a valid array")
        end

        decoded
      rescue
        # Any StandardError raised above is reported as an invalid array.
        raise TableSchema::InvalidArrayType.new("#{value} is not a valid array")
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module TableSchema
  module Types
    # Shared behaviour for the field types: reads the field descriptor and
    # dispatches a cast to the `cast_<format>` method of the concrete type.
    class Base
      include TableSchema::Helpers

      # @param field [Hash] field descriptor; the 'constraints', 'type' and
      #   'format' keys are read
      def initialize(field)
        @field = field
        @constraints = field['constraints'] || {}
        @required = ['true', true].include?(@constraints['required'])
        @type = @field['type']
        set_format
      end

      # Validates the constraints (unless skipped) and casts the value with
      # the method matching the field's format.
      #
      # @param value [Object] raw value
      # @param skip_constraints [Boolean] when true, constraints are not checked
      # @return [Object, nil] the cast value; nil for a null value on a
      #   non-required field
      # @raise [TableSchema::InvalidFormat] when the type has no
      #   `cast_<format>` method
      def cast(value, skip_constraints = false)
        TableSchema::Constraints.new(@field, value).validate! unless skip_constraints
        return nil if is_null?(value)

        caster = "cast_#{@format}"
        # Check for the method up front rather than matching the text of a
        # NoMethodError message, whose wording differs between Ruby versions.
        # The second argument makes respond_to? see private methods too, the
        # same way send does.
        unless respond_to?(caster, true)
          raise(TableSchema::InvalidFormat.new("The format `#{@format}` is not supported by the type `#{@type}`"))
        end

        send(caster, value)
      end

      # Tells whether the value can be cast, ignoring constraints.
      #
      # @param value [Object]
      # @return [Boolean] false when the cast raises a TableSchema::Exception
      def test(value)
        cast(value, true)
        true
      rescue TableSchema::Exception
        false
      end

      # Derives @format (and @format_string for `fmt:<pattern>` formats) from
      # the field descriptor; the format is 'default' when none is given.
      def set_format
        if (@field['format'] || '').start_with?('fmt:')
          @format, @format_string = *@field['format'].split(':', 2)
        else
          @format = @field['format'] || 'default'
        end
      end

      private

      # A value only counts as null when the field is not required.
      def is_null?(value)
        null_values.include?(value) && @required == false
      end

      # Strings treated as missing values.
      def null_values
        ['null', 'none', 'nil', 'nan', '-', '']
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Ruby has no shared boolean class, so a marker module is mixed into both
# TrueClass and FalseClass; `value.is_a?(Boolean)` then holds for true and
# false alike. Approach taken from http://stackoverflow.com/a/3028378/452684
module Boolean
end

[TrueClass, FalseClass].each { |klass| klass.send(:include, Boolean) }
|
6
|
+
|
7
|
+
module TableSchema
  module Types
    # Boolean type.
    class Boolean < Base
      # @return [String] the type name
      def name
        'boolean'
      end

      # @return [Array<String>] names of the constraints this type accepts
      def self.supported_constraints
        [
          'required',
          'pattern',
          'enum',
        ]
      end

      # @return [Module] the top-level Boolean marker module
      def type
        ::Boolean
      end

      # Converts the value with convert_to_boolean.
      # NOTE(review): convert_to_boolean is not defined here; presumably it
      # comes from TableSchema::Helpers and returns nil for unrecognised
      # input - confirm.
      #
      # @param value [Object] raw value
      # @return [Boolean]
      # @raise [TableSchema::InvalidCast] when the conversion yields nil
      def cast_default(value)
        # Keep the raw value in its own variable so the error message shows
        # what was actually supplied rather than the nil conversion result.
        converted = convert_to_boolean(value)
        raise TableSchema::InvalidCast.new("#{value} is not a #{name}") if converted.nil?
        converted
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module TableSchema
  module Types
    # Date type: casts strings to ::Date.
    class Date < Base
      # @return [String] the type name
      def name
        'date'
      end

      # @return [Array<String>] names of the constraints this type accepts
      def self.supported_constraints
        [
          'required',
          'pattern',
          'enum',
          'minimum',
          'maximum',
        ]
      end

      # @return [Class] the Ruby class a cast value must be
      def type
        ::Date
      end

      # @return [String] strptime pattern used by the default format
      def iso8601
        '%Y-%m-%d'
      end

      # Casts with the ISO 8601 pattern. Overwrites @format_string.
      #
      # @param value [String, ::Date]
      # @return [::Date]
      # @raise [TableSchema::InvalidDateType]
      def cast_default(value)
        @format_string = iso8601
        cast_fmt(value)
      end

      # Casts free-form date text. The text is accepted only when
      # Date._parse extracts exactly three components from it.
      #
      # @param value [String, ::Date]
      # @return [::Date]
      # @raise [TableSchema::InvalidDateType]
      def cast_any(value)
        return value if value.is_a?(type)

        parts = ::Date._parse(value)
        unless parts.values.count == 3
          raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
        end

        ::Date.parse(value)
      rescue ArgumentError
        # Date.parse rejects impossible dates (e.g. 2015-02-30) with an
        # ArgumentError; report it as an invalid date, the same way
        # DateTime#cast_any does.
        raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
      end

      # Casts with the strptime pattern held in @format_string.
      #
      # @param value [String, ::Date]
      # @return [::Date]
      # @raise [TableSchema::InvalidDateType] when the text does not match
      def cast_fmt(value)
        return value if value.is_a?(type)

        begin
          return ::Date.strptime(value, @format_string)
        rescue ArgumentError
          raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
        end
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module TableSchema
  module Types
    # Datetime type: casts strings to ::DateTime.
    class DateTime < Base
      # @return [String] the type name
      def name
        'datetime'
      end

      # @return [Array<String>] names of the constraints this type accepts
      def self.supported_constraints
        %w[required pattern enum minimum maximum]
      end

      # @return [Class] the Ruby class a cast value must be
      def type
        ::DateTime
      end

      # @return [String] strptime pattern used by the default format
      def iso8601
        '%Y-%m-%dT%H:%M:%SZ'
      end

      # Casts with the ISO 8601 pattern. Overwrites @format_string.
      #
      # @param value [String, ::DateTime]
      # @return [::DateTime]
      # @raise [TableSchema::InvalidDateTimeType]
      def cast_default(value)
        @format_string = iso8601
        cast_fmt(value)
      end

      # Casts free-form datetime text. The text is accepted only when
      # DateTime._parse extracts at least four components from it.
      #
      # @param value [String, ::DateTime]
      # @return [::DateTime]
      # @raise [TableSchema::InvalidDateTimeType]
      def cast_any(value)
        return value if value.is_a?(type)

        components = ::DateTime._parse(value)
        if components.values.count < 4
          raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid datetime")
        end

        ::DateTime.parse(value)
      rescue ArgumentError
        raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid datetime")
      end

      # Casts with the strptime pattern held in @format_string.
      #
      # @param value [String, ::DateTime]
      # @return [::DateTime]
      # @raise [TableSchema::InvalidDateTimeType] when the text does not match
      def cast_fmt(value)
        return value if value.is_a?(type)

        ::DateTime.strptime(value, @format_string)
      rescue ArgumentError
        raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid date")
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module TableSchema
  module Types
    # GeoJSON type: a Hash (or JSON text) validated against the bundled
    # GeoJSON JSON schema.
    class GeoJSON < Base
      # @return [String] the type name
      def name
        'geojson'
      end

      # @return [Array<String>] names of the constraints this type accepts
      def self.supported_constraints
        %w[required pattern enum]
      end

      # @return [Class] the Ruby class a cast value must be
      def type
        ::Hash
      end

      # Decodes the value from JSON unless it is already a Hash, then
      # validates it against the GeoJSON schema.
      #
      # @param value [Hash, String]
      # @return [Hash] the validated structure
      # @raise [TableSchema::InvalidGeoJSONType] on a JSON parse error or a
      #   schema validation failure
      def cast_default(value)
        value = JSON.parse(value) unless value.is_a?(type)
        JSON::Validator.validate!(geojson_schema, value)
        value
      rescue JSON::Schema::ValidationError, JSON::ParserError
        raise TableSchema::InvalidGeoJSONType.new("#{value} is not valid GeoJSON")
      end

      private

      # Parsed etc/schemas/geojson.json, read once and then cached on the
      # instance.
      def geojson_schema
        @geojson_schema ||= begin
          schema_file = File.join(File.dirname(__FILE__), "..", "..", "..", "etc", "schemas", "geojson.json")
          JSON.parse(File.read(schema_file))
        end
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module TableSchema
  module Types
    # Geopoint type: a [longitude, latitude] pair of Floats.
    class GeoPoint < Base
      # @return [String] the type name
      def name
        'geopoint'
      end

      # @return [Array<String>] names of the constraints this type accepts
      def self.supported_constraints
        [
          'required',
          'pattern',
          'enum'
        ]
      end

      # @return [Array<Class>] Ruby classes a raw geopoint may arrive as
      def types
        [::String, ::Array, ::Hash]
      end

      # Casts "<longitude>,<latitude>" text.
      #
      # @param value [String]
      # @return [Array<Float>] [longitude, latitude]
      # @raise [TableSchema::InvalidGeoPointType]
      def cast_default(value)
        longitude, latitude = value.split(',', 2)
        cast_array([longitude, latitude])
      end

      # Casts a Hash (or JSON object text) with 'longitude' and 'latitude' keys.
      #
      # @param value [Hash, String]
      # @return [Array<Float>] [longitude, latitude]
      # @raise [TableSchema::InvalidGeoPointType]
      def cast_object(value)
        value = JSON.parse(value) if value.is_a?(::String)
        cast_array([value['longitude'], value['latitude']])
      rescue JSON::ParserError
        raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
      end

      # Casts a two-element array (or JSON array text); both elements are
      # converted with Float().
      #
      # @param value [Array, String]
      # @return [Array<Float>] [longitude, latitude]
      # @raise [TableSchema::InvalidGeoPointType] when the input cannot be
      #   parsed, is not numeric, or is out of range
      def cast_array(value)
        value = JSON.parse(value) if value.is_a?(::String)
        value = [Float(value[0]), Float(value[1])]
        check_latlng_range(value)
        value
      rescue JSON::ParserError, ArgumentError, TypeError
        raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
      end

      private

      # Ensures longitude lies in [-180, 180] and latitude in [-90, 90].
      # The limits themselves are accepted (e.g. latitude 90).
      def check_latlng_range(geopoint)
        longitude = geopoint[0]
        latitude = geopoint[1]
        if longitude > 180 || longitude < -180
          raise TableSchema::InvalidGeoPointType.new("longitude should be between -180 and 180, found `#{longitude}`")
        elsif latitude > 90 || latitude < -90
          raise TableSchema::InvalidGeoPointType.new("latitude should be between -90 and 90, found `#{latitude}`")
        end
      end
    end
  end
end
|