tableschema 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/CHANGELOG.md +31 -0
  6. data/CODE_OF_CONDUCT.md +49 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +274 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/etc/schemas/geojson.json +209 -0
  14. data/etc/schemas/json-table-schema.json +102 -0
  15. data/lib/tableschema.rb +42 -0
  16. data/lib/tableschema/constraints/constraints.rb +76 -0
  17. data/lib/tableschema/constraints/enum.rb +14 -0
  18. data/lib/tableschema/constraints/max_length.rb +15 -0
  19. data/lib/tableschema/constraints/maximum.rb +14 -0
  20. data/lib/tableschema/constraints/min_length.rb +15 -0
  21. data/lib/tableschema/constraints/minimum.rb +14 -0
  22. data/lib/tableschema/constraints/pattern.rb +14 -0
  23. data/lib/tableschema/constraints/required.rb +32 -0
  24. data/lib/tableschema/data.rb +60 -0
  25. data/lib/tableschema/exceptions.rb +28 -0
  26. data/lib/tableschema/field.rb +41 -0
  27. data/lib/tableschema/helpers.rb +48 -0
  28. data/lib/tableschema/infer.rb +143 -0
  29. data/lib/tableschema/model.rb +73 -0
  30. data/lib/tableschema/schema.rb +36 -0
  31. data/lib/tableschema/table.rb +51 -0
  32. data/lib/tableschema/types/any.rb +23 -0
  33. data/lib/tableschema/types/array.rb +37 -0
  34. data/lib/tableschema/types/base.rb +54 -0
  35. data/lib/tableschema/types/boolean.rb +35 -0
  36. data/lib/tableschema/types/date.rb +56 -0
  37. data/lib/tableschema/types/datetime.rb +63 -0
  38. data/lib/tableschema/types/geojson.rb +38 -0
  39. data/lib/tableschema/types/geopoint.rb +56 -0
  40. data/lib/tableschema/types/integer.rb +35 -0
  41. data/lib/tableschema/types/null.rb +37 -0
  42. data/lib/tableschema/types/number.rb +60 -0
  43. data/lib/tableschema/types/object.rb +37 -0
  44. data/lib/tableschema/types/string.rb +64 -0
  45. data/lib/tableschema/types/time.rb +55 -0
  46. data/lib/tableschema/validate.rb +54 -0
  47. data/lib/tableschema/version.rb +3 -0
  48. data/tableschema.gemspec +32 -0
  49. metadata +231 -0
@@ -0,0 +1,36 @@
1
module TableSchema
  # A Table Schema descriptor stored as a Hash.
  #
  # The descriptor can be supplied as a Hash, or as a String holding either a
  # local file path or an http(s)/ftp URL that points at a JSON document.
  class Schema < Hash
    include TableSchema::Validate
    include TableSchema::Model
    include TableSchema::Data
    include TableSchema::Helpers

    # Descriptor strings matching this pattern are fetched through open-uri;
    # every other string is treated as a local file path.
    REMOTE_DESCRIPTOR = %r{\A(?:https?|ftp)://}i

    # @param descriptor [Hash, String] schema hash, file path or URL
    # @param opts [Hash] stored in @opts; not interpreted by this class itself
    # @raise [SchemaException] when the descriptor cannot be loaded
    def initialize(descriptor, opts = {})
      merge!(parse_schema(descriptor))
      @messages = []
      @opts = opts
      # NOTE(review): the next three methods are not defined in this class;
      # presumably they are provided by the modules included above.
      load_fields!
      load_validator!
      expand!
    end

    # Resolves +descriptor+ to a Hash.
    #
    # Any Hash (including subclasses such as another Schema) is returned
    # untouched; a String is loaded from disk or from the network and parsed
    # as JSON.
    #
    # @param descriptor [Hash, String]
    # @return [Hash] the Hash given, or whatever JSON.parse returns for the document
    # @raise [SchemaException] for a missing file, an HTTP error, invalid JSON
    #   or an unsupported descriptor type
    def parse_schema(descriptor)
      case descriptor
      when Hash
        descriptor
      when String
        parse_descriptor_at(descriptor)
      else
        raise SchemaException.new("A schema must be a hash, path or URL")
      end
    end

    private

    # Reads and parses the JSON document at +location+, translating the
    # low-level failures into SchemaException.
    def parse_descriptor_at(location)
      JSON.parse(read_descriptor_at(location))
    rescue Errno::ENOENT
      raise SchemaException.new("File not found at `#{location}`")
    rescue OpenURI::HTTPError => e
      raise SchemaException.new("URL `#{location}` returned #{e.message}")
    rescue JSON::ParserError
      raise SchemaException.new("File at `#{location}` is not valid JSON")
    end

    # Returns the raw text found at +location+.
    #
    # Kernel#open is deliberately avoided: it executes a command when the
    # string starts with "|", it no longer fetches URLs on Ruby 3.0+, and the
    # handle it returns was never closed. File.read closes the file itself.
    def read_descriptor_at(location)
      if location =~ REMOTE_DESCRIPTOR
        URI.parse(location).read
      else
        File.read(location)
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module TableSchema
  # Pairs tabular CSV data with a schema and casts the rows through it.
  class Table
    # String sources matching this pattern are fetched through open-uri;
    # every other source is opened as a local file.
    REMOTE_SOURCE = %r{\A(?:https?|ftp)://}i

    attr_reader :schema

    # Builds a Table with a nil descriptor, which makes the constructor infer
    # the schema from the data. Note that the Table itself is returned; the
    # inferred schema is available through #schema.
    #
    # @param csv [Array<Array>, String] rows (header row first), file path or URL
    # @param opts [Hash] forwarded to the constructor
    # @return [TableSchema::Table]
    def self.infer_schema(csv, opts = {})
      TableSchema::Table.new(csv, nil, opts)
    end

    # @param csv [Array<Array>, String] rows (header row first), file path or URL
    # @param descriptor [Object, nil] passed to TableSchema::Schema.new; when
    #   nil the schema is inferred from the data
    # @param opts [Hash] +:csv_options+ is merged into the options given to CSV.new
    def initialize(csv, descriptor, opts = {})
      @opts = opts
      @csv = parse_csv(csv)
      @schema = descriptor.nil? ? infer_schema(@csv) : TableSchema::Schema.new(descriptor)
    end

    # Wraps the data source in a CSV reader that treats the first row as headers.
    #
    # @param csv [Array<Array>, String] rows, file path or URL
    # @return [CSV]
    def parse_csv(csv)
      # The options must be splatted: CSV.new takes keyword arguments, and a
      # positional Hash raises ArgumentError on Ruby 3.
      CSV.new(open_source(csv), **csv_options)
    end

    # User supplied CSV options with header handling forced on.
    #
    # @return [Hash]
    def csv_options
      (@opts[:csv_options] || {}).merge(headers: true)
    end

    # Casts the rows through the schema.
    #
    # @param opts [Hash]
    #   +:fail_fast+ is passed to the schema and defaults to true when absent or nil;
    #   +:limit+ is passed to the schema as-is;
    #   +:keyed+ returns each row as a Hash keyed by header instead of an Array.
    # @return [Array] whatever the schema's cast_rows returns, or an Array of
    #   Hashes when +:keyed+ is truthy
    def rows(opts = {})
      fail_fast = opts[:fail_fast].nil? ? true : opts[:fail_fast]
      converted = @schema.cast_rows(@csv, fail_fast, opts[:limit])
      opts[:keyed] ? converted_to_hash(@csv.headers, converted) : converted
    end

    private

    # Returns an IO for the given source.
    #
    # Kernel#open is deliberately avoided: it executes a command when the
    # string starts with "|" and no longer fetches URLs on Ruby 3.0+.
    # The returned handle stays open because the CSV reader pulls from it lazily.
    def open_source(source)
      return StringIO.new(array_to_csv(source)) if source.is_a?(Array)
      # Objects that know how to open themselves (URI, Pathname) keep working,
      # mirroring what open-uri's Kernel#open did.
      return source.open if source.respond_to?(:open)
      return URI.parse(source).open if source.is_a?(String) && source =~ REMOTE_SOURCE

      File.open(source)
    end

    # Serialises an Array of rows into CSV text, rows separated by CRLF.
    def array_to_csv(array)
      array.map { |row| row.to_csv(row_sep: nil) }.join("\r\n")
    end

    # Pairs every cell with the header found at the same position.
    def converted_to_hash(headers, array)
      array.map do |row|
        row.each_with_index.map { |col, i| [headers[i], col] }.to_h
      end
    end

    # Reads the header names off the first row, rewinds the reader and hands
    # both to TableSchema::Infer.
    def infer_schema(csv)
      headers = csv.first.to_h.keys
      csv.rewind
      inferer = TableSchema::Infer.new(headers, csv)
      inferer.schema
    end
  end
end
@@ -0,0 +1,23 @@
1
module TableSchema
  module Types
    # Type whose default cast hands the value back unchanged.
    class Any < Base
      # Constraint names this type declares support for.
      #
      # @return [Array<String>]
      def self.supported_constraints
        %w[required pattern enum]
      end

      # @return [String] the type name, 'any'
      def name
        'any'
      end

      # @param value [Object]
      # @return [Object] +value+, untouched
      def cast_default(value)
        value
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module TableSchema
  module Types
    # Casts values to ::Array, parsing anything else as JSON first.
    class Array < Base
      # Constraint names this type declares support for.
      #
      # @return [Array<String>]
      def self.supported_constraints
        %w[required pattern enum minLength maxLength]
      end

      # @return [String] the type name, 'array'
      def name
        'array'
      end

      # @return [Class] the Ruby class a successfully cast value belongs to
      def type
        ::Array
      end

      # Returns +value+ as-is when it already is an Array; otherwise parses it
      # as JSON and requires the result to be an Array.
      #
      # @param value [Object]
      # @return [::Array]
      # @raise [TableSchema::InvalidArrayType] on any failure, including parse errors
      def cast_default(value)
        candidate = value.is_a?(type) ? value : JSON.parse(value)
        raise not_an_array(value) unless candidate.is_a?(type)

        candidate
      rescue StandardError
        # Every StandardError raised above is reported as an invalid array.
        raise not_an_array(value)
      end

      private

      # Builds the error raised for a value that cannot be cast.
      def not_an_array(value)
        TableSchema::InvalidArrayType.new("#{value} is not a valid array")
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
module TableSchema
  module Types
    # Shared behaviour for the field types: reads the field descriptor and
    # dispatches a cast to the `cast_<format>` method of the concrete type.
    class Base
      include TableSchema::Helpers

      # @param field [Hash] field descriptor; the 'constraints', 'type' and
      #   'format' keys are read
      def initialize(field)
        @field = field
        @constraints = field['constraints'] || {}
        # Both the boolean and the string spelling of true mark the field required.
        @required = ['true', true].include?(@constraints['required'])
        @type = @field['type']
        set_format
      end

      # Casts +value+ using the method named after the field's format.
      #
      # @param value [Object] raw value
      # @param skip_constraints [Boolean] when true the constraint validation
      #   is not run
      # @return [Object, nil] the cast value, or nil for a null value on a
      #   field that is not required
      # @raise [TableSchema::InvalidFormat] when this type has no
      #   `cast_<format>` method
      def cast(value, skip_constraints = false)
        TableSchema::Constraints.new(@field, value).validate! unless skip_constraints
        return nil if is_null?(value)

        caster = "cast_#{@format}"
        # Ask the object instead of parsing the NoMethodError text: the wording
        # of that message differs between Ruby versions (3.4 changed its
        # quoting), which made the old message match miss.
        unless respond_to?(caster, true)
          raise(TableSchema::InvalidFormat.new("The format `#{@format}` is not supported by the type `#{@type}`"))
        end

        send(caster, value)
      end

      # Reports whether +value+ can be cast, with constraint checks skipped.
      #
      # @param value [Object]
      # @return [Boolean] false when the cast raises a TableSchema::Exception
      def test(value)
        cast(value, true)
        true
      rescue TableSchema::Exception
        false
      end

      # Derives @format (and @format_string for 'fmt:<pattern>' formats) from
      # the field descriptor; the format is 'default' when none is given.
      def set_format
        declared = @field['format'] || 'default'
        if declared.start_with?('fmt:')
          @format, @format_string = declared.split(':', 2)
        else
          @format = declared
        end
      end

      private

      # A value only counts as null when the field is not required.
      def is_null?(value)
        null_values.include?(value) && @required == false
      end

      # Strings treated as null; the comparison is exact.
      def null_values
        ['null', 'none', 'nil', 'nan', '-', '']
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
# Ruby has no single boolean class, so a shared marker module is mixed into
# both TrueClass and FalseClass; `value.is_a?(Boolean)` then covers true and
# false alike. Approach taken from http://stackoverflow.com/a/3028378/452684
module Boolean
end

[TrueClass, FalseClass].each { |klass| klass.send(:include, Boolean) }
6
+
7
module TableSchema
  module Types
    # Casts values to true / false.
    class Boolean < Base
      # Constraint names this type declares support for.
      #
      # @return [Array<String>]
      def self.supported_constraints
        %w[required pattern enum]
      end

      # @return [String] the type name, 'boolean'
      def name
        'boolean'
      end

      # @return [Module] the top-level ::Boolean marker
      def type
        ::Boolean
      end

      # Converts +value+ with convert_to_boolean.
      # NOTE(review): convert_to_boolean is not defined in this class;
      # presumably it comes from TableSchema::Helpers via Base.
      #
      # @param value [Object]
      # @return [Object] the conversion result
      # @raise [TableSchema::InvalidCast] when the conversion yields nil
      def cast_default(value)
        # Keep the raw input in +value+ so the error message can show it; the
        # previous code overwrote it and always reported an empty value.
        converted = convert_to_boolean(value)
        raise TableSchema::InvalidCast.new("#{value} is not a #{name}") if converted.nil?

        converted
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
module TableSchema
  module Types
    # Casts values to ::Date.
    class Date < Base
      # Constraint names this type declares support for.
      #
      # @return [Array<String>]
      def self.supported_constraints
        %w[required pattern enum minimum maximum]
      end

      # @return [String] the type name, 'date'
      def name
        'date'
      end

      # @return [Class] the Ruby class a successfully cast value belongs to
      def type
        ::Date
      end

      # @return [String] strptime pattern used by the default format
      def iso8601
        '%Y-%m-%d'
      end

      # Parses +value+ with the ISO 8601 pattern.
      # Note that this overwrites @format_string.
      #
      # @raise [TableSchema::InvalidDateType]
      def cast_default(value)
        @format_string = iso8601
        cast_fmt(value)
      end

      # Parses +value+ leniently. The value is accepted only when Date._parse
      # recognises exactly three components.
      #
      # @param value [::Date, String]
      # @return [::Date]
      # @raise [TableSchema::InvalidDateType]
      def cast_any(value)
        return value if value.is_a?(type)

        components = ::Date._parse(value)
        unless components.values.count == 3
          raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
        end

        ::Date.parse(value)
      rescue ArgumentError
        # Date.parse rejects impossible dates such as "2015-02-30" with an
        # ArgumentError; report it like every other invalid date, as the
        # datetime type already does.
        raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
      end

      # Parses +value+ with the strptime pattern held in @format_string.
      #
      # @param value [::Date, String]
      # @return [::Date]
      # @raise [TableSchema::InvalidDateType]
      def cast_fmt(value)
        return value if value.is_a?(type)

        ::Date.strptime(value, @format_string)
      rescue ArgumentError
        raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
module TableSchema
  module Types
    # Casts values to ::DateTime.
    class DateTime < Base
      # Constraint names this type declares support for.
      #
      # @return [Array<String>]
      def self.supported_constraints
        %w[required pattern enum minimum maximum]
      end

      # @return [String] the type name, 'datetime'
      def name
        'datetime'
      end

      # @return [Class] the Ruby class a successfully cast value belongs to
      def type
        ::DateTime
      end

      # @return [String] strptime pattern used by the default format; note the
      #   trailing literal "Z"
      def iso8601
        '%Y-%m-%dT%H:%M:%SZ'
      end

      # Unused notes kept from the original source (they look like they were
      # carried over from a Python implementation — TODO confirm or remove):
      # raw_formats = ['DD/MM/YYYYThh/mm/ss']
      # py_formats = ['%Y/%m/%dT%H:%M:%S']
      # format_map = dict(zip(raw_formats, py_formats))

      # Parses +value+ with the ISO 8601 pattern.
      # Note that this overwrites @format_string.
      #
      # @raise [TableSchema::InvalidDateTimeType]
      def cast_default(value)
        @format_string = iso8601
        cast_fmt(value)
      end

      # Parses +value+ leniently. The value is accepted only when
      # DateTime._parse recognises at least four components.
      #
      # @param value [::DateTime, String]
      # @return [::DateTime]
      # @raise [TableSchema::InvalidDateTimeType]
      def cast_any(value)
        return value if value.is_a?(type)

        components = ::DateTime._parse(value)
        unless components.values.count >= 4
          raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid datetime")
        end

        ::DateTime.parse(value)
      rescue ArgumentError
        raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid datetime")
      end

      # Parses +value+ with the strptime pattern held in @format_string.
      #
      # @param value [::DateTime, String]
      # @return [::DateTime]
      # @raise [TableSchema::InvalidDateTimeType]
      def cast_fmt(value)
        return value if value.is_a?(type)

        ::DateTime.strptime(value, @format_string)
      rescue ArgumentError
        # The message now says "datetime", matching cast_any; it used to say "date".
        raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid datetime")
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
module TableSchema
  module Types
    # Casts values to a Hash that validates against the bundled GeoJSON schema.
    class GeoJSON < Base
      # Constraint names this type declares support for.
      #
      # @return [Array<String>]
      def self.supported_constraints
        %w[required pattern enum]
      end

      # @return [String] the type name, 'geojson'
      def name
        'geojson'
      end

      # @return [Class] the Ruby class a successfully cast value belongs to
      def type
        ::Hash
      end

      # Parses +value+ as JSON unless it already is a Hash, then validates the
      # result against the GeoJSON schema.
      #
      # @param value [::Hash, String]
      # @return [Object] the validated value (the parsed document for String input)
      # @raise [TableSchema::InvalidGeoJSONType] on a JSON parse error or a
      #   schema validation error
      def cast_default(value)
        # +value+ is reassigned on purpose: the error message below reports
        # the parsed document when parsing worked but validation failed.
        value = JSON.parse(value) unless value.is_a?(type)
        value.tap { |document| JSON::Validator.validate!(geojson_schema, document) }
      rescue JSON::Schema::ValidationError, JSON::ParserError
        raise TableSchema::InvalidGeoJSONType.new("#{value} is not valid GeoJSON")
      end

      private

      # Loads etc/schemas/geojson.json (three directories above this file) on
      # first use and keeps the parsed result for later calls.
      def geojson_schema
        @geojson_schema ||= begin
          schema_file = File.join(File.dirname(__FILE__), "..", "..", "..", "etc", "schemas", "geojson.json")
          JSON.parse(File.read(schema_file))
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
module TableSchema
  module Types
    # Casts values to a two element Array of Floats, ordered
    # [longitude, latitude].
    class GeoPoint < Base
      # Constraint names this type declares support for.
      #
      # @return [Array<String>]
      def self.supported_constraints
        %w[required pattern enum]
      end

      # @return [String] the type name, 'geopoint'
      def name
        'geopoint'
      end

      # @return [Array<Class>]
      def types
        [::String, ::Array, ::Hash]
      end

      # Casts a "longitude,latitude" string.
      #
      # @param value [String]
      # @return [Array<Float>] [longitude, latitude]
      # @raise [TableSchema::InvalidGeoPointType]
      def cast_default(value)
        # Split once only: any further comma stays in the second part and is
        # rejected by the float conversion.
        longitude, latitude = value.split(',', 2)
        cast_array([longitude, latitude])
      end

      # Casts a Hash (or its JSON text) with 'longitude' and 'latitude' keys.
      #
      # @param value [::Hash, String]
      # @return [Array<Float>] [longitude, latitude]
      # @raise [TableSchema::InvalidGeoPointType]
      def cast_object(value)
        value = JSON.parse(value) if value.is_a?(::String)
        # JSON text holding something other than an object (an array, a bare
        # number) used to leak a TypeError or be indexed nonsensically.
        unless value.is_a?(::Hash)
          raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
        end

        cast_array([value['longitude'], value['latitude']])
      rescue JSON::ParserError
        raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
      end

      # Casts a two element Array (or its JSON text), ordered
      # [longitude, latitude].
      #
      # @param value [::Array, String]
      # @return [Array<Float>] [longitude, latitude]
      # @raise [TableSchema::InvalidGeoPointType] for unparsable input,
      #   anything that is not a two element Array, non-numeric items or
      #   out-of-range coordinates
      def cast_array(value)
        value = JSON.parse(value) if value.is_a?(::String)
        # Insist on a real two element Array: a parsed integer also answers
        # to [] (bit reference), so "5" used to be cast to [1.0, 0.0].
        unless value.is_a?(::Array) && value.size == 2
          raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
        end

        point = [Float(value[0]), Float(value[1])]
        check_latlng_range(point)
        point
      rescue JSON::ParserError, ArgumentError, TypeError
        raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
      end

      private

      # Ensures the coordinates lie within [-180, 180] and [-90, 90].
      # The bounds are inclusive so the antimeridian and the poles are accepted.
      #
      # @param geopoint [Array<Float>] [longitude, latitude]
      # @raise [TableSchema::InvalidGeoPointType]
      def check_latlng_range(geopoint)
        longitude, latitude = geopoint
        if longitude > 180 || longitude < -180
          raise TableSchema::InvalidGeoPointType.new("longitude should be between -180 and 180, found `#{longitude}`")
        elsif latitude > 90 || latitude < -90
          raise TableSchema::InvalidGeoPointType.new("latitude should be between -90 and 90, found `#{latitude}`")
        end
      end
    end
  end
end