jsontableschema 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG.md +17 -0
  6. data/CODE_OF_CONDUCT.md +49 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +233 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/etc/schemas/geojson.json +209 -0
  14. data/etc/schemas/json-table-schema.json +102 -0
  15. data/jsontableschema.gemspec +32 -0
  16. data/lib/jsontableschema.rb +41 -0
  17. data/lib/jsontableschema/constraints/constraints.rb +76 -0
  18. data/lib/jsontableschema/constraints/enum.rb +14 -0
  19. data/lib/jsontableschema/constraints/max_length.rb +15 -0
  20. data/lib/jsontableschema/constraints/maximum.rb +14 -0
  21. data/lib/jsontableschema/constraints/min_length.rb +15 -0
  22. data/lib/jsontableschema/constraints/minimum.rb +14 -0
  23. data/lib/jsontableschema/constraints/pattern.rb +14 -0
  24. data/lib/jsontableschema/constraints/required.rb +32 -0
  25. data/lib/jsontableschema/data.rb +57 -0
  26. data/lib/jsontableschema/exceptions.rb +28 -0
  27. data/lib/jsontableschema/helpers.rb +48 -0
  28. data/lib/jsontableschema/infer.rb +142 -0
  29. data/lib/jsontableschema/model.rb +73 -0
  30. data/lib/jsontableschema/schema.rb +35 -0
  31. data/lib/jsontableschema/table.rb +50 -0
  32. data/lib/jsontableschema/types/any.rb +23 -0
  33. data/lib/jsontableschema/types/array.rb +37 -0
  34. data/lib/jsontableschema/types/base.rb +54 -0
  35. data/lib/jsontableschema/types/boolean.rb +35 -0
  36. data/lib/jsontableschema/types/date.rb +56 -0
  37. data/lib/jsontableschema/types/datetime.rb +63 -0
  38. data/lib/jsontableschema/types/geojson.rb +38 -0
  39. data/lib/jsontableschema/types/geopoint.rb +56 -0
  40. data/lib/jsontableschema/types/integer.rb +35 -0
  41. data/lib/jsontableschema/types/null.rb +37 -0
  42. data/lib/jsontableschema/types/number.rb +60 -0
  43. data/lib/jsontableschema/types/object.rb +37 -0
  44. data/lib/jsontableschema/types/string.rb +64 -0
  45. data/lib/jsontableschema/types/time.rb +55 -0
  46. data/lib/jsontableschema/validate.rb +54 -0
  47. data/lib/jsontableschema/version.rb +3 -0
  48. metadata +230 -0
@@ -0,0 +1,48 @@
1
module JsonTableSchema
  # Helpers shared by the schema, inference and type classes: loose boolean
  # coercion and the mapping from schema type names to type class names.
  module Helpers

    # Coerces a loosely-typed value to a boolean.
    #
    # Real booleans are returned unchanged. Anything else is compared,
    # case-insensitively and via its string form, against #true_values and
    # #false_values.
    #
    # @param value [Object] the value to coerce
    # @return [true, false, nil] nil when the value is not recognised
    def convert_to_boolean(value)
      # Ruby core has no `Boolean` class, so test against the two singletons
      # directly instead of depending on a constant defined in another file.
      return value if true.equal?(value) || false.equal?(value)

      normalized = value.to_s.downcase
      return true if true_values.include?(normalized)
      return false if false_values.include?(normalized)

      nil
    end

    # @return [Array<String>] lower-case strings treated as true
    def true_values
      %w[yes y true t 1]
    end

    # @return [Array<String>] lower-case strings treated as false
    def false_values
      %w[no n false f 0]
    end

    # Builds the fully-qualified class name for a schema type name.
    #
    # @param type [String, Symbol] a schema type name such as 'integer'
    # @return [String] e.g. "JsonTableSchema::Types::Integer"; for a type that
    #   is not in #type_class_lookup the part after the final `::` is empty
    def get_class_for_type(type)
      "JsonTableSchema::Types::#{type_class_lookup[type.to_s]}"
    end

    # @return [Hash{String => String}] schema type name => type class name
    def type_class_lookup
      {
        'any' => 'Any',
        'array' => 'Array',
        'base' => 'Base',
        'boolean' => 'Boolean',
        'date' => 'Date',
        'datetime' => 'DateTime',
        'geojson' => 'GeoJSON',
        'geopoint' => 'GeoPoint',
        'integer' => 'Integer',
        'null' => 'Null',
        'number' => 'Number',
        'object' => 'Object',
        'string' => 'String',
        'time' => 'Time',
      }
    end

  end
end
@@ -0,0 +1,142 @@
1
module JsonTableSchema
  # Infers a JSON Table Schema descriptor from a list of headers and a sample
  # of data rows. The result is exposed through #schema.
  class Infer

    include JsonTableSchema::Helpers

    attr_reader :schema

    # @param headers [Array<String>] column names
    # @param rows [Array<Array>] data rows used as the inference sample
    # @param opts [Hash] :explicit (emit every constraint, even when false),
    #   :primary_key (header to mark as primary key / unique) and
    #   :row_limit (maximum number of rows to sample)
    def initialize(headers, rows, opts = {})
      @headers = headers
      @rows = rows
      @explicit = opts[:explicit]
      @primary_key = opts[:primary_key]
      @row_limit = opts[:row_limit]

      @schema = {
        'fields' => fields
      }
      @schema['primaryKey'] = @primary_key if @primary_key
      infer!
    end

    # Builds one bare field descriptor per header.
    #
    # @return [Array<Hash>] descriptors with name/title/description and, where
    #   applicable, a constraints hash
    def fields
      @headers.map do |header|
        descriptor = {
          'name' => header,
          'title' => '',
          'description' => '',
        }

        constraints = {}
        constraints['required'] = @explicit == true
        constraints['unique'] = (header == @primary_key)
        # Outside explicit mode only constraints that are switched on are kept.
        constraints.delete_if { |_k, v| v == false } unless @explicit == true
        descriptor['constraints'] = constraints if constraints.count > 0
        descriptor
      end
    end

    # Guesses a type for every cell of the sampled rows, merges the winning
    # guess per column into the field descriptors and replaces @schema with a
    # JsonTableSchema::Schema built from the result.
    def infer!
      type_matches = []
      width = @headers.count

      @rows.each_with_index do |row, row_index|
        # `>=` so that exactly @row_limit rows are sampled (indices are 0-based).
        break if @row_limit && row_index >= @row_limit

        fit_row(row, width).each_with_index do |col, col_index|
          (type_matches[col_index] ||= []) << guess_type(col, col_index)
        end
      end

      resolve_types(type_matches)
      @schema = JsonTableSchema::Schema.new(@schema)
    end

    # Finds the first type, trying #available_types from last to first, whose
    # converter accepts the value.
    #
    # @param col [Object] the cell value
    # @param index [Integer] column index, used to pick the field descriptor
    # @return [Hash] 'type' and 'format' keys; defaults to string/default
    def guess_type(col, index)
      guessed_type = 'string'
      guessed_format = 'default'

      available_types.reverse_each do |type|
        klass = get_class_for_type(type)
        converter = Kernel.const_get(klass).new(@schema['fields'][index])
        if converter.test(col) == true
          guessed_type = type
          guessed_format = guess_format(converter, col)
          break
        end
      end

      {
        'type' => guessed_type,
        'format' => guessed_format
      }
    end

    # Tries each non-default `cast_*` method of the converter and returns the
    # format name of the first one that does not raise a
    # JsonTableSchema::Exception.
    #
    # @return [String] the format name, or 'default' when none applies
    def guess_format(converter, col)
      guessed_format = 'default'
      converter.class.instance_methods.grep(/cast_/).each do |method|
        begin
          format = method.to_s.sub('cast_', '')
          next if format == 'default'
          converter.send(method, col)
          guessed_format = format
          break
        rescue JsonTableSchema::Exception
          # This format does not fit the value; try the next one.
        end
      end
      guessed_format
    end

    # Merges, for each column, the most frequent type/format guess into the
    # matching field descriptor. Ties go to the guess that was seen first.
    #
    # @param results [Array<Array<Hash>>] guesses per column
    def resolve_types(results)
      results.each_with_index do |result, index|
        counts = Hash.new(0)
        result.each { |guess| counts[guess] += 1 }

        winner, _count = counts.max_by { |_guess, count| count }
        @schema['fields'][index].merge!(winner)
      end
    end

    # @return [Array<String>] candidate types; #guess_type tries them from
    #   the end of this list to the start
    def available_types
      [
        'any',
        'string',
        'boolean',
        'number',
        'integer',
        'null',
        'date',
        'time',
        'datetime',
        'array',
        'object',
        'geopoint',
        'geojson'
      ]
    end

    private

    # Returns a copy of the row cut or padded (with '') to exactly `width`
    # cells, leaving the caller's row untouched.
    def fit_row(row, width)
      cells = row.first(width)
      cells + [''] * (width - cells.length)
    end

  end
end
@@ -0,0 +1,73 @@
1
module JsonTableSchema
  # Read accessors over a schema descriptor. The including object must respond
  # to `[]` with the descriptor's string keys and may set @opts (a Hash) to
  # enable the :case_insensitive_headers option.
  module Model

    DEFAULTS = {
      'format' => 'default',
      'type' => 'string'
    }

    # @return [Array<String>] field names (lower-cased when
    #   :case_insensitive_headers is set); [] when the fields cannot be read
    def headers
      fields.map { |f| transform(f['name']) }
    rescue NoMethodError
      # Best effort on descriptors that are missing or malformed.
      []
    end

    # @return [Array<Hash>, nil] the raw field descriptors
    def fields
      self['fields']
    end

    # @return [Array] the primary key(s), always as a flat array
    def primary_keys
      [self['primaryKey']].flatten.compact
    end

    # @return [Array] the foreign key descriptors, [] when none are declared
    def foreign_keys
      self['foreignKeys'] || []
    end

    # @param key [String] field name
    # @return [String, nil] the declared type of the field
    def get_type(key)
      get_field(key)['type']
    end

    # @param key [String] field name
    # @return [Hash] the field's constraints, {} when it declares none
    def get_constraints(key)
      get_field(key)['constraints'] || {}
    end

    # @return [Array<String>] names of the fields whose `required` constraint
    #   is true; [] when the fields cannot be read
    def required_headers
      required = fields.select do |f|
        !f['constraints'].nil? && f['constraints']['required'] == true
      end
      required.map { |f| transform(f['name']) }
    rescue NoMethodError
      # Best effort on descriptors that are missing or malformed.
      []
    end

    # @param key [String] field name
    # @return [Boolean] whether a field with that name exists
    def has_field?(key)
      !get_field(key).nil?
    end

    # Looks a field up by its name. The name as reported by #headers is
    # accepted too, so lookups keep working with :case_insensitive_headers.
    #
    # @param key [String] field name
    # @return [Hash, nil] the field descriptor
    def get_field(key)
      (fields || []).find { |f| f['name'] == key || transform(f['name']) == key }
    end

    # @param type [String] schema type name
    # @return [Array<Hash>] descriptors of all fields of that type
    def get_fields_by_type(type)
      (fields || []).select { |f| f['type'] == type }
    end

    private

    # Applies the :case_insensitive_headers option to a field name without
    # modifying the string stored in the descriptor.
    def transform(name)
      return name unless name && @opts && @opts[:case_insensitive_headers]
      name.downcase
    end

    # Fills in the default type and format on fields that omit them.
    def expand!
      (self['fields'] || []).each do |f|
        f['type'] = DEFAULTS['type'] if f['type'].nil?
        f['format'] = DEFAULTS['format'] if f['format'].nil?
      end
    end

  end
end
@@ -0,0 +1,35 @@
1
module JsonTableSchema
  # A schema descriptor held as a Hash, with the validation, model, data and
  # helper behaviour mixed in.
  class Schema < Hash
    include JsonTableSchema::Validate
    include JsonTableSchema::Model
    include JsonTableSchema::Data
    include JsonTableSchema::Helpers

    # @param schema [Hash, String] a descriptor hash, or the location of a
    #   JSON document to load
    # @param opts [Hash] options stored in @opts for the mixed-in modules
    # @raise [SchemaException] when the schema cannot be loaded or parsed
    def initialize(schema, opts = {})
      self.merge! parse_schema(schema)
      @messages = []
      @opts = opts
      load_validator!
      expand!
    end

    # Turns the constructor argument into a descriptor hash.
    #
    # @param schema [Hash, String] a hash (returned as is; subclasses of Hash
    #   are accepted) or a location that is opened and parsed as JSON
    # @return [Hash] the descriptor
    # @raise [SchemaException] for a missing file, an HTTP error, invalid
    #   JSON, or an argument that is neither a Hash nor a String
    def parse_schema(schema)
      case schema
      when Hash
        schema
      when String
        begin
          # NOTE(review): Kernel#open also runs a command when the string
          # starts with "|"; do not pass untrusted locations here.
          # The block form closes the handle once the content has been read.
          JSON.parse(open(schema) { |io| io.read })
        rescue Errno::ENOENT
          raise SchemaException.new("File not found at `#{schema}`")
        rescue OpenURI::HTTPError => e
          raise SchemaException.new("URL `#{schema}` returned #{e.message}")
        rescue JSON::ParserError
          raise SchemaException.new("File at `#{schema}` is not valid JSON")
        end
      else
        raise SchemaException.new("A schema must be a hash, path or URL")
      end
    end

  end
end
@@ -0,0 +1,50 @@
1
module JsonTableSchema
  # Pairs CSV data with a schema, either supplied by the caller or inferred
  # from the data, and returns the rows converted through that schema.
  class Table

    attr_reader :schema

    # Builds a table whose schema is inferred from the data.
    #
    # @param csv [Array<Array>, String] rows including the header row, or a
    #   location to read CSV text from
    # @param opts [Hash] see #initialize
    # @return [JsonTableSchema::Table]
    def self.infer_schema(csv, opts = {})
      JsonTableSchema::Table.new(csv, nil, opts)
    end

    # @param csv [Array<Array>, String] rows including the header row, or a
    #   location to read CSV text from
    # @param schema [Hash, String, nil] schema descriptor or location; nil
    #   infers the schema from the data
    # @param opts [Hash] :csv_options is forwarded to CSV.parse
    def initialize(csv, schema, opts = {})
      @opts = opts
      @csv = parse_csv(csv)
      @schema = schema.nil? ? infer_schema(@csv) : JsonTableSchema::Schema.new(schema)
    end

    # Parses the input into a CSV table, treating the first row as headers.
    #
    # @param csv [Array<Array>, String] see #initialize
    # @return [CSV::Table]
    def parse_csv(csv)
      # NOTE(review): Kernel#open also runs a command when the string starts
      # with "|"; do not pass untrusted locations here.
      # The block form closes the handle once the content has been read.
      csv_string = csv.is_a?(Array) ? array_to_csv(csv) : open(csv) { |io| io.read }
      # CSV.parse takes its options as keywords; a positional hash is
      # rejected by Ruby 3.
      CSV.parse(csv_string, **csv_options)
    end

    # @return [Hash] the caller's :csv_options with `headers: true` enforced
    def csv_options
      (@opts[:csv_options] || {}).merge(headers: true)
    end

    # Returns the data rows converted through the schema.
    #
    # @param opts [Hash] :fail_fast (defaults to true) is handed to the
    #   schema's convert, :limit caps the number of rows, :keyed returns
    #   hashes keyed by header instead of arrays
    # @return [Array<Array>, Array<Hash>]
    def rows(opts = {})
      fail_fast = opts[:fail_fast] || opts[:fail_fast].nil?
      data = @csv.to_a.drop(1) # CSV::Table#to_a puts the header row first
      data = data.take(opts[:limit]) if opts[:limit]
      converted = @schema.convert(data, fail_fast)
      opts[:keyed] ? converted_to_hash(@csv.headers, converted) : converted
    end

    private

    # Serialises rows to CSV text, one line per row, joined with CRLF.
    def array_to_csv(array)
      array.map { |row| row.to_csv(row_sep: nil) }.join("\r\n")
    end

    # Zips every row with the headers into a hash.
    def converted_to_hash(headers, array)
      array.map do |row|
        Hash[row.map.with_index { |col, i| [headers[i], col] }]
      end
    end

    # Infers a schema from the parsed CSV. The header row is dropped so that
    # only data rows take part in type inference, matching #rows.
    def infer_schema(csv)
      inferer = JsonTableSchema::Infer.new(csv.headers, csv.to_a.drop(1))
      inferer.schema
    end

  end
end
@@ -0,0 +1,23 @@
1
module JsonTableSchema
  module Types
    # The `any` type: the default cast hands values back untouched.
    class Any < Base

      # @return [Array<String>] constraint names this type supports
      def self.supported_constraints
        %w[required pattern enum]
      end

      # @return [String] the schema name of this type
      def name
        'any'
      end

      # @param value [Object] the value to cast
      # @return [Object] the same value, unchanged
      def cast_default(value)
        value
      end

    end
  end
end
@@ -0,0 +1,37 @@
1
module JsonTableSchema
  module Types
    # The `array` type: accepts Ruby arrays and JSON text that parses to one.
    class Array < Base

      # @return [Array<String>] constraint names this type supports
      def self.supported_constraints
        %w[required pattern enum minLength maxLength]
      end

      # @return [String] the schema name of this type
      def name
        'array'
      end

      # @return [Class] the core Ruby class a cast value must be an instance of
      def type
        ::Array
      end

      # Casts a value to an array.
      #
      # @param value [Object] an array (returned as is) or JSON text
      # @return [::Array]
      # @raise [JsonTableSchema::InvalidArrayType] when the value is not an
      #   array and does not parse to one; any StandardError raised on the
      #   way is reported the same way
      def cast_default(value)
        return value if value.is_a?(type)

        decoded = JSON.parse(value)
        unless decoded.is_a?(type)
          raise JsonTableSchema::InvalidArrayType.new("#{value} is not a valid array")
        end
        decoded
      rescue
        raise JsonTableSchema::InvalidArrayType.new("#{value} is not a valid array")
      end

    end
  end
end
@@ -0,0 +1,54 @@
1
module JsonTableSchema
  module Types
    # Common behaviour of the type classes: constraint checking, null
    # handling and dispatch to the `cast_<format>` method of the subclass.
    class Base
      include JsonTableSchema::Helpers

      # @param field [Hash] field descriptor; reads 'constraints', 'type' and
      #   'format'
      def initialize(field)
        @field = field
        @constraints = field['constraints'] || {}
        @required = ['true', true].include?(@constraints['required'])
        @type = @field['type']
        set_format
      end

      # Validates the constraints (unless skipped) and casts the value with
      # the `cast_<format>` method for the field's format.
      #
      # @param value [Object] the raw value
      # @param skip_constraints [Boolean] when true, constraints are not checked
      # @return [Object, nil] the cast value; nil for a null value on a field
      #   that is not required
      # @raise [JsonTableSchema::InvalidFormat] when there is no cast method
      #   for the field's format
      def cast(value, skip_constraints = false)
        JsonTableSchema::Constraints.new(@field, value).validate! unless skip_constraints
        return nil if is_null?(value)
        send("cast_#{@format}", value)
      rescue NoMethodError => e
        # Identify a missing cast method by the method name carried on the
        # error rather than by its message, whose wording differs between
        # Ruby versions. Any other NoMethodError is passed on unchanged.
        raise e unless e.name.to_s.start_with?('cast_')
        raise(JsonTableSchema::InvalidFormat.new("The format `#{@format}` is not supported by the type `#{@type}`"))
      end

      # @param value [Object] the raw value
      # @return [Boolean] whether the value casts (constraints skipped)
      #   without raising a JsonTableSchema::Exception
      def test(value)
        cast(value, true)
        true
      rescue JsonTableSchema::Exception
        false
      end

      # Sets @format from the field descriptor. A format of the form
      # `fmt:<pattern>` is split into @format ('fmt') and @format_string
      # (the pattern); a missing format becomes 'default'.
      def set_format
        if (@field['format'] || '').start_with?('fmt:')
          @format, @format_string = *@field['format'].split(':', 2)
        else
          @format = @field['format'] || 'default'
        end
      end

      private

      # A value counts as null only when the field is not required.
      def is_null?(value)
        null_values.include?(value) && @required == false
      end

      # @return [Array<String>] strings that stand for a null value
      def null_values
        ['null', 'none', 'nil', 'nan', '-', '']
      end

    end
  end
end