jsontableschema 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG.md +17 -0
  6. data/CODE_OF_CONDUCT.md +49 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +233 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/etc/schemas/geojson.json +209 -0
  14. data/etc/schemas/json-table-schema.json +102 -0
  15. data/jsontableschema.gemspec +32 -0
  16. data/lib/jsontableschema.rb +41 -0
  17. data/lib/jsontableschema/constraints/constraints.rb +76 -0
  18. data/lib/jsontableschema/constraints/enum.rb +14 -0
  19. data/lib/jsontableschema/constraints/max_length.rb +15 -0
  20. data/lib/jsontableschema/constraints/maximum.rb +14 -0
  21. data/lib/jsontableschema/constraints/min_length.rb +15 -0
  22. data/lib/jsontableschema/constraints/minimum.rb +14 -0
  23. data/lib/jsontableschema/constraints/pattern.rb +14 -0
  24. data/lib/jsontableschema/constraints/required.rb +32 -0
  25. data/lib/jsontableschema/data.rb +57 -0
  26. data/lib/jsontableschema/exceptions.rb +28 -0
  27. data/lib/jsontableschema/helpers.rb +48 -0
  28. data/lib/jsontableschema/infer.rb +142 -0
  29. data/lib/jsontableschema/model.rb +73 -0
  30. data/lib/jsontableschema/schema.rb +35 -0
  31. data/lib/jsontableschema/table.rb +50 -0
  32. data/lib/jsontableschema/types/any.rb +23 -0
  33. data/lib/jsontableschema/types/array.rb +37 -0
  34. data/lib/jsontableschema/types/base.rb +54 -0
  35. data/lib/jsontableschema/types/boolean.rb +35 -0
  36. data/lib/jsontableschema/types/date.rb +56 -0
  37. data/lib/jsontableschema/types/datetime.rb +63 -0
  38. data/lib/jsontableschema/types/geojson.rb +38 -0
  39. data/lib/jsontableschema/types/geopoint.rb +56 -0
  40. data/lib/jsontableschema/types/integer.rb +35 -0
  41. data/lib/jsontableschema/types/null.rb +37 -0
  42. data/lib/jsontableschema/types/number.rb +60 -0
  43. data/lib/jsontableschema/types/object.rb +37 -0
  44. data/lib/jsontableschema/types/string.rb +64 -0
  45. data/lib/jsontableschema/types/time.rb +55 -0
  46. data/lib/jsontableschema/validate.rb +54 -0
  47. data/lib/jsontableschema/version.rb +3 -0
  48. metadata +230 -0
@@ -0,0 +1,48 @@
1
module JsonTableSchema
  # Helpers shared by the schema and type classes: boolean coercion and the
  # mapping from a schema type name to the class that handles it.
  module Helpers

    # Coerces +value+ to a boolean.
    #
    # Returns +value+ itself when it is already true or false, true/false when
    # its downcased string form appears in #true_values / #false_values, and
    # nil when it matches neither list.
    def convert_to_boolean(value)
      # Compare against the literals instead of testing `is_a?(Boolean)`:
      # core Ruby defines no `Boolean` constant, so that check only works when
      # some other library happens to provide one.
      return value if [true, false].include?(value)

      token = value.to_s.downcase
      if true_values.include?(token)
        true
      elsif false_values.include?(token)
        false
      end
    end

    # Lower-case tokens that are read as true.
    def true_values
      ['yes', 'y', 'true', 't', '1']
    end

    # Lower-case tokens that are read as false.
    def false_values
      ['no', 'n', 'false', 'f', '0']
    end

    # Returns the fully qualified class name (as a String) for a schema type
    # name such as 'integer'. A name missing from #type_class_lookup
    # interpolates nil, yielding the bare "JsonTableSchema::Types::" prefix.
    def get_class_for_type(type)
      "JsonTableSchema::Types::#{type_class_lookup[type]}"
    end

    # Maps schema type names to the class names under JsonTableSchema::Types.
    def type_class_lookup
      {
        'any' => 'Any',
        'array' => 'Array',
        'base' => 'Base',
        'boolean' => 'Boolean',
        'date' => 'Date',
        'datetime' => 'DateTime',
        'geojson' => 'GeoJSON',
        'geopoint' => 'GeoPoint',
        'integer' => 'Integer',
        'null' => 'Null',
        'number' => 'Number',
        'object' => 'Object',
        'string' => 'String',
        'time' => 'Time',
      }
    end

  end
end
@@ -0,0 +1,142 @@
1
module JsonTableSchema
  # Builds a schema for tabular data by guessing a type and format for every
  # column from sample rows.
  class Infer

    include JsonTableSchema::Helpers

    # The inferred schema. It is a plain Hash while inference runs and a
    # JsonTableSchema::Schema once #initialize has finished.
    attr_reader :schema

    # headers - column names, one per column.
    # rows    - the rows to sample. Every row is type-guessed, so a header row
    #           passed in here would be treated as data.
    #           NOTE(review): rows are assumed to be Arrays of cells — confirm
    #           against callers.
    # opts    - :explicit    when exactly true, every field gets explicit
    #                        'required' and 'unique' constraints;
    #           :primary_key copied to 'primaryKey' and used to flag the
    #                        matching field as unique;
    #           :row_limit   maximum number of rows to sample.
    def initialize(headers, rows, opts = {})
      @headers = headers
      @rows = rows
      @explicit = opts[:explicit]
      @primary_key = opts[:primary_key]
      @row_limit = opts[:row_limit]

      @schema = {
        'fields' => fields
      }
      @schema['primaryKey'] = @primary_key if @primary_key
      infer!
    end

    # Returns one fresh field descriptor per header. Constraints whose value
    # is false are dropped unless :explicit was given as true.
    def fields
      explicit = (@explicit == true)

      @headers.map do |header|
        descriptor = {
          'name' => header,
          'title' => '',
          'description' => '',
        }

        constraints = {
          'required' => explicit,
          'unique' => (header == @primary_key)
        }
        constraints.delete_if { |_name, flag| flag == false } unless explicit
        descriptor['constraints'] = constraints unless constraints.empty?
        descriptor
      end
    end

    # Guesses a type for every cell of the sampled rows, merges the winning
    # guess per column into the field descriptors and wraps the result in a
    # JsonTableSchema::Schema.
    def infer!
      type_matches = []
      width = @headers.count

      @rows.each_with_index do |row, row_index|
        # Sample at most @row_limit rows (indices 0...@row_limit).
        break if @row_limit && row_index >= @row_limit

        fit_row(row, width).each_with_index do |col, col_index|
          (type_matches[col_index] ||= []) << guess_type(col, col_index)
        end
      end

      resolve_types(type_matches)
      @schema = JsonTableSchema::Schema.new(@schema)
    end

    # Returns the guessed {'type' => ..., 'format' => ...} for one cell.
    # Types are tried from the end of #available_types towards the start and
    # the first one whose converter accepts the value wins; 'string' with
    # format 'default' is the fallback.
    def guess_type(col, index)
      guessed_type = 'string'
      guessed_format = 'default'

      available_types.reverse_each do |type|
        klass = get_class_for_type(type)
        converter = Kernel.const_get(klass).new(@schema['fields'][index])
        next unless converter.test(col) == true

        guessed_type = type
        guessed_format = guess_format(converter, col)
        break
      end

      {
        'type' => guessed_type,
        'format' => guessed_format
      }
    end

    # Tries every non-default `cast_<format>` method of the converter on the
    # value and returns the first format whose cast does not raise a
    # JsonTableSchema::Exception; 'default' when none fits.
    def guess_format(converter, col)
      converter.class.instance_methods.grep(/cast_/).each do |method|
        format = method.to_s.sub('cast_', '')
        next if format == 'default'

        begin
          converter.send(method, col)
          return format
        rescue JsonTableSchema::Exception
          next # this format does not fit the value; try the next one
        end
      end

      'default'
    end

    # Merges the most frequent guess of each column into the matching field
    # descriptor. On a tie the guess that was seen first wins.
    def resolve_types(results)
      results.each_with_index do |guesses, index|
        next if guesses.nil? || guesses.empty?

        counts = guesses.each_with_object(Hash.new(0)) { |guess, tally| tally[guess] += 1 }
        winner, _count = counts.max_by { |_guess, count| count }
        @schema['fields'][index].merge!(winner)
      end
    end

    # Type names considered during inference. #guess_type walks this list
    # backwards, so later entries are tried first.
    def available_types
      [
        'any',
        'string',
        'boolean',
        'number',
        'integer',
        'null',
        'date',
        'time',
        'datetime',
        'array',
        'object',
        'geopoint',
        'geojson'
      ]
    end

    private

    # Returns the row trimmed or padded with '' to exactly +width+ cells,
    # without modifying the row that was passed in.
    def fit_row(row, width)
      cell_count = row.count
      if cell_count > width
        row[0...width]
      elsif cell_count < width
        row + ::Array.new(width - cell_count, '')
      else
        row
      end
    end

  end
end
@@ -0,0 +1,73 @@
1
module JsonTableSchema
  # Read accessors over a schema descriptor. The including object must behave
  # like a Hash (it is read through self[...]) and must set @opts before
  # #headers or #required_headers is called.
  module Model

    # Values filled in by #expand! for fields that omit them.
    DEFAULTS = {
      'format' => 'default',
      'type' => 'string'
    }

    # Names of all fields, passed through #transform. Returns [] when a
    # NoMethodError is raised along the way (for example when there is no
    # 'fields' entry).
    def headers
      fields.map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # The raw 'fields' entry of the descriptor (nil when absent).
    # This reader is public; the original file defined it a second time below
    # `private`, which silently hid it from callers.
    def fields
      self['fields']
    end

    # The 'primaryKey' entry normalised to a flat Array without nils.
    def primary_keys
      [self['primaryKey']].flatten.compact
    end

    # The 'foreignKeys' entry, or [] when absent.
    def foreign_keys
      self['foreignKeys'] || []
    end

    # The 'type' of the field named +key+. Raises NoMethodError when no such
    # field exists.
    def get_type(key)
      get_field(key)['type']
    end

    # The 'constraints' of the field named +key+, or {} when it has none.
    # Raises NoMethodError when no such field exists.
    def get_constraints(key)
      get_field(key)['constraints'] || {}
    end

    # Names (passed through #transform) of the fields whose 'required'
    # constraint is exactly true. Returns [] on NoMethodError, like #headers.
    def required_headers
      required = fields.select do |f|
        constraints = f['constraints']
        !constraints.nil? && constraints['required'] == true
      end
      required.map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # True when a field named +key+ exists.
    def has_field?(key)
      !get_field(key).nil?
    end

    # The first field descriptor whose 'name' equals +key+, or nil.
    def get_field(key)
      fields.find { |f| f['name'] == key }
    end

    # All field descriptors whose 'type' equals +type+.
    def get_fields_by_type(type)
      fields.select { |f| f['type'] == type }
    end

    private

    # Downcases +name+ when the :case_insensitive_headers option is set.
    # NOTE: this uses downcase!, so the name stored in the descriptor itself
    # is modified in place.
    def transform(name)
      name.downcase! if @opts[:case_insensitive_headers]
      name
    end

    # Fills in the default 'type' and 'format' on every field lacking them.
    def expand!
      (self['fields'] || []).each do |f|
        f['type'] = DEFAULTS['type'] if f['type'].nil?
        f['format'] = DEFAULTS['format'] if f['format'].nil?
      end
    end

  end
end
@@ -0,0 +1,35 @@
1
module JsonTableSchema
  # A schema descriptor held as a Hash, with validation, model accessors and
  # data conversion mixed in.
  class Schema < Hash
    include JsonTableSchema::Validate
    include JsonTableSchema::Model
    include JsonTableSchema::Data
    include JsonTableSchema::Helpers

    # Locations starting with one of these schemes are fetched through
    # open-uri; every other String is read as a local file path.
    REMOTE_SCHEMA_LOCATION = %r{\A(?:https?|ftp)://}i

    # schema - a Hash descriptor, or a String holding a file path or URL of a
    #          JSON document.
    # opts   - options stored in @opts for the mixed-in modules.
    #
    # Raises SchemaException when the schema cannot be loaded.
    def initialize(schema, opts = {})
      self.merge! parse_schema(schema)
      @messages = []
      @opts = opts
      load_validator!
      expand!
    end

    # Returns the descriptor Hash for +schema+.
    #
    # A Hash (including subclasses such as Schema itself) is returned as is; a
    # String is read from disk or fetched, then parsed as JSON. Anything else,
    # a missing file, an HTTP error or invalid JSON raises SchemaException.
    def parse_schema(schema)
      case schema
      when Hash
        schema
      when String
        begin
          JSON.parse(read_schema_location(schema))
        rescue Errno::ENOENT
          raise SchemaException.new("File not found at `#{schema}`")
        rescue OpenURI::HTTPError => e
          raise SchemaException.new("URL `#{schema}` returned #{e.message}")
        rescue JSON::ParserError
          raise SchemaException.new("File at `#{schema}` is not valid JSON")
        end
      else
        raise SchemaException.new("A schema must be a hash, path or URL")
      end
    end

    private

    # Returns the text found at +location+.
    #
    # Kernel#open is avoided on purpose: it treats a leading "|" as a command
    # to run, leaves the handle open until garbage collection, and no longer
    # opens URLs on Ruby 3.0+.
    def read_schema_location(location)
      return File.read(location) unless location =~ REMOTE_SCHEMA_LOCATION

      # URI.open is public from Ruby 2.5; older versions only offer the
      # open-uri patched Kernel.open.
      opener = URI.respond_to?(:open) ? URI : Kernel
      opener.open(location, &:read)
    end

  end
end
@@ -0,0 +1,50 @@
1
module JsonTableSchema
  # Pairs CSV data with a schema and returns the rows converted through it.
  class Table

    # The schema in use: the one supplied, or the one inferred from the data.
    attr_reader :schema

    # Builds a Table with no schema given, so that one is inferred from the
    # CSV. Returns the Table; the inferred schema is available via #schema.
    def self.infer_schema(csv, opts = {})
      JsonTableSchema::Table.new(csv, nil, opts)
    end

    # csv    - an Array of row Arrays (header row first), or a path / URL to
    #          read CSV text from.
    # schema - anything JsonTableSchema::Schema.new accepts, or nil to infer.
    # opts   - :csv_options is merged into the options given to CSV.parse.
    def initialize(csv, schema, opts = {})
      @opts = opts
      @csv = parse_csv(csv)
      @schema = schema.nil? ? infer_schema(@csv) : JsonTableSchema::Schema.new(schema)
    end

    # Parses +csv+ into a CSV::Table whose first row supplies the headers.
    def parse_csv(csv)
      csv_string = csv.is_a?(Array) ? array_to_csv(csv) : read_csv_source(csv)
      # CSV.parse takes its options as keywords; on Ruby 3 a positional Hash
      # raises ArgumentError, hence the double splat.
      CSV.parse(csv_string, **csv_options)
    end

    # Options for CSV.parse: the caller's :csv_options with headers forced on.
    def csv_options
      (@opts[:csv_options] || {}).merge(headers: true)
    end

    # Returns the data rows converted by the schema.
    #
    # opts - :fail_fast passed on to the schema's #convert (true when omitted
    #                   or nil);
    #        :limit     convert only the first N data rows;
    #        :keyed     return Hashes keyed by header instead of Arrays.
    def rows(opts = {})
      fail_fast = opts[:fail_fast] || opts[:fail_fast].nil?
      data = @csv.to_a.drop(1) # CSV::Table#to_a puts the header row first
      data = data.take(opts[:limit]) if opts[:limit]
      converted = @schema.convert(data, fail_fast)
      opts[:keyed] ? converted_to_hash(@csv.headers, converted) : converted
    end

    private

    # Serialises an Array of row Arrays to CSV text with "\r\n" between rows.
    def array_to_csv(array)
      array.map { |row| row.to_csv(row_sep: nil) }.join("\r\n")
    end

    # Turns each row Array into a Hash keyed by the header at the same index.
    def converted_to_hash(headers, array)
      array.map do |row|
        Hash[row.map.with_index { |col, i| [headers[i], col] }]
      end
    end

    # Infers a schema from the parsed CSV. The header row that
    # CSV::Table#to_a places first is dropped so that only data rows are
    # type-guessed.
    def infer_schema(csv)
      inferer = JsonTableSchema::Infer.new(csv.headers, csv.to_a.drop(1))
      inferer.schema
    end

    # Returns the CSV text found at +source+ (a local path or a URL).
    #
    # Kernel#open is avoided on purpose: it treats a leading "|" as a command
    # to run, leaves the handle open until garbage collection, and no longer
    # opens URLs on Ruby 3.0+.
    def read_csv_source(source)
      return File.read(source) unless source.to_s =~ %r{\A(?:https?|ftp)://}i

      # URI.open is public from Ruby 2.5; older versions only offer the
      # open-uri patched Kernel.open.
      opener = URI.respond_to?(:open) ? URI : Kernel
      opener.open(source, &:read)
    end

  end
end
@@ -0,0 +1,23 @@
1
module JsonTableSchema
  module Types
    # The 'any' type: its default cast hands the value back untouched.
    class Any < Base

      # Constraint names that may be applied to this type.
      def self.supported_constraints
        %w[required pattern enum]
      end

      # Schema name of this type.
      def name
        'any'
      end

      # Returns +value+ unchanged.
      def cast_default(value)
        value
      end

    end
  end
end
@@ -0,0 +1,37 @@
1
module JsonTableSchema
  module Types
    # The 'array' type: accepts Ruby Arrays and JSON text that parses to one.
    class Array < Base

      # Constraint names that may be applied to this type.
      def self.supported_constraints
        %w[required pattern enum minLength maxLength]
      end

      # Schema name of this type.
      def name
        'array'
      end

      # Ruby class a successfully cast value belongs to.
      def type
        ::Array
      end

      # Returns +value+ when it already is an Array, otherwise the Array
      # obtained by parsing it as JSON. Raises InvalidArrayType when parsing
      # fails or the parsed document is not an Array.
      def cast_default(value)
        return value if value.is_a?(type)

        parsed =
          begin
            JSON.parse(value)
          rescue StandardError
            nil # handled below: nil is not an Array
          end
        return parsed if parsed.is_a?(type)

        raise JsonTableSchema::InvalidArrayType.new("#{value} is not a valid array")
      end

    end
  end
end
@@ -0,0 +1,54 @@
1
module JsonTableSchema
  module Types
    # Shared behaviour of the type classes: reads a field descriptor and casts
    # values by dispatching to the `cast_<format>` method the subclass defines.
    class Base
      include JsonTableSchema::Helpers

      # field - the field descriptor Hash; its 'constraints', 'type' and
      #         'format' entries are read.
      def initialize(field)
        @field = field
        @constraints = field['constraints'] || {}
        @required = ['true', true].include?(@constraints['required'])
        @type = @field['type']
        set_format
      end

      # Casts +value+ using the field's format.
      #
      # Constraints are validated first unless +skip_constraints+ is true.
      # Returns nil for null-like values on non-required fields. Raises
      # InvalidFormat when the type defines no `cast_<format>` method.
      def cast(value, skip_constraints = false)
        JsonTableSchema::Constraints.new(@field, value).validate! unless skip_constraints
        return nil if is_null?(value)
        send("cast_#{@format}", value)
      rescue NoMethodError => e
        # Identify the missing method through NameError#name rather than the
        # message text: the message wording is not stable (Ruby 3.4 changed
        # the quoting around the method name).
        raise e unless e.name.to_s.start_with?('cast_')

        raise(JsonTableSchema::InvalidFormat.new("The format `#{@format}` is not supported by the type `#{@type}`"))
      end

      # True when +value+ casts without raising a JsonTableSchema::Exception.
      # Constraints are not checked.
      def test(value)
        cast(value, true)
        true
      rescue JsonTableSchema::Exception
        false
      end

      # Derives @format from the field's 'format' entry. A value of the form
      # "fmt:PATTERN" yields format 'fmt' with PATTERN kept in @format_string;
      # a missing entry yields 'default'.
      def set_format
        if (@field['format'] || '').start_with?('fmt:')
          @format, @format_string = *@field['format'].split(':', 2)
        else
          @format = @field['format'] || 'default'
        end
      end

      private

      # True when the field is not required and +value+ is one of the
      # null-like tokens.
      def is_null?(value)
        null_values.include?(value) && @required == false
      end

      # Tokens read as "no value". Matching is exact (case-sensitive).
      def null_values
        ['null', 'none', 'nil', 'nan', '-', '']
      end

    end
  end
end