csvlint 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +8 -8
  2. data/.gitignore +7 -1
  3. data/CHANGELOG.md +19 -1
  4. data/README.md +93 -36
  5. data/bin/csvlint +68 -27
  6. data/csvlint.gemspec +2 -0
  7. data/features/csvw_schema_validation.feature +127 -0
  8. data/features/fixtures/spreadsheet.xlsx +0 -0
  9. data/features/sources.feature +3 -4
  10. data/features/step_definitions/parse_csv_steps.rb +13 -1
  11. data/features/step_definitions/schema_validation_steps.rb +27 -1
  12. data/features/step_definitions/sources_steps.rb +1 -1
  13. data/features/step_definitions/validation_errors_steps.rb +48 -1
  14. data/features/step_definitions/validation_info_steps.rb +5 -1
  15. data/features/step_definitions/validation_warnings_steps.rb +15 -1
  16. data/features/support/load_tests.rb +114 -0
  17. data/features/validation_errors.feature +12 -24
  18. data/features/validation_warnings.feature +18 -6
  19. data/lib/csvlint.rb +10 -0
  20. data/lib/csvlint/csvw/column.rb +359 -0
  21. data/lib/csvlint/csvw/date_format.rb +182 -0
  22. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  23. data/lib/csvlint/csvw/number_format.rb +211 -0
  24. data/lib/csvlint/csvw/property_checker.rb +761 -0
  25. data/lib/csvlint/csvw/table.rb +204 -0
  26. data/lib/csvlint/csvw/table_group.rb +165 -0
  27. data/lib/csvlint/schema.rb +40 -23
  28. data/lib/csvlint/validate.rb +142 -19
  29. data/lib/csvlint/version.rb +1 -1
  30. data/spec/csvw/column_spec.rb +112 -0
  31. data/spec/csvw/date_format_spec.rb +49 -0
  32. data/spec/csvw/number_format_spec.rb +403 -0
  33. data/spec/csvw/table_group_spec.rb +143 -0
  34. data/spec/csvw/table_spec.rb +90 -0
  35. data/spec/schema_spec.rb +27 -1
  36. data/spec/spec_helper.rb +0 -1
  37. data/spec/validator_spec.rb +16 -10
  38. metadata +53 -2
@@ -0,0 +1,13 @@
1
module Csvlint
  module Csvw
    # Error raised when a CSVW metadata document is invalid.
    #
    # The optional +path+ records where in the metadata the problem was found
    # (callers in this file pass strings such as "datatype.@id"). The
    # human-readable message is supplied separately, via
    # `raise MetadataError.new(path), "message"`.
    class MetadataError < StandardError
      # Location given to the constructor, or nil when none was supplied.
      attr_reader :path

      # @param path [Object, nil] stored verbatim and exposed through #path
      def initialize(path = nil)
        @path = path
      end
    end
  end
end
@@ -0,0 +1,211 @@
1
module Csvlint
  module Csvw
    # Validates and parses numeric cell values, optionally against a number
    # format pattern built from the characters `0`, `#`, `E`, the grouping
    # separator and the decimal separator (e.g. "#,##0.00").
    #
    # With no pattern, values must look like a plain integer / decimal /
    # exponent number (optionally followed by % or ‰) or be one of
    # NaN, INF, -INF.
    class NumberFormat

      # Optionally signed run of digits with an optional percent / per-mille
      # sign. Anchored with \A/\z so multi-line values cannot match.
      INTEGER_REGEXP = /\A[-+]?[0-9]+[%‰]?\z/

      attr_reader :pattern, :prefix, :numeric_part, :suffix, :grouping_separator, :decimal_separator, :primary_grouping_size, :secondary_grouping_size, :fractional_grouping_size

      # @param pattern [String, nil] number format pattern, or nil for the default number syntax
      # @param grouping_separator [String, nil] defaults to "," when a pattern is given, otherwise none
      # @param decimal_separator [String, nil] defaults to "."
      # @raise [Csvlint::Csvw::NumberFormatError] when the pattern contains no numeric part
      def initialize(pattern=nil, grouping_separator=nil, decimal_separator=".")
        @pattern = pattern
        @grouping_separator = grouping_separator || (@pattern.nil? ? nil : ",")
        @decimal_separator = decimal_separator || "."
        if pattern.nil?
          @regexp = Regexp.new("\\A(([-+]?[0-9]+(#{Regexp.escape(@decimal_separator)}[0-9]+)?([Ee][-+]?[0-9]+)?[%‰]?)|NaN|INF|-INF)\\z")
        else
          # Split the pattern into literal prefix, numeric part and literal suffix.
          numeric_part_regexp = Regexp.new("(?<numeric_part>([0#Ee]|#{Regexp.escape(@grouping_separator)}|#{Regexp.escape(@decimal_separator)})+)")
          number_format_regexp = Regexp.new("^(?<prefix>.*?)#{numeric_part_regexp}(?<suffix>.*?)$")
          match = number_format_regexp.match(pattern)
          raise Csvw::NumberFormatError, "invalid number format" if match.nil?

          @prefix = match["prefix"]
          @numeric_part = match["numeric_part"]
          @suffix = match["suffix"]

          parts = @numeric_part.split("E")
          mantissa_part = parts[0]
          exponent_part = parts[1] || ""
          mantissa_parts = mantissa_part.split(@decimal_separator)
          # raise Csvw::NumberFormatError, "more than two decimal separators in number format" if parts.length > 2
          integer_part = mantissa_parts[0]
          fractional_part = mantissa_parts[1] || ""

          # A pattern with neither fraction nor exponent parses to an Integer.
          @integer_pattern = exponent_part == "" && fractional_part == ""

          # "0" marks a required digit, "#" an optional one.
          min_integer_digits = integer_part.gsub(@grouping_separator, "").gsub("#", "").length
          min_fraction_digits = fractional_part.gsub(@grouping_separator, "").gsub("#", "").length
          max_fraction_digits = fractional_part.gsub(@grouping_separator, "").length
          min_exponent_digits = exponent_part.gsub("#", "").length
          max_exponent_digits = exponent_part.length

          # Group sizes are read from the pattern; the inline rescues supply the
          # fallback when the pattern has no such group (nil.length raises).
          integer_parts = integer_part.split(@grouping_separator)[1..-1]
          @primary_grouping_size = integer_parts[-1].length rescue 0
          @secondary_grouping_size = integer_parts[-2].length rescue @primary_grouping_size

          fractional_parts = fractional_part.split(@grouping_separator)[0..-2]
          @fractional_grouping_size = fractional_parts[0].length rescue 0

          # From here on numeric_part_regexp is the regexp *source* for values.
          numeric_part_regexp = "[-+]?"

          if @primary_grouping_size == 0
            integer_regexp = "[0-9]*[0-9]{#{min_integer_digits}}"
          else
            leading_regexp = "([0-9]{0,#{@secondary_grouping_size - 1}}#{Regexp.escape(@grouping_separator)})?"
            secondary_groups = "([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)})*"
            if min_integer_digits > @primary_grouping_size
              remaining_req_digits = min_integer_digits - @primary_grouping_size
              req_secondary_groups = remaining_req_digits / @secondary_grouping_size > 0 ? "([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)}){#{remaining_req_digits / @secondary_grouping_size}}" : ""
              if remaining_req_digits % @secondary_grouping_size > 0
                final_req_digits = "[0-9]{#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
                final_opt_digits = "[0-9]{0,#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
                integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{remaining_req_digits % @secondary_grouping_size}}#{Regexp.escape(@grouping_separator)}#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
              else
                integer_regexp = "(#{leading_regexp}#{secondary_groups})?#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
              end
            else
              final_req_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{#{@primary_grouping_size - min_integer_digits}}" : ""
              final_opt_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{0,#{@primary_grouping_size - min_integer_digits}}" : ""
              integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{min_integer_digits}}"
            end
          end

          numeric_part_regexp += integer_regexp

          if max_fraction_digits > 0
            if @fractional_grouping_size == 0
              fractional_regexp = ""
              fractional_regexp += "[0-9]{#{min_fraction_digits}}" if min_fraction_digits > 0
              fractional_regexp += "[0-9]{0,#{max_fraction_digits - min_fraction_digits}}" unless min_fraction_digits == max_fraction_digits
              fractional_regexp = "#{Regexp.escape(@decimal_separator)}#{fractional_regexp}"
              fractional_regexp = "(#{fractional_regexp})?" if min_fraction_digits == 0
              numeric_part_regexp += fractional_regexp
            else
              fractional_regexp = ""

              if min_fraction_digits > 0
                if min_fraction_digits >= @fractional_grouping_size
                  # first group of required digits - something like "[0-9]{3}"
                  fractional_regexp += "[0-9]{#{@fractional_grouping_size}}"
                  # additional groups of required digits - something like "(,[0-9]{3}){1}"
                  fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){#{min_fraction_digits / @fractional_grouping_size - 1}}" if min_fraction_digits / @fractional_grouping_size > 1
                  fractional_regexp += "#{Regexp.escape(@grouping_separator)}" if min_fraction_digits % @fractional_grouping_size > 0
                end
                # additional required digits - something like ",[0-9]{1}"
                fractional_regexp += "[0-9]{#{min_fraction_digits % @fractional_grouping_size}}" if min_fraction_digits % @fractional_grouping_size > 0

                opt_fractional_digits = max_fraction_digits - min_fraction_digits
                if opt_fractional_digits > 0
                  fractional_regexp += "("

                  if min_fraction_digits % @fractional_grouping_size > 0
                    # optional fractional digits to complete the group
                    fractional_regexp += "[0-9]{0,#{[opt_fractional_digits, @fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)].min}}"
                    fractional_regexp += "|"
                    fractional_regexp += "[0-9]{#{[opt_fractional_digits, @fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)].min}}"
                  else
                    fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{@fractional_grouping_size}})?"
                    fractional_regexp += "|"
                    fractional_regexp += "#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}"
                  end

                  remaining_opt_fractional_digits = opt_fractional_digits - (@fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size))
                  if remaining_opt_fractional_digits > 0
                    if remaining_opt_fractional_digits % @fractional_grouping_size > 0
                      # optional fraction digits in groups
                      fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{remaining_opt_fractional_digits / @fractional_grouping_size}}" if remaining_opt_fractional_digits > @fractional_grouping_size
                      # remaining optional fraction digits
                      fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{remaining_opt_fractional_digits % @fractional_grouping_size}})?"
                    else
                      # optional fraction digits in groups
                      fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{(remaining_opt_fractional_digits / @fractional_grouping_size) - 1}}" if remaining_opt_fractional_digits > @fractional_grouping_size
                      # remaining optional fraction digits
                      fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{@fractional_grouping_size}})?"
                    end

                    # NOTE(review): the two appends below repeat the optional groups
                    # already added by the if/else just above, so the regexp accepts
                    # more optional fraction groups than the pattern declares. Kept
                    # as-is to preserve matching behaviour - confirm whether this is
                    # a refactoring leftover before removing.
                    # optional fraction digits in groups
                    fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{(remaining_opt_fractional_digits / @fractional_grouping_size) - 1}}" if remaining_opt_fractional_digits > @fractional_grouping_size
                    # remaining optional fraction digits
                    fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{remaining_opt_fractional_digits % @fractional_grouping_size}})?" if remaining_opt_fractional_digits % @fractional_grouping_size > 0
                  end
                  fractional_regexp += ")"
                end
              elsif max_fraction_digits % @fractional_grouping_size > 0
                # optional fractional digits in groups
                fractional_regexp += "([0-9]{#{@fractional_grouping_size}}#{Regexp.escape(@grouping_separator)}){0,#{max_fraction_digits / @fractional_grouping_size}}"
                # remaining optional fraction digits
                fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{max_fraction_digits % @fractional_grouping_size}})?" if max_fraction_digits % @fractional_grouping_size > 0
              else
                fractional_regexp += "([0-9]{#{@fractional_grouping_size}}#{Regexp.escape(@grouping_separator)}){0,#{(max_fraction_digits / @fractional_grouping_size) - 1}}" if max_fraction_digits > @fractional_grouping_size
                fractional_regexp += "[0-9]{#{@fractional_grouping_size}}"
              end
              fractional_regexp = "#{Regexp.escape(@decimal_separator)}#{fractional_regexp}"
              fractional_regexp = "(#{fractional_regexp})?" if min_fraction_digits == 0
              numeric_part_regexp += fractional_regexp
            end
          end

          if max_exponent_digits > 0
            numeric_part_regexp += "E"
            numeric_part_regexp += "[0-9]{0,#{max_exponent_digits - min_exponent_digits}}" unless max_exponent_digits == min_exponent_digits
            numeric_part_regexp += "[0-9]{#{min_exponent_digits}}" unless min_exponent_digits == 0
          end

          # Prefix and suffix are literal text, so both are escaped; the suffix
          # used to be interpolated raw and was interpreted as regexp syntax.
          @regexp = Regexp.new("\\A(?<prefix>#{Regexp.escape(@prefix)})(?<numeric_part>#{numeric_part_regexp})(?<suffix>#{Regexp.escape(@suffix)})\\z")
        end
      end

      # @param value [String, nil]
      # @return [Boolean] whether the whole value conforms to this format
      def match(value)
        value =~ @regexp ? true : false
      end

      # Converts a cell value to a number.
      #
      # The argument is never modified (frozen strings are accepted).
      #
      # @param value [String, nil]
      # @return [Integer, Float, nil] the parsed number, or nil when the value
      #   does not conform to the format
      def parse(value)
        return nil if value.nil?

        if @pattern.nil?
          unless @grouping_separator.nil?
            # A leading or doubled grouping separator is never valid.
            return nil if value =~ Regexp.new("((\\A#{Regexp.escape(@grouping_separator)})|#{Regexp.escape(@grouping_separator)}{2})")
            value = value.gsub(@grouping_separator, "")
          end
          return nil unless value =~ @regexp

          case value
          when "NaN"
            Float::NAN
          when "INF"
            Float::INFINITY
          when "-INF"
            -Float::INFINITY
          else
            case value[-1]
            when "%"
              to_float(value) / 100
            when "‰"
              to_float(value) / 1000
            else
              value =~ INTEGER_REGEXP ? value.to_i : to_float(value)
            end
          end
        else
          match = @regexp.match(value)
          return nil if match.nil?
          number = match["numeric_part"].gsub(@grouping_separator, "")
          return number.to_i if @integer_pattern
          to_float(number)
        end
      end

      private

      # String#to_f only understands "." as the decimal mark, so swap the
      # configured decimal separator in before converting.
      def to_float(text)
        text.sub(@decimal_separator, ".").to_f
      end

    end

    # Raised when a number format pattern cannot be interpreted.
    class NumberFormatError < StandardError

    end
  end
end
@@ -0,0 +1,761 @@
1
+ module Csvlint
2
+ module Csvw
3
+ class PropertyChecker
4
+
5
+ class << self
6
+
7
# Validates one metadata property.
#
# Known properties are delegated to their checker in PROPERTIES. Properties
# prefixed with a known namespace ("prefix:name") are treated as annotations
# and validated as common property values. Anything else is reported as invalid.
#
# @return [Array] triple of (possibly normalised) value, warning(s), and
#   property type (nil for an unrecognised property)
def check_property(property, value, base_url, lang)
  return PROPERTIES[property].call(value, base_url, lang) if PROPERTIES.include? property

  namespaced = property =~ /^([a-z]+):/ && NAMESPACES.include?(property.split(":")[0])
  return value, :invalid_property, nil unless namespaced

  checked, warnings = check_common_property_value(value, base_url, lang)
  return checked, warnings, :annotation
end
17
+
18
+ private
19
# Validates the value of a common (namespaced) property.
#
# Non-Hash values are returned untouched with nil warnings. Hash values are
# copied, each "@"-keyword is checked, "@id" is resolved against base_url
# (when one is given) and nested Hash values are validated recursively.
#
# @return [Array] pair of (value, warnings)
# @raise [Csvlint::Csvw::MetadataError] for any disallowed keyword or invalid
#   @type / @id / @value / @language combination
def check_common_property_value(value, base_url, lang)
  return value, nil unless value.is_a?(Hash)

  checked = value.clone
  warnings = []
  checked.each do |key, val|
    case key
    when "@context", "@list", "@set"
      raise Csvlint::Csvw::MetadataError.new(key), "common property has #{key} property"
    when "@type"
      recognised = (checked["@value"] && BUILT_IN_DATATYPES.include?(val)) ||
                   (!checked["@value"] && BUILT_IN_TYPES.include?(val)) ||
                   (val =~ /^([a-z]+):/ && NAMESPACES.include?(val.split(":")[0]))
      unless recognised
        # must be an absolute URI; an unparsable URI is reported the same way
        begin
          raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @type (#{val})" if URI(val).scheme.nil?
        rescue
          raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @type (#{val})"
        end
      end
    when "@id"
      if !base_url.nil?
        begin
          val = URI.join(base_url, val)
        rescue
          raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @id (#{val})"
        end
      end
    when "@value"
      if checked["@type"] && checked["@language"]
        raise Csvlint::Csvw::MetadataError.new(), "common property with @value has both @language and @type"
      end
      extras = checked.except("@type", "@language", "@value")
      unless extras.empty?
        raise Csvlint::Csvw::MetadataError.new(), "common property with @value has properties other than @language or @type"
      end
    when "@language"
      raise Csvlint::Csvw::MetadataError.new(), "common property with @language lacks a @value" unless checked["@value"]
      raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @language (#{val})" unless val =~ BCP47_LANGUAGE_REGEXP || val.nil?
    else
      raise Csvlint::Csvw::MetadataError.new(), "common property has property other than @id, @type, @value or @language beginning with @ (#{key})" if key[0] == "@"
    end

    # Nested objects are validated with the same rules.
    if val.instance_of? Hash
      val, nested_warnings = check_common_property_value(val, base_url, lang)
      warnings += Array(nested_warnings)
    end
    checked[key] = val
  end
  return checked, warnings
end
74
+
75
# Normalises one range facet (e.g. "minInclusive") of a datatype description.
#
# For date/time datatypes the facet is parsed with the datatype's default
# DateFormat and replaced in +value+ by the parsed result; an unparsable
# facet is removed and reported. For numeric datatypes the facet is left
# as-is. The facet is not allowed on any other datatype.
#
# @param value [Hash] datatype description; modified in place
# @param property [String] facet name
# @param datatype [String] base datatype of the description
# @return [Array<Symbol>] warnings - empty, or [:invalid_<property>]
# @raise [Csvlint::Csvw::MetadataError] when the datatype cannot carry the facet
def convert_value_facet(value, property, datatype)
  return [] unless value[property]

  if DATE_FORMAT_DATATYPES.include?(datatype)
    parsed = Csvlint::Csvw::DateFormat.new(nil, datatype).parse(value[property])
    if parsed.nil?
      value.delete(property)
      # The symbol name must not start with a literal ":" (it used to be
      # built from ":invalid_..." and so never equalled :invalid_<property>).
      return [:"invalid_#{property}"]
    end
    value[property] = parsed
    []
  elsif NUMERIC_FORMAT_DATATYPES.include?(datatype)
    []
  else
    raise Csvlint::Csvw::MetadataError.new("datatype.#{property}"), "#{property} is only allowed for numeric, date/time and duration types"
  end
end
95
+
96
# Builds a checker for properties whose value must be an Array.
#
# The checker returns [value, nil, type] for an Array and
# [false, :invalid_value, type] for anything else.
def array_property(type)
  lambda do |value, _base_url, _lang|
    if value.instance_of?(Array)
      [value, nil, type]
    else
      [false, :invalid_value, type]
    end
  end
end
102
+
103
# Builds a checker for properties whose value must be true or false.
#
# The checker returns [value, nil, type] for a boolean and
# [false, :invalid_value, type] for anything else.
def boolean_property(type)
  lambda do |value, _base_url, _lang|
    is_boolean = value == true || value == false
    is_boolean ? [value, nil, type] : [false, :invalid_value, type]
  end
end
109
+
110
# Builds a checker for properties whose value must be a String.
#
# The checker returns [value, nil, type] for a String and
# ["", :invalid_value, type] for anything else.
def string_property(type)
  lambda do |value, _base_url, _lang|
    if value.instance_of?(String)
      [value, nil, type]
    else
      ["", :invalid_value, type]
    end
  end
end
116
+
117
# Builds a checker for properties whose value must be a non-negative Integer.
#
# The checker returns [value, nil, type] when valid and
# [nil, :invalid_value, type] otherwise.
def numeric_property(type)
  lambda do |value, _base_url, _lang|
    non_negative_integer = value.kind_of?(Integer) && value >= 0
    non_negative_integer ? [value, nil, type] : [nil, :invalid_value, type]
  end
end
123
+
124
# Builds a checker for properties whose value is a URL.
#
# The checker resolves a String value against base_url (or parses it on its
# own when base_url is nil) and returns [uri, nil, type]. Non-String values
# yield [base_url, :invalid_value, type].
#
# The checker raises Csvlint::Csvw::MetadataError when the value starts with "_:".
def link_property(type)
  lambda do |value, base_url, _lang|
    raise Csvlint::Csvw::MetadataError.new(), "URL #{value} starts with _:" if value.to_s =~ /^_:/

    if value.instance_of?(String)
      resolved = base_url.nil? ? URI(value) : URI.join(base_url, value)
      [resolved, nil, type]
    else
      [base_url, :invalid_value, type]
    end
  end
end
131
+
132
# Builds a checker for properties whose value must be a language tag
# matching BCP47_REGEXP.
#
# The checker returns [value, nil, type] for a matching tag and
# [nil, :invalid_value, type] otherwise. Values that cannot be matched
# against a regexp at all (numbers, booleans, arrays from the JSON metadata)
# are reported as invalid rather than raising NoMethodError, which is what
# `=~` does for such objects on Ruby >= 3.2.
def language_property(type)
  lambda do |value, _base_url, _lang|
    if value.respond_to?(:=~) && value =~ BCP47_REGEXP
      [value, nil, type]
    else
      [nil, :invalid_value, type]
    end
  end
end
138
+
139
# Builds a checker for natural-language properties (such as titles).
#
# The checker normalises the value to a Hash of language => Array of Strings:
# * String - { lang => [value] } with nil warnings
# * Array  - { lang => strings }, one :invalid_value per non-String entry
# * Hash   - entries whose key matches BCP47_REGEXP are kept with their String
#            values (one :invalid_value per non-String value); other keys are
#            dropped with :invalid_language; :invalid_value is added when
#            nothing remains
# * other  - {} with :invalid_value
#
# The input is never modified. The Hash case builds a new result hash rather
# than deleting from a clone while iterating it, so frozen hashes are accepted.
def natural_language_property(type)
  lambda do |value, _base_url, lang|
    if value.instance_of? String
      [{ lang => [value] }, nil, type]
    elsif value.instance_of? Array
      titles, rejected = value.partition { |title| title.instance_of?(String) }
      [{ lang => titles }, Array.new(rejected.length, :invalid_value), type]
    elsif value.instance_of? Hash
      warnings = []
      titles_by_language = {}
      value.each do |language, titles|
        if language =~ BCP47_REGEXP
          valid, rejected = Array(titles).partition { |title| title.instance_of?(String) }
          warnings.concat(Array.new(rejected.length, :invalid_value))
          titles_by_language[language] = valid
        else
          warnings << :invalid_language
        end
      end
      warnings << :invalid_value if titles_by_language.empty?
      [titles_by_language, warnings, type]
    else
      [{}, :invalid_value, type]
    end
  end
end
179
+
180
# Builds a checker for column-reference properties.
#
# The checker wraps the value with Kernel#Array (a single reference becomes a
# one-element list, nil becomes an empty list) and never reports a warning.
def column_reference_property(type)
  ->(value, _base_url, _lang) { [Array(value), nil, type] }
end
185
+
186
+
187
+ end
188
+
189
+ PROPERTIES = {
190
+ # context properties
191
+ "@language" => language_property(:context),
192
+ "@base" => link_property(:context),
193
+ # common properties
194
+ "@id" => link_property(:common),
195
+ "notes" => array_property(:common),
196
+ "suppressOutput" => boolean_property(:common),
197
+ # inherited properties
198
+ "null" => lambda { |value, base_url, lang|
199
+ case value
200
+ when String
201
+ return [value], nil, :inherited
202
+ when Array
203
+ values = []
204
+ warnings = []
205
+ value.each do |v|
206
+ if v.instance_of? String
207
+ values << v
208
+ else
209
+ warnings << :invalid_value
210
+ end
211
+ end
212
+ return values, warnings, :inherited
213
+ else
214
+ return [""], :invalid_value, :inherited
215
+ end
216
+ },
217
+ "default" => string_property(:inherited),
218
+ "separator" => lambda { |value, base_url, lang|
219
+ return value, nil, :inherited if value.instance_of?(String) || value.nil?
220
+ return nil, :invalid_value, :inherited
221
+ },
222
+ "lang" => language_property(:inherited),
223
+ "datatype" => lambda { |value, base_url, lang|
224
+ value = value.clone
225
+ warnings = []
226
+ if value.instance_of? Hash
227
+ if value["@id"]
228
+ raise Csvlint::Csvw::MetadataError.new("datatype.@id"), "datatype @id must not be the id of a built-in datatype (#{value["@id"]})" if BUILT_IN_DATATYPES.values.include?(value["@id"])
229
+ v,w,t = PROPERTIES["@id"].call(value["@id"], base_url, lang)
230
+ unless w.nil?
231
+ warnings << w
232
+ value.delete("@id")
233
+ end
234
+ end
235
+
236
+ if value["base"]
237
+ if BUILT_IN_DATATYPES.include? value["base"]
238
+ value["base"] = BUILT_IN_DATATYPES[value["base"]]
239
+ else
240
+ value["base"] = BUILT_IN_DATATYPES["string"]
241
+ warnings << :invalid_datatype_base
242
+ end
243
+ else
244
+ value["base"] = BUILT_IN_DATATYPES["string"]
245
+ end
246
+ elsif BUILT_IN_DATATYPES.include? value
247
+ value = { "@id" => BUILT_IN_DATATYPES[value] }
248
+ else
249
+ value = { "@id" => BUILT_IN_DATATYPES["string"] }
250
+ warnings << :invalid_value
251
+ end
252
+
253
+ unless STRING_DATATYPES.include?(value["base"]) || BINARY_DATATYPES.include?(value["base"])
254
+ raise Csvlint::Csvw::MetadataError.new("datatype.length"), "datatypes based on #{value["base"]} cannot have a length facet" if value["length"]
255
+ raise Csvlint::Csvw::MetadataError.new("datatype.minLength"), "datatypes based on #{value["base"]} cannot have a minLength facet" if value["minLength"]
256
+ raise Csvlint::Csvw::MetadataError.new("datatype.maxLength"), "datatypes based on #{value["base"]} cannot have a maxLength facet" if value["maxLength"]
257
+ end
258
+
259
+ if value["minimum"]
260
+ value["minInclusive"] = value["minimum"]
261
+ value.delete("minimum")
262
+ end
263
+ if value["maximum"]
264
+ value["maxInclusive"] = value["maximum"]
265
+ value.delete("maximum")
266
+ end
267
+
268
+ warnings += convert_value_facet(value, "minInclusive", value["base"])
269
+ warnings += convert_value_facet(value, "minExclusive", value["base"])
270
+ warnings += convert_value_facet(value, "maxInclusive", value["base"])
271
+ warnings += convert_value_facet(value, "maxExclusive", value["base"])
272
+
273
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both minimum/minInclusive (#{value["minInclusive"]}) and minExclusive (#{value["minExclusive"]}" if value["minInclusive"] && value["minExclusive"]
274
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both maximum/maxInclusive (#{value["maxInclusive"]}) and maxExclusive (#{value["maxExclusive"]}" if value["maxInclusive"] && value["maxExclusive"]
275
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{value["minInclusive"]}) cannot be more than maxInclusive (#{value["maxInclusive"]}" if value["minInclusive"] && value["maxInclusive"] && value["minInclusive"] > value["maxInclusive"]
276
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{value["minInclusive"]}) cannot be more than or equal to maxExclusive (#{value["maxExclusive"]}" if value["minInclusive"] && value["maxExclusive"] && value["minInclusive"] >= value["maxExclusive"]
277
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{value["minExclusive"]}) cannot be more than or equal to maxExclusive (#{value["maxExclusive"]}" if value["minExclusive"] && value["maxExclusive"] && value["minExclusive"] > value["maxExclusive"]
278
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{value["minExclusive"]}) cannot be more than maxInclusive (#{value["maxInclusive"]}" if value["minExclusive"] && value["maxInclusive"] && value["minExclusive"] >= value["maxInclusive"]
279
+
280
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be less than minLength (#{value["minLength"]}" if value["length"] && value["minLength"] && value["length"] < value["minLength"]
281
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be more than maxLength (#{value["maxLength"]}" if value["length"] && value["maxLength"] && value["length"] > value["maxLength"]
282
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minLength (#{value["minLength"]}) cannot be more than maxLength (#{value["maxLength"]}" if value["minLength"] && value["maxLength"] && value["minLength"] > value["maxLength"]
283
+
284
+ if value["format"]
285
+ if REGEXP_FORMAT_DATATYPES.include?(value["base"])
286
+ begin
287
+ value["format"] = Regexp.new(value["format"])
288
+ rescue RegexpError
289
+ value.delete("format")
290
+ warnings << :invalid_regex
291
+ end
292
+ elsif NUMERIC_FORMAT_DATATYPES.include?(value["base"])
293
+ value["format"] = { "pattern" => value["format"] } if value["format"].instance_of? String
294
+ begin
295
+ value["format"] = Csvlint::Csvw::NumberFormat.new(value["format"]["pattern"], value["format"]["groupChar"], value["format"]["decimalChar"] || ".")
296
+ rescue Csvlint::Csvw::NumberFormatError
297
+ value["format"] = Csvlint::Csvw::NumberFormat.new(nil, value["format"]["groupChar"], value["format"]["decimalChar"] || ".")
298
+ warnings << :invalid_number_format
299
+ end
300
+ elsif value["base"] == "http://www.w3.org/2001/XMLSchema#boolean"
301
+ if value["format"].instance_of? String
302
+ value["format"] = value["format"].split("|")
303
+ unless value["format"].length == 2
304
+ value.delete("format")
305
+ warnings << :invalid_boolean_format
306
+ end
307
+ else
308
+ value.delete("format")
309
+ warnings << :invalid_boolean_format
310
+ end
311
+ elsif DATE_FORMAT_DATATYPES.include?(value["base"])
312
+ if value["format"].instance_of? String
313
+ begin
314
+ value["format"] = Csvlint::Csvw::DateFormat.new(value["format"])
315
+ rescue Csvlint::CsvDateFormatError
316
+ value.delete("format")
317
+ warnings << :invalid_date_format
318
+ end
319
+ else
320
+ value.delete("format")
321
+ warnings << :invalid_date_format
322
+ end
323
+ end
324
+ end
325
+ return value, warnings, :inherited
326
+ },
327
+ "required" => boolean_property(:inherited),
328
+ "ordered" => boolean_property(:inherited),
329
+ "aboutUrl" => string_property(:inherited),
330
+ "propertyUrl" => string_property(:inherited),
331
+ "valueUrl" => string_property(:inherited),
332
+ "textDirection" => lambda { |value, base_url, lang|
333
+ value = value.to_sym
334
+ return value, nil, :inherited if [:ltr, :rtl, :auto, :inherit].include? value
335
+ return :inherit, :invalid_value, :inherited
336
+ },
337
+ # column level properties
338
+ "virtual" => boolean_property(:column),
339
+ "titles" => natural_language_property(:column),
340
+ "name" => lambda { |value, base_url, lang|
341
+ return value, nil, :column if value.instance_of?(String) && value =~ NAME_REGEXP
342
+ return nil, :invalid_value, :column
343
+ },
344
+ # table level properties
345
+ "transformations" => lambda { |value, base_url, lang|
346
+ transformations = []
347
+ warnings = []
348
+ if value.instance_of? Array
349
+ value.each_with_index do |transformation,i|
350
+ if transformation.instance_of? Hash
351
+ transformation = transformation.clone
352
+ transformation.each do |p,v|
353
+ if p == "@id"
354
+ raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@id"), "@id starts with _:" if v =~ /^_:/
355
+ elsif p == "@type"
356
+ raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@type"), "@type of transformation is not 'Template'" if v != 'Template'
357
+ elsif p == "url"
358
+ elsif p == "titles"
359
+ else
360
+ v, warning, type = check_property(p, v, base_url, lang)
361
+ unless type == :transformation && (warning.nil? || warning.empty?)
362
+ value.delete(p)
363
+ warnings << :invalid_property unless type == :transformation
364
+ warnings += Array(warning)
365
+ end
366
+ end
367
+ end
368
+ transformations << transformation
369
+ else
370
+ warnings << :invalid_transformation
371
+ end
372
+ end
373
+ else
374
+ warnings << :invalid_value
375
+ end
376
+ return transformations, warnings, :table
377
+ },
378
+ "tableDirection" => lambda { |value, base_url, lang|
379
+ value = value.to_sym
380
+ return value, nil, :table if [:ltr, :rtl, :auto].include? value
381
+ return :auto, :invalid_value, :table
382
+ },
383
+ "tableSchema" => lambda { |value, base_url, lang|
384
+ schema_base_url = base_url
385
+ schema_lang = lang
386
+ if value.instance_of? String
387
+ schema_url = URI.join(base_url, value).to_s
388
+ schema_base_url = schema_url
389
+ schema_ref = schema_url.start_with?("file:") ? File.new(schema_url[5..-1]) : schema_url
390
+ schema = JSON.parse( open(schema_ref).read )
391
+ schema["@id"] = schema["@id"] ? URI.join(schema_url, schema["@id"]).to_s : schema_url
392
+ if schema["@context"]
393
+ if schema["@context"].instance_of?(Array) && schema["@context"].length > 1
394
+ schema_base_url = schema["@context"][1]["@base"] ? URI.join(schema_base_url, schema["@context"][1]["@base"]).to_s : schema_base_url
395
+ schema_lang = schema["@context"][1]["@language"] || schema_lang
396
+ end
397
+ schema.delete("@context")
398
+ end
399
+ elsif value.instance_of? Hash
400
+ schema = value.clone
401
+ else
402
+ return {}, :invalid_value, :table
403
+ end
404
+ warnings = []
405
+ schema.each do |p,v|
406
+ if p == "@id"
407
+ raise Csvlint::Csvw::MetadataError.new("tableSchema.@id"), "@id starts with _:" if v =~ /^_:/
408
+ elsif p == "@type"
409
+ raise Csvlint::Csvw::MetadataError.new("tableSchema.@type"), "@type of schema is not 'Schema'" if v != 'Schema'
410
+ else
411
+ v, warning, type = check_property(p, v, schema_base_url, schema_lang)
412
+ if (type == :schema || type == :inherited) && (warning.nil? || warning.empty?)
413
+ schema[p] = v
414
+ else
415
+ schema.delete(p)
416
+ warnings << :invalid_property unless (type == :schema || type == :inherited)
417
+ warnings += Array(warning)
418
+ end
419
+ end
420
+ end
421
+ return schema, warnings, :table
422
+ },
423
+ "url" => link_property(:table),
424
+ "dialect" => lambda { |value, base_url, lang|
425
+ if value.instance_of? Hash
426
+ value = value.clone
427
+ warnings = []
428
+ value.each do |p,v|
429
+ if p == "@id"
430
+ raise Csvlint::Csvw::MetadataError.new("dialect.@id"), "@id starts with _:" if v =~ /^_:/
431
+ elsif p == "@type"
432
+ raise Csvlint::Csvw::MetadataError.new("dialect.@type"), "@type of dialect is not 'Dialect'" if v != 'Dialect'
433
+ else
434
+ v, warning, type = check_property(p, v, base_url, lang)
435
+ if type == :dialect && (warning.nil? || warning.empty?)
436
+ value[p] = v
437
+ else
438
+ value.delete(p)
439
+ warnings << :invalid_property unless type == :dialect
440
+ warnings += Array(warning)
441
+ end
442
+ end
443
+ end
444
+ return value, warnings, :table
445
+ else
446
+ return {}, :invalid_value, :table
447
+ end
448
+ },
449
+ # dialect properties
450
+ "commentPrefix" => string_property(:dialect),
451
+ "delimiter" => string_property(:dialect),
452
+ "doubleQuote" => boolean_property(:dialect),
453
+ "encoding" => lambda { |value, base_url, lang|
454
+ return value, nil, :dialect if VALID_ENCODINGS.include? value
455
+ return nil, :invalid_value, :dialect
456
+ },
457
+ "header" => boolean_property(:dialect),
458
+ "headerRowCount" => numeric_property(:dialect),
459
+ "lineTerminators" => array_property(:dialect),
460
+ "quoteChar" => string_property(:dialect),
461
+ "skipBlankRows" => boolean_property(:dialect),
462
+ "skipColumns" => numeric_property(:dialect),
463
+ "skipInitialSpace" => boolean_property(:dialect),
464
+ "skipRows" => numeric_property(:dialect),
465
+ "trim" => lambda { |value, base_url, lang|
466
+ value = :true if value == true || value == "true"
467
+ value = :false if value == false || value == "false"
468
+ value = :start if value == "start"
469
+ value = :end if value == "end"
470
+ return value, nil, :dialect if [:true, :false, :start, :end].include? value
471
+ return true, :invalid_value, :dialect
472
+ },
473
+ # schema properties
474
+ "columns" => lambda { |value, base_url, lang| return value, nil, :schema },
475
+ "primaryKey" => column_reference_property(:schema),
476
+ "foreignKeys" => lambda { |value, base_url, lang|
477
+ foreign_keys = []
478
+ warnings = []
479
+ if value.instance_of? Array
480
+ value.each_with_index do |foreign_key,i|
481
+ if foreign_key.instance_of? Hash
482
+ foreign_key = foreign_key.clone
483
+ foreign_key.each do |p,v|
484
+ v, warning, type = check_property(p, v, base_url, lang)
485
+ if type == :foreign_key && (warning.nil? || warning.empty?)
486
+ foreign_key[p] = v
487
+ elsif p =~ /:/
488
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.#{p}"), "foreignKey includes a prefixed (common) property"
489
+ else
490
+ foreign_key.delete(p)
491
+ warnings << :invalid_property unless type == :foreign_key
492
+ warnings += Array(warning)
493
+ end
494
+ end
495
+ foreign_keys << foreign_key
496
+ else
497
+ warnings << :invalid_foreign_key
498
+ end
499
+ end
500
+ else
501
+ warnings << :invalid_value
502
+ end
503
+ return foreign_keys, warnings, :schema
504
+ },
505
+ "rowTitles" => column_reference_property(:schema),
506
+ # transformation properties
507
+ "targetFormat" => lambda { |value, base_url, lang| return value, nil, :transformation },
508
+ "scriptFormat" => lambda { |value, base_url, lang| return value, nil, :transformation },
509
+ "source" => lambda { |value, base_url, lang| return value, nil, :transformation },
510
+ # foreignKey properties
511
+ "columnReference" => column_reference_property(:foreign_key),
512
+ "reference" => lambda { |value, base_url, lang|
513
+ if value.instance_of? Hash
514
+ value = value.clone
515
+ warnings = []
516
+ value.each do |p,v|
517
+ if ["resource", "schemaReference", "columnReference"].include? p
518
+ v, warning, type = check_property(p, v, base_url, lang)
519
+ if warning.nil? || warning.empty?
520
+ value[p] = v
521
+ else
522
+ value.delete(p)
523
+ warnings += Array(warning)
524
+ end
525
+ elsif p =~ /:/
526
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.#{p}"), "foreignKey reference includes a prefixed (common) property"
527
+ else
528
+ value.delete(p)
529
+ warnings << :invalid_property
530
+ end
531
+ end
532
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.columnReference"), "foreignKey reference columnReference is missing" unless value["columnReference"]
533
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference does not have either resource or schemaReference" unless value["resource"] || value["schemaReference"]
534
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference has both resource and schemaReference" if value["resource"] && value["schemaReference"]
535
+ return value, warnings, :foreign_key
536
+ else
537
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference is not an object"
538
+ end
539
+ },
540
+ # foreignKey reference properties
541
+ "resource" => lambda { |value, base_url, lang| return value, nil, :foreign_key_reference },
542
+ "schemaReference" => lambda { |value, base_url, lang|
543
+ return URI.join(base_url, value).to_s, nil, :foreign_key_reference
544
+ }
545
+ }
546
+
547
# Maps a namespace prefix to its namespace IRI.
# Frozen so the shared table cannot be modified at runtime.
NAMESPACES = {
  "dcat" => "http://www.w3.org/ns/dcat#",
  "qb" => "http://purl.org/linked-data/cube#",
  "grddl" => "http://www.w3.org/2003/g/data-view#",
  "ma" => "http://www.w3.org/ns/ma-ont#",
  "org" => "http://www.w3.org/ns/org#",
  "owl" => "http://www.w3.org/2002/07/owl#",
  "prov" => "http://www.w3.org/ns/prov#",
  "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
  "rdfa" => "http://www.w3.org/ns/rdfa#",
  "rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
  "rif" => "http://www.w3.org/2007/rif#",
  "rr" => "http://www.w3.org/ns/r2rml#",
  "sd" => "http://www.w3.org/ns/sparql-service-description#",
  "skos" => "http://www.w3.org/2004/02/skos/core#",
  "skosxl" => "http://www.w3.org/2008/05/skos-xl#",
  "wdr" => "http://www.w3.org/2007/05/powder#",
  "void" => "http://rdfs.org/ns/void#",
  "wdrs" => "http://www.w3.org/2007/05/powder-s#",
  "xhv" => "http://www.w3.org/1999/xhtml/vocab#",
  "xml" => "http://www.w3.org/XML/1998/namespace",
  "xsd" => "http://www.w3.org/2001/XMLSchema#",
  "cc" => "http://creativecommons.org/ns#",
  "ctag" => "http://commontag.org/ns#",
  "dc" => "http://purl.org/dc/terms/",
  "dcterms" => "http://purl.org/dc/terms/",
  "dc11" => "http://purl.org/dc/elements/1.1/",
  "foaf" => "http://xmlns.com/foaf/0.1/",
  "gr" => "http://purl.org/goodrelations/v1#",
  "ical" => "http://www.w3.org/2002/12/cal/icaltzd#",
  "og" => "http://ogp.me/ns#",
  "rev" => "http://purl.org/stuff/rev#",
  "sioc" => "http://rdfs.org/sioc/ns#",
  "v" => "http://rdf.data-vocabulary.org/#",
  "vcard" => "http://www.w3.org/2006/vcard/ns#",
  "schema" => "http://schema.org/"
}.freeze
584
+
585
# Regexp source fragments (Strings) for the parts of a BCP 47 language tag.
# They are concatenated below and compiled once into BCP47_REGEXP.
BCP47_REGULAR_REGEXP = "(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)"
BCP47_IRREGULAR_REGEXP = "(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)"
BCP47_GRANDFATHERED_REGEXP = "(?<grandfathered>" + BCP47_IRREGULAR_REGEXP + "|" + BCP47_REGULAR_REGEXP + ")"
BCP47_PRIVATE_USE_REGEXP = "(?<privateUse>x(-[A-Za-z0-9]{1,8})+)"
BCP47_SINGLETON_REGEXP = "[0-9A-WY-Za-wy-z]"
BCP47_EXTENSION_REGEXP = "(?<extension>" + BCP47_SINGLETON_REGEXP + "(-[A-Za-z0-9]{2,8})+)"
BCP47_VARIANT_REGEXP = "(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})"
BCP47_REGION_REGEXP = "(?<region>[A-Za-z]{2}|[0-9]{3})"
BCP47_SCRIPT_REGEXP = "(?<script>[A-Za-z]{4})"
BCP47_EXTLANG_REGEXP = "(?<extlang>[A-Za-z]{3}(-[A-Za-z]{3}){0,2})"
BCP47_LANGUAGE_REGEXP = "(?<language>([A-Za-z]{2,3}(-" + BCP47_EXTLANG_REGEXP + ")?)|[A-Za-z]{4}|[A-Za-z]{5,8})"
BCP47_LANGTAG_REGEXP = "(" + BCP47_LANGUAGE_REGEXP + "(-" + BCP47_SCRIPT_REGEXP + ")?" + "(-" + BCP47_REGION_REGEXP + ")?" + "(-" + BCP47_VARIANT_REGEXP + ")*" + "(-" + BCP47_EXTENSION_REGEXP + ")*" + "(-" + BCP47_PRIVATE_USE_REGEXP + ")?" + ")"
# Anchored with \A and \z so the entire string has to be a tag. In Ruby, ^ and $
# match at every line boundary, which let "en\n<anything>" pass as valid.
BCP47_LANGUAGETAG_REGEXP = "\\A(" + BCP47_GRANDFATHERED_REGEXP + "|" + BCP47_LANGTAG_REGEXP + "|" + BCP47_PRIVATE_USE_REGEXP + ")\\z"
# Compiled matcher for a complete language tag: grandfathered, regular langtag or private-use.
BCP47_REGEXP = Regexp.new(BCP47_LANGUAGETAG_REGEXP)
599
+
600
# Pattern for a name: letters, digits, "_" and %XX escapes (uppercase hex),
# where the first character may not be "_".
# \A and \z anchor the whole string; with ^ and $ a multi-line string passed
# as soon as one of its lines was a valid name.
NAME_REGEXP = /\A([A-Za-z0-9]|(%[A-F0-9][A-F0-9]))([A-Za-z0-9_]|(%[A-F0-9][A-F0-9]))*\z/
601
+
602
# Names of the built-in description object types. Frozen so the list cannot
# be altered at runtime.
BUILT_IN_TYPES = %w[TableGroup Table Schema Column Dialect Template Datatype].freeze
603
+
604
# Datatype IRIs grouped under the "regexp format" heading.
# NOTE(review): presumably the datatypes whose `format` is read as a regular
# expression; confirm against the code that consumes this list.
# Frozen so the list cannot be altered at runtime.
REGEXP_FORMAT_DATATYPES = [
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "http://www.w3.org/ns/csvw#JSON",
  "http://www.w3.org/2001/XMLSchema#anyAtomicType",
  "http://www.w3.org/2001/XMLSchema#anyURI",
  "http://www.w3.org/2001/XMLSchema#base64Binary",
  "http://www.w3.org/2001/XMLSchema#duration",
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration",
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration",
  "http://www.w3.org/2001/XMLSchema#hexBinary",
  "http://www.w3.org/2001/XMLSchema#QName",
  "http://www.w3.org/2001/XMLSchema#string",
  "http://www.w3.org/2001/XMLSchema#normalizedString",
  "http://www.w3.org/2001/XMLSchema#token",
  "http://www.w3.org/2001/XMLSchema#language",
  "http://www.w3.org/2001/XMLSchema#Name",
  "http://www.w3.org/2001/XMLSchema#NMTOKEN"
].freeze
623
+
624
# Datatype IRIs classed as string datatypes.
# Frozen so the list cannot be altered at runtime.
STRING_DATATYPES = [
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "http://www.w3.org/ns/csvw#JSON",
  "http://www.w3.org/2001/XMLSchema#string",
  "http://www.w3.org/2001/XMLSchema#normalizedString",
  "http://www.w3.org/2001/XMLSchema#token",
  "http://www.w3.org/2001/XMLSchema#language",
  "http://www.w3.org/2001/XMLSchema#Name",
  "http://www.w3.org/2001/XMLSchema#NMTOKEN"
].freeze
635
+
636
# Datatype IRIs classed as binary datatypes.
# Frozen so the list cannot be altered at runtime.
BINARY_DATATYPES = [
  "http://www.w3.org/2001/XMLSchema#base64Binary",
  "http://www.w3.org/2001/XMLSchema#hexBinary"
].freeze
640
+
641
# Datatype IRIs grouped under the "numeric format" heading.
# NOTE(review): presumably the datatypes whose `format` is read as a number
# pattern; confirm against the code that consumes this list.
# Frozen so the list cannot be altered at runtime.
NUMERIC_FORMAT_DATATYPES = [
  "http://www.w3.org/2001/XMLSchema#decimal",
  "http://www.w3.org/2001/XMLSchema#integer",
  "http://www.w3.org/2001/XMLSchema#long",
  "http://www.w3.org/2001/XMLSchema#int",
  "http://www.w3.org/2001/XMLSchema#short",
  "http://www.w3.org/2001/XMLSchema#byte",
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
  "http://www.w3.org/2001/XMLSchema#positiveInteger",
  "http://www.w3.org/2001/XMLSchema#unsignedLong",
  "http://www.w3.org/2001/XMLSchema#unsignedInt",
  "http://www.w3.org/2001/XMLSchema#unsignedShort",
  "http://www.w3.org/2001/XMLSchema#unsignedByte",
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger",
  "http://www.w3.org/2001/XMLSchema#negativeInteger",
  "http://www.w3.org/2001/XMLSchema#double",
  "http://www.w3.org/2001/XMLSchema#float"
].freeze
659
+
660
# Datatype IRIs grouped under the "date format" heading.
# NOTE(review): presumably the datatypes whose `format` is read as a
# date/time pattern; confirm against the code that consumes this list.
# Frozen so the list cannot be altered at runtime.
DATE_FORMAT_DATATYPES = [
  "http://www.w3.org/2001/XMLSchema#date",
  "http://www.w3.org/2001/XMLSchema#dateTime",
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp",
  "http://www.w3.org/2001/XMLSchema#time"
].freeze
666
+
667
# Maps a built-in datatype name to its datatype IRI. Several names are
# aliases for the same IRI (e.g. "number" and "double", "datetime" and
# "dateTime", "any" and "anyAtomicType").
# Frozen so the table cannot be altered at runtime.
BUILT_IN_DATATYPES = {
  "number" => "http://www.w3.org/2001/XMLSchema#double",
  "binary" => "http://www.w3.org/2001/XMLSchema#base64Binary",
  "datetime" => "http://www.w3.org/2001/XMLSchema#dateTime",
  "any" => "http://www.w3.org/2001/XMLSchema#anyAtomicType",
  "xml" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "html" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "json" => "http://www.w3.org/ns/csvw#JSON",
  "anyAtomicType" => "http://www.w3.org/2001/XMLSchema#anyAtomicType",
  "anyURI" => "http://www.w3.org/2001/XMLSchema#anyURI",
  "base64Binary" => "http://www.w3.org/2001/XMLSchema#base64Binary",
  "boolean" => "http://www.w3.org/2001/XMLSchema#boolean",
  "date" => "http://www.w3.org/2001/XMLSchema#date",
  "dateTime" => "http://www.w3.org/2001/XMLSchema#dateTime",
  "dateTimeStamp" => "http://www.w3.org/2001/XMLSchema#dateTimeStamp",
  "decimal" => "http://www.w3.org/2001/XMLSchema#decimal",
  "integer" => "http://www.w3.org/2001/XMLSchema#integer",
  "long" => "http://www.w3.org/2001/XMLSchema#long",
  "int" => "http://www.w3.org/2001/XMLSchema#int",
  "short" => "http://www.w3.org/2001/XMLSchema#short",
  "byte" => "http://www.w3.org/2001/XMLSchema#byte",
  "nonNegativeInteger" => "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
  "positiveInteger" => "http://www.w3.org/2001/XMLSchema#positiveInteger",
  "unsignedLong" => "http://www.w3.org/2001/XMLSchema#unsignedLong",
  "unsignedInt" => "http://www.w3.org/2001/XMLSchema#unsignedInt",
  "unsignedShort" => "http://www.w3.org/2001/XMLSchema#unsignedShort",
  "unsignedByte" => "http://www.w3.org/2001/XMLSchema#unsignedByte",
  "nonPositiveInteger" => "http://www.w3.org/2001/XMLSchema#nonPositiveInteger",
  "negativeInteger" => "http://www.w3.org/2001/XMLSchema#negativeInteger",
  "double" => "http://www.w3.org/2001/XMLSchema#double",
  "duration" => "http://www.w3.org/2001/XMLSchema#duration",
  "dayTimeDuration" => "http://www.w3.org/2001/XMLSchema#dayTimeDuration",
  "yearMonthDuration" => "http://www.w3.org/2001/XMLSchema#yearMonthDuration",
  "float" => "http://www.w3.org/2001/XMLSchema#float",
  "gDay" => "http://www.w3.org/2001/XMLSchema#gDay",
  "gMonth" => "http://www.w3.org/2001/XMLSchema#gMonth",
  "gMonthDay" => "http://www.w3.org/2001/XMLSchema#gMonthDay",
  "gYear" => "http://www.w3.org/2001/XMLSchema#gYear",
  "gYearMonth" => "http://www.w3.org/2001/XMLSchema#gYearMonth",
  "hexBinary" => "http://www.w3.org/2001/XMLSchema#hexBinary",
  "QName" => "http://www.w3.org/2001/XMLSchema#QName",
  "string" => "http://www.w3.org/2001/XMLSchema#string",
  "normalizedString" => "http://www.w3.org/2001/XMLSchema#normalizedString",
  "token" => "http://www.w3.org/2001/XMLSchema#token",
  "language" => "http://www.w3.org/2001/XMLSchema#language",
  "Name" => "http://www.w3.org/2001/XMLSchema#Name",
  "NMTOKEN" => "http://www.w3.org/2001/XMLSchema#NMTOKEN",
  "time" => "http://www.w3.org/2001/XMLSchema#time"
}.freeze
716
+
717
# Encoding names accepted for the dialect `encoding` property, all in
# lower case. Frozen so the list cannot be altered at runtime.
VALID_ENCODINGS = [
  "utf-8",
  "ibm866",
  "iso-8859-2",
  "iso-8859-3",
  "iso-8859-4",
  "iso-8859-5",
  "iso-8859-6",
  "iso-8859-7",
  "iso-8859-8",
  "iso-8859-8-i",
  "iso-8859-10",
  "iso-8859-13",
  "iso-8859-14",
  "iso-8859-15",
  "iso-8859-16",
  "koi8-r",
  "koi8-u",
  "macintosh",
  "windows-874",
  "windows-1250",
  "windows-1251",
  "windows-1252",
  "windows-1253",
  "windows-1254",
  "windows-1255",
  "windows-1256",
  "windows-1257",
  "windows-1258",
  "x-mac-cyrillic",
  "gb18030",
  "hz-gb-2312",
  "big5",
  "euc-jp",
  "iso-2022-jp",
  "shift_jis",
  "euc-kr",
  "replacement",
  "utf-16be",
  "utf-16le",
  "x-user-defined"
].freeze
759
+ end
760
+ end
761
+ end