csvlint 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.gitignore +7 -1
- data/CHANGELOG.md +19 -1
- data/README.md +93 -36
- data/bin/csvlint +68 -27
- data/csvlint.gemspec +2 -0
- data/features/csvw_schema_validation.feature +127 -0
- data/features/fixtures/spreadsheet.xlsx +0 -0
- data/features/sources.feature +3 -4
- data/features/step_definitions/parse_csv_steps.rb +13 -1
- data/features/step_definitions/schema_validation_steps.rb +27 -1
- data/features/step_definitions/sources_steps.rb +1 -1
- data/features/step_definitions/validation_errors_steps.rb +48 -1
- data/features/step_definitions/validation_info_steps.rb +5 -1
- data/features/step_definitions/validation_warnings_steps.rb +15 -1
- data/features/support/load_tests.rb +114 -0
- data/features/validation_errors.feature +12 -24
- data/features/validation_warnings.feature +18 -6
- data/lib/csvlint.rb +10 -0
- data/lib/csvlint/csvw/column.rb +359 -0
- data/lib/csvlint/csvw/date_format.rb +182 -0
- data/lib/csvlint/csvw/metadata_error.rb +13 -0
- data/lib/csvlint/csvw/number_format.rb +211 -0
- data/lib/csvlint/csvw/property_checker.rb +761 -0
- data/lib/csvlint/csvw/table.rb +204 -0
- data/lib/csvlint/csvw/table_group.rb +165 -0
- data/lib/csvlint/schema.rb +40 -23
- data/lib/csvlint/validate.rb +142 -19
- data/lib/csvlint/version.rb +1 -1
- data/spec/csvw/column_spec.rb +112 -0
- data/spec/csvw/date_format_spec.rb +49 -0
- data/spec/csvw/number_format_spec.rb +403 -0
- data/spec/csvw/table_group_spec.rb +143 -0
- data/spec/csvw/table_spec.rb +90 -0
- data/spec/schema_spec.rb +27 -1
- data/spec/spec_helper.rb +0 -1
- data/spec/validator_spec.rb +16 -10
- metadata +53 -2
@@ -0,0 +1,211 @@
|
|
1
|
+
module Csvlint
|
2
|
+
module Csvw
|
3
|
+
class NumberFormat
|
4
|
+
|
5
|
+
# Read-only accessors for the raw pattern and for the pieces derived from it
# by #initialize (literal prefix/suffix, separators and grouping sizes).
attr_reader :pattern, :prefix, :numeric_part, :suffix, :grouping_separator, :decimal_separator, :primary_grouping_size, :secondary_grouping_size, :fractional_grouping_size
|
6
|
+
|
7
|
+
# Builds the matcher for a number format.
#
# pattern            - number format pattern (e.g. "#,##0.00"), or nil to
#                      accept the default lexical form (digits, optional
#                      fraction/exponent/percent sign, NaN, INF, -INF).
# grouping_separator - character separating digit groups; defaults to ","
#                      only when a pattern is supplied.
# decimal_separator  - character separating integer and fractional digits;
#                      defaults to ".".
#
# Raises Csvw::NumberFormatError when the pattern contains no numeric part.
def initialize(pattern=nil, grouping_separator=nil, decimal_separator=".")
  @pattern = pattern
  @grouping_separator = grouping_separator || (@pattern.nil? ? nil : ",")
  @decimal_separator = decimal_separator || "."

  if pattern.nil?
    @regexp = Regexp.new("^(([-+]?[0-9]+(#{Regexp.escape(@decimal_separator)}[0-9]+)?([Ee][-+]?[0-9]+)?[%‰]?)|NaN|INF|-INF)$")
    return
  end

  sep = Regexp.escape(@grouping_separator)
  dec = Regexp.escape(@decimal_separator)

  # Split the pattern into literal prefix, numeric part and literal suffix.
  pattern_numeric_regexp = Regexp.new("(?<numeric_part>([0#Ee]|#{sep}|#{dec})+)")
  number_format_regexp = Regexp.new("^(?<prefix>.*?)#{pattern_numeric_regexp}(?<suffix>.*?)$")
  match = number_format_regexp.match(pattern)
  raise Csvw::NumberFormatError, "invalid number format" if match.nil?

  @prefix = match["prefix"]
  @numeric_part = match["numeric_part"]
  @suffix = match["suffix"]

  parts = @numeric_part.split("E")
  mantissa_part = parts[0]
  exponent_part = parts[1] || ""
  mantissa_parts = mantissa_part.split(@decimal_separator)
  # raise Csvw::NumberFormatError, "more than two decimal separators in number format" if parts.length > 2
  integer_part = mantissa_parts[0]
  fractional_part = mantissa_parts[1] || ""

  # #parse returns an Integer only when the pattern has neither fraction nor exponent.
  @integer_pattern = exponent_part == "" && fractional_part == ""

  # "0" marks a required digit, "#" an optional one.
  min_integer_digits = integer_part.gsub(@grouping_separator, "").gsub("#", "").length
  min_fraction_digits = fractional_part.gsub(@grouping_separator, "").gsub("#", "").length
  max_fraction_digits = fractional_part.gsub(@grouping_separator, "").length
  min_exponent_digits = exponent_part.gsub("#", "").length
  max_exponent_digits = exponent_part.length

  # Grouping sizes come from the groups that follow the first separator:
  # the last one is the primary size, the one before it the secondary size.
  integer_groups = integer_part.split(@grouping_separator)[1..-1] || []
  @primary_grouping_size = integer_groups.empty? ? 0 : integer_groups[-1].length
  @secondary_grouping_size = integer_groups.length > 1 ? integer_groups[-2].length : @primary_grouping_size

  fractional_groups = fractional_part.split(@grouping_separator)[0..-2]
  @fractional_grouping_size = fractional_groups.empty? ? 0 : fractional_groups[0].length
  fgs = @fractional_grouping_size

  numeric_part_regexp = "[-+]?"

  if @primary_grouping_size == 0
    integer_regexp = "[0-9]*[0-9]{#{min_integer_digits}}"
  else
    leading_regexp = "([0-9]{0,#{@secondary_grouping_size - 1}}#{sep})?"
    secondary_groups = "([0-9]{#{@secondary_grouping_size}}#{sep})*"
    if min_integer_digits > @primary_grouping_size
      remaining_req_digits = min_integer_digits - @primary_grouping_size
      req_secondary_groups = remaining_req_digits / @secondary_grouping_size > 0 ? "([0-9]{#{@secondary_grouping_size}}#{sep}){#{remaining_req_digits / @secondary_grouping_size}}" : ""
      if remaining_req_digits % @secondary_grouping_size > 0
        final_req_digits = "[0-9]{#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
        final_opt_digits = "[0-9]{0,#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
        integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{remaining_req_digits % @secondary_grouping_size}}#{sep}#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
      else
        integer_regexp = "(#{leading_regexp}#{secondary_groups})?#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
      end
    else
      final_req_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{#{@primary_grouping_size - min_integer_digits}}" : ""
      final_opt_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{0,#{@primary_grouping_size - min_integer_digits}}" : ""
      integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{min_integer_digits}}"
    end
  end

  numeric_part_regexp += integer_regexp

  if max_fraction_digits > 0
    fractional_regexp = ""
    if fgs == 0
      fractional_regexp += "[0-9]{#{min_fraction_digits}}" if min_fraction_digits > 0
      fractional_regexp += "[0-9]{0,#{max_fraction_digits - min_fraction_digits}}" unless min_fraction_digits == max_fraction_digits
    elsif min_fraction_digits > 0
      if min_fraction_digits >= fgs
        # first group of required digits - something like "[0-9]{3}"
        fractional_regexp += "[0-9]{#{fgs}}"
        # additional groups of required digits - something like "(,[0-9]{3}){1}"
        fractional_regexp += "(#{sep}[0-9]{#{fgs}}){#{min_fraction_digits / fgs - 1}}" if min_fraction_digits / fgs > 1
        fractional_regexp += "#{sep}" if min_fraction_digits % fgs > 0
      end
      # additional required digits - something like ",[0-9]{1}"
      fractional_regexp += "[0-9]{#{min_fraction_digits % fgs}}" if min_fraction_digits % fgs > 0

      opt_fractional_digits = max_fraction_digits - min_fraction_digits
      if opt_fractional_digits > 0
        fractional_regexp += "("

        if min_fraction_digits % fgs > 0
          # optional fractional digits to complete the group
          fractional_regexp += "[0-9]{0,#{[opt_fractional_digits, fgs - (min_fraction_digits % fgs)].min}}"
          fractional_regexp += "|"
          fractional_regexp += "[0-9]{#{[opt_fractional_digits, fgs - (min_fraction_digits % fgs)].min}}"
        else
          fractional_regexp += "(#{sep}[0-9]{1,#{fgs}})?"
          fractional_regexp += "|"
          fractional_regexp += "#{sep}[0-9]{#{fgs}}"
        end

        remaining_opt_fractional_digits = opt_fractional_digits - (fgs - (min_fraction_digits % fgs))
        if remaining_opt_fractional_digits > 0
          if remaining_opt_fractional_digits % fgs > 0
            # optional fraction digits in groups
            fractional_regexp += "(#{sep}[0-9]{#{fgs}}){0,#{remaining_opt_fractional_digits / fgs}}" if remaining_opt_fractional_digits > fgs
            # remaining optional fraction digits
            fractional_regexp += "(#{sep}[0-9]{1,#{remaining_opt_fractional_digits % fgs}})?"
          else
            # optional fraction digits in groups
            fractional_regexp += "(#{sep}[0-9]{#{fgs}}){0,#{(remaining_opt_fractional_digits / fgs) - 1}}" if remaining_opt_fractional_digits > fgs
            # remaining optional fraction digits
            fractional_regexp += "(#{sep}[0-9]{1,#{fgs}})?"
          end

          # NOTE(review): the two appends below appear to repeat the optional
          # groups already added just above, so the regexp tolerates more
          # optional groups than the pattern declares. Kept unchanged pending
          # confirmation of the intended behaviour.
          fractional_regexp += "(#{sep}[0-9]{#{fgs}}){0,#{(remaining_opt_fractional_digits / fgs) - 1}}" if remaining_opt_fractional_digits > fgs
          fractional_regexp += "(#{sep}[0-9]{1,#{remaining_opt_fractional_digits % fgs}})?" if remaining_opt_fractional_digits % fgs > 0
        end
        fractional_regexp += ")"
      end
    elsif max_fraction_digits % fgs > 0
      # optional fractional digits in groups
      fractional_regexp += "([0-9]{#{fgs}}#{sep}){0,#{max_fraction_digits / fgs}}"
      # remaining optional fraction digits
      fractional_regexp += "(#{sep}[0-9]{1,#{max_fraction_digits % fgs}})?" if max_fraction_digits % fgs > 0
    else
      fractional_regexp += "([0-9]{#{fgs}}#{sep}){0,#{(max_fraction_digits / fgs) - 1}}" if max_fraction_digits > fgs
      fractional_regexp += "[0-9]{#{fgs}}"
    end

    fractional_regexp = "#{dec}#{fractional_regexp}"
    # The whole fraction (including the separator) is optional when no digit is required.
    fractional_regexp = "(#{fractional_regexp})?" if min_fraction_digits == 0
    numeric_part_regexp += fractional_regexp
  end

  if max_exponent_digits > 0
    numeric_part_regexp += "E"
    numeric_part_regexp += "[0-9]{0,#{max_exponent_digits - min_exponent_digits}}" unless max_exponent_digits == min_exponent_digits
    numeric_part_regexp += "[0-9]{#{min_exponent_digits}}" unless min_exponent_digits == 0
  end

  # Prefix and suffix are literal text, so both must be escaped before being
  # embedded in the regexp.
  @regexp = Regexp.new("^(?<prefix>#{Regexp.escape(@prefix)})(?<numeric_part>#{numeric_part_regexp})(?<suffix>#{Regexp.escape(@suffix)})$")
end
|
154
|
+
|
155
|
+
# Reports, as a strict true/false, whether +value+ matches the regexp
# built for this format.
def match(value)
  !!(value =~ @regexp)
end
|
158
|
+
|
159
|
+
# Converts +value+ to a number according to this format.
#
# Returns an Integer or Float (NaN / +-Infinity for "NaN", "INF", "-INF";
# percent and per-mille values are scaled when no pattern is set), or nil
# when the value does not conform to the format.
#
# The caller's string is never modified, and the configured decimal
# separator is converted to "." before numeric conversion so that formats
# with e.g. a decimal comma keep their fractional digits.
def parse(value)
  decimal = @decimal_separator || "."

  if @pattern.nil?
    unless @grouping_separator.nil?
      group = Regexp.escape(@grouping_separator)
      # A leading or doubled grouping separator is never valid.
      return nil if value =~ Regexp.new("((^#{group})|#{group}{2})")
      value = value.gsub(@grouping_separator, "")
    end

    if value =~ INTEGER_REGEXP
      case value[-1]
      when "%" then return value.to_f / 100
      when "‰" then return value.to_f / 1000
      else return value.to_i
      end
    end
    return nil unless value =~ @regexp

    case value
    when "NaN" then return Float::NAN
    when "INF" then return Float::INFINITY
    when "-INF" then return -Float::INFINITY
    end

    number = decimal == "." ? value : value.sub(decimal, ".")
    case value[-1]
    when "%" then number.to_f / 100
    when "‰" then number.to_f / 1000
    else number.to_f
    end
  else
    match = @regexp.match(value)
    return nil if match.nil?
    number = match["numeric_part"].gsub(@grouping_separator, "")
    return number.to_i if @integer_pattern
    number = number.sub(decimal, ".") unless decimal == "."
    number.to_f
  end
end
|
201
|
+
|
202
|
+
private
|
203
|
+
# Optionally signed run of digits with an optional trailing percent or
# per-mille sign; #parse uses it to recognise values without a fractional
# part or exponent. Note that Ruby's `private` does not apply to constants.
INTEGER_REGEXP = /^[-+]?[0-9]+[%‰]?$/
|
204
|
+
|
205
|
+
end
|
206
|
+
|
207
|
+
# Error type for number format patterns that cannot be interpreted.
class NumberFormatError < StandardError; end
|
210
|
+
end
|
211
|
+
end
|
@@ -0,0 +1,761 @@
|
|
1
|
+
module Csvlint
|
2
|
+
module Csvw
|
3
|
+
class PropertyChecker
|
4
|
+
|
5
|
+
class << self
|
6
|
+
|
7
|
+
# Validates a single metadata property.
#
# Returns a [value, warnings, type] triple: the result of the registered
# PROPERTIES checker when one exists; the checked value tagged :annotation
# for a property prefixed with a known namespace; otherwise the untouched
# value with an :invalid_property warning and a nil type.
def check_property(property, value, base_url, lang)
  return PROPERTIES[property].call(value, base_url, lang) if PROPERTIES.include?(property)

  namespaced = property =~ /^([a-z]+):/ && NAMESPACES.include?(property.split(":")[0])
  return [value, :invalid_property, nil] unless namespaced

  checked, warnings = check_common_property_value(value, base_url, lang)
  [checked, warnings, :annotation]
end
|
17
|
+
|
18
|
+
private
|
19
|
+
# Validates the value of a common (namespaced) property.
#
# Non-Hash values are returned untouched with nil warnings. Hash values are
# shallow-copied and each entry is checked; nested Hashes are checked
# recursively and "@id" is resolved against base_url when one is given.
#
# Returns [value, warnings].
# Raises Csvlint::Csvw::MetadataError for disallowed or malformed keywords
# (@context/@list/@set, bad @type, @id, @value or @language combinations,
# or any other key starting with "@").
def check_common_property_value(value, base_url, lang)
  return value, nil unless value.is_a?(Hash)

  value = value.clone
  warnings = []
  value.each do |p, v|
    case p
    when "@context"
      raise Csvlint::Csvw::MetadataError.new(p), "common property has @context property"
    when "@list"
      raise Csvlint::Csvw::MetadataError.new(p), "common property has @list property"
    when "@set"
      raise Csvlint::Csvw::MetadataError.new(p), "common property has @set property"
    when "@type"
      # Only Strings are pattern-matched: `=~` on other objects raises
      # NoMethodError on Ruby >= 3.2 rather than yielding nil.
      known_type =
        (value["@value"] && BUILT_IN_DATATYPES.include?(v)) ||
        (!value["@value"] && BUILT_IN_TYPES.include?(v)) ||
        (v.instance_of?(String) && v =~ /^([a-z]+):/ && NAMESPACES.include?(v.split(":")[0]))
      unless known_type
        # must be an absolute URI
        absolute = begin
          !URI(v).scheme.nil?
        rescue StandardError
          false
        end
        raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @type (#{v})" unless absolute
      end
    when "@id"
      unless base_url.nil?
        begin
          v = URI.join(base_url, v)
        rescue StandardError
          raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @id (#{v})"
        end
      end
    when "@value"
      raise Csvlint::Csvw::MetadataError.new(), "common property with @value has both @language and @type" if value["@type"] && value["@language"]
      raise Csvlint::Csvw::MetadataError.new(), "common property with @value has properties other than @language or @type" unless value.except("@type", "@language", "@value").empty?
    when "@language"
      raise Csvlint::Csvw::MetadataError.new(), "common property with @language lacks a @value" unless value["@value"]
      valid_language = v.nil? || (v.instance_of?(String) && v =~ BCP47_LANGUAGE_REGEXP)
      raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @language (#{v})" unless valid_language
    else
      if p[0] == "@"
        raise Csvlint::Csvw::MetadataError.new(), "common property has property other than @id, @type, @value or @language beginning with @ (#{p})"
      end
    end

    if v.instance_of? Hash
      v, nested_warnings = check_common_property_value(v, base_url, lang)
      warnings += Array(nested_warnings)
    end
    value[p] = v
  end
  return value, warnings
end
|
74
|
+
|
75
|
+
# Converts a range facet (e.g. "minInclusive") of a datatype description.
#
# value    - datatype Hash; modified in place.
# property - name of the facet to convert.
# datatype - base datatype the facet applies to.
#
# For date/time datatypes the facet is replaced by its parsed form, or
# removed with an :invalid_<property> warning when it cannot be parsed.
# Numeric datatypes are left untouched.
#
# Returns an Array of warning symbols (possibly empty).
# Raises Csvlint::Csvw::MetadataError when the facet is present on a
# datatype that is neither numeric nor date/time.
def convert_value_facet(value, property, datatype)
  return [] unless value[property]

  if DATE_FORMAT_DATATYPES.include?(datatype)
    format = Csvlint::Csvw::DateFormat.new(nil, datatype)
    parsed = format.parse(value[property])
    if parsed.nil?
      value.delete(property)
      # No leading colon inside the string: to_sym supplies it, matching
      # the other warning symbols (:invalid_value, :invalid_regex, ...).
      return ["invalid_#{property}".to_sym]
    end
    value[property] = parsed
    []
  elsif NUMERIC_FORMAT_DATATYPES.include?(datatype)
    []
  else
    raise Csvlint::Csvw::MetadataError.new("datatype.#{property}"), "#{property} is only allowed for numeric, date/time and duration types"
  end
end
|
95
|
+
|
96
|
+
# Builds a checker that accepts only Array values. The checker returns
# [value, nil, type] for an Array and [false, :invalid_value, type] otherwise.
def array_property(type)
  lambda do |value, base_url, lang|
    if value.instance_of?(Array)
      [value, nil, type]
    else
      [false, :invalid_value, type]
    end
  end
end
|
102
|
+
|
103
|
+
# Builds a checker that accepts only true or false. Any other value is
# replaced by false and reported as :invalid_value.
def boolean_property(type)
  lambda do |value, base_url, lang|
    boolean = value == true || value == false
    boolean ? [value, nil, type] : [false, :invalid_value, type]
  end
end
|
109
|
+
|
110
|
+
# Builds a checker that accepts only String values. Any other value is
# replaced by the empty string and reported as :invalid_value.
def string_property(type)
  lambda do |value, base_url, lang|
    if value.instance_of?(String)
      [value, nil, type]
    else
      ["", :invalid_value, type]
    end
  end
end
|
116
|
+
|
117
|
+
# Builds a checker that accepts only non-negative Integers. Any other
# value is replaced by nil and reported as :invalid_value.
def numeric_property(type)
  lambda do |value, base_url, lang|
    non_negative_integer = value.kind_of?(Integer) && value >= 0
    non_negative_integer ? [value, nil, type] : [nil, :invalid_value, type]
  end
end
|
123
|
+
|
124
|
+
# Builds a checker for link (URL) properties.
#
# A String is turned into a URI, resolved against base_url when one is
# given. Non-String values yield base_url with an :invalid_value warning.
# Raises Csvlint::Csvw::MetadataError when the value's string form starts
# with "_:".
def link_property(type)
  lambda do |value, base_url, lang|
    raise Csvlint::Csvw::MetadataError.new(), "URL #{value} starts with _:" if value.to_s =~ /^_:/
    return base_url, :invalid_value, type unless value.instance_of? String

    link = base_url.nil? ? URI(value) : URI.join(base_url, value)
    [link, nil, type]
  end
end
|
131
|
+
|
132
|
+
# Builds a checker for language-tag properties.
#
# A String matching BCP47_REGEXP is returned unchanged; anything else is
# replaced by nil and reported as :invalid_value. Non-String values are
# rejected before pattern matching, because `=~` on arbitrary objects
# raises NoMethodError on Ruby >= 3.2 instead of returning nil.
def language_property(type)
  lambda do |value, base_url, lang|
    return value, nil, type if value.instance_of?(String) && value =~ BCP47_REGEXP
    return nil, :invalid_value, type
  end
end
|
138
|
+
|
139
|
+
# Builds a checker for natural-language properties (such as titles).
#
# The checker normalises its input to a Hash of language => Array of
# Strings:
# * a String becomes { lang => [value] };
# * an Array keeps its String members under lang, with one :invalid_value
#   warning per discarded member;
# * a Hash keeps entries whose key matches BCP47_REGEXP (filtering their
#   values the same way) and drops the others with :invalid_language; an
#   empty result adds :invalid_value;
# * anything else yields {} with :invalid_value.
def natural_language_property(type)
  lambda do |value, base_url, lang|
    return { lang => [ value ] }, nil, type if value.instance_of? String

    if value.instance_of? Array
      titles, rejected = value.partition { |title| title.instance_of? String }
      return { lang => titles }, Array.new(rejected.length, :invalid_value), type
    end

    return {}, :invalid_value, type unless value.instance_of? Hash

    warnings = []
    titles_by_language = value.clone
    value.each do |language, entries|
      if language =~ BCP47_REGEXP
        titles, rejected = Array(entries).partition { |title| title.instance_of? String }
        warnings.concat(Array.new(rejected.length, :invalid_value))
        titles_by_language[language] = titles
      else
        titles_by_language.delete(language)
        warnings << :invalid_language
      end
    end
    warnings << :invalid_value if titles_by_language.empty?
    [titles_by_language, warnings, type]
  end
end
|
179
|
+
|
180
|
+
# Builds a checker for column-reference properties: the value is wrapped
# with Kernel#Array (so a scalar becomes a one-element Array) and never
# produces a warning.
def column_reference_property(type)
  lambda do |value, base_url, lang|
    [Array(value), nil, type]
  end
end
|
185
|
+
|
186
|
+
|
187
|
+
end
|
188
|
+
|
189
|
+
PROPERTIES = {
|
190
|
+
# context properties
|
191
|
+
"@language" => language_property(:context),
|
192
|
+
"@base" => link_property(:context),
|
193
|
+
# common properties
|
194
|
+
"@id" => link_property(:common),
|
195
|
+
"notes" => array_property(:common),
|
196
|
+
"suppressOutput" => boolean_property(:common),
|
197
|
+
# inherited properties
|
198
|
+
"null" => lambda { |value, base_url, lang|
|
199
|
+
case value
|
200
|
+
when String
|
201
|
+
return [value], nil, :inherited
|
202
|
+
when Array
|
203
|
+
values = []
|
204
|
+
warnings = []
|
205
|
+
value.each do |v|
|
206
|
+
if v.instance_of? String
|
207
|
+
values << v
|
208
|
+
else
|
209
|
+
warnings << :invalid_value
|
210
|
+
end
|
211
|
+
end
|
212
|
+
return values, warnings, :inherited
|
213
|
+
else
|
214
|
+
return [""], :invalid_value, :inherited
|
215
|
+
end
|
216
|
+
},
|
217
|
+
"default" => string_property(:inherited),
|
218
|
+
"separator" => lambda { |value, base_url, lang|
|
219
|
+
return value, nil, :inherited if value.instance_of?(String) || value.nil?
|
220
|
+
return nil, :invalid_value, :inherited
|
221
|
+
},
|
222
|
+
"lang" => language_property(:inherited),
|
223
|
+
"datatype" => lambda { |value, base_url, lang|
|
224
|
+
value = value.clone
|
225
|
+
warnings = []
|
226
|
+
if value.instance_of? Hash
|
227
|
+
if value["@id"]
|
228
|
+
raise Csvlint::Csvw::MetadataError.new("datatype.@id"), "datatype @id must not be the id of a built-in datatype (#{value["@id"]})" if BUILT_IN_DATATYPES.values.include?(value["@id"])
|
229
|
+
v,w,t = PROPERTIES["@id"].call(value["@id"], base_url, lang)
|
230
|
+
unless w.nil?
|
231
|
+
warnings << w
|
232
|
+
value.delete("@id")
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
if value["base"]
|
237
|
+
if BUILT_IN_DATATYPES.include? value["base"]
|
238
|
+
value["base"] = BUILT_IN_DATATYPES[value["base"]]
|
239
|
+
else
|
240
|
+
value["base"] = BUILT_IN_DATATYPES["string"]
|
241
|
+
warnings << :invalid_datatype_base
|
242
|
+
end
|
243
|
+
else
|
244
|
+
value["base"] = BUILT_IN_DATATYPES["string"]
|
245
|
+
end
|
246
|
+
elsif BUILT_IN_DATATYPES.include? value
|
247
|
+
value = { "@id" => BUILT_IN_DATATYPES[value] }
|
248
|
+
else
|
249
|
+
value = { "@id" => BUILT_IN_DATATYPES["string"] }
|
250
|
+
warnings << :invalid_value
|
251
|
+
end
|
252
|
+
|
253
|
+
unless STRING_DATATYPES.include?(value["base"]) || BINARY_DATATYPES.include?(value["base"])
|
254
|
+
raise Csvlint::Csvw::MetadataError.new("datatype.length"), "datatypes based on #{value["base"]} cannot have a length facet" if value["length"]
|
255
|
+
raise Csvlint::Csvw::MetadataError.new("datatype.minLength"), "datatypes based on #{value["base"]} cannot have a minLength facet" if value["minLength"]
|
256
|
+
raise Csvlint::Csvw::MetadataError.new("datatype.maxLength"), "datatypes based on #{value["base"]} cannot have a maxLength facet" if value["maxLength"]
|
257
|
+
end
|
258
|
+
|
259
|
+
if value["minimum"]
|
260
|
+
value["minInclusive"] = value["minimum"]
|
261
|
+
value.delete("minimum")
|
262
|
+
end
|
263
|
+
if value["maximum"]
|
264
|
+
value["maxInclusive"] = value["maximum"]
|
265
|
+
value.delete("maximum")
|
266
|
+
end
|
267
|
+
|
268
|
+
warnings += convert_value_facet(value, "minInclusive", value["base"])
|
269
|
+
warnings += convert_value_facet(value, "minExclusive", value["base"])
|
270
|
+
warnings += convert_value_facet(value, "maxInclusive", value["base"])
|
271
|
+
warnings += convert_value_facet(value, "maxExclusive", value["base"])
|
272
|
+
|
273
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both minimum/minInclusive (#{value["minInclusive"]}) and minExclusive (#{value["minExclusive"]}" if value["minInclusive"] && value["minExclusive"]
|
274
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both maximum/maxInclusive (#{value["maxInclusive"]}) and maxExclusive (#{value["maxExclusive"]}" if value["maxInclusive"] && value["maxExclusive"]
|
275
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{value["minInclusive"]}) cannot be more than maxInclusive (#{value["maxInclusive"]}" if value["minInclusive"] && value["maxInclusive"] && value["minInclusive"] > value["maxInclusive"]
|
276
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{value["minInclusive"]}) cannot be more than or equal to maxExclusive (#{value["maxExclusive"]}" if value["minInclusive"] && value["maxExclusive"] && value["minInclusive"] >= value["maxExclusive"]
|
277
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{value["minExclusive"]}) cannot be more than or equal to maxExclusive (#{value["maxExclusive"]}" if value["minExclusive"] && value["maxExclusive"] && value["minExclusive"] > value["maxExclusive"]
|
278
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{value["minExclusive"]}) cannot be more than maxInclusive (#{value["maxInclusive"]}" if value["minExclusive"] && value["maxInclusive"] && value["minExclusive"] >= value["maxInclusive"]
|
279
|
+
|
280
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be less than minLength (#{value["minLength"]}" if value["length"] && value["minLength"] && value["length"] < value["minLength"]
|
281
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be more than maxLength (#{value["maxLength"]}" if value["length"] && value["maxLength"] && value["length"] > value["maxLength"]
|
282
|
+
raise Csvlint::Csvw::MetadataError.new(""), "datatype minLength (#{value["minLength"]}) cannot be more than maxLength (#{value["maxLength"]}" if value["minLength"] && value["maxLength"] && value["minLength"] > value["maxLength"]
|
283
|
+
|
284
|
+
if value["format"]
|
285
|
+
if REGEXP_FORMAT_DATATYPES.include?(value["base"])
|
286
|
+
begin
|
287
|
+
value["format"] = Regexp.new(value["format"])
|
288
|
+
rescue RegexpError
|
289
|
+
value.delete("format")
|
290
|
+
warnings << :invalid_regex
|
291
|
+
end
|
292
|
+
elsif NUMERIC_FORMAT_DATATYPES.include?(value["base"])
|
293
|
+
value["format"] = { "pattern" => value["format"] } if value["format"].instance_of? String
|
294
|
+
begin
|
295
|
+
value["format"] = Csvlint::Csvw::NumberFormat.new(value["format"]["pattern"], value["format"]["groupChar"], value["format"]["decimalChar"] || ".")
|
296
|
+
rescue Csvlint::Csvw::NumberFormatError
|
297
|
+
value["format"] = Csvlint::Csvw::NumberFormat.new(nil, value["format"]["groupChar"], value["format"]["decimalChar"] || ".")
|
298
|
+
warnings << :invalid_number_format
|
299
|
+
end
|
300
|
+
elsif value["base"] == "http://www.w3.org/2001/XMLSchema#boolean"
|
301
|
+
if value["format"].instance_of? String
|
302
|
+
value["format"] = value["format"].split("|")
|
303
|
+
unless value["format"].length == 2
|
304
|
+
value.delete("format")
|
305
|
+
warnings << :invalid_boolean_format
|
306
|
+
end
|
307
|
+
else
|
308
|
+
value.delete("format")
|
309
|
+
warnings << :invalid_boolean_format
|
310
|
+
end
|
311
|
+
elsif DATE_FORMAT_DATATYPES.include?(value["base"])
|
312
|
+
if value["format"].instance_of? String
|
313
|
+
begin
|
314
|
+
value["format"] = Csvlint::Csvw::DateFormat.new(value["format"])
|
315
|
+
rescue Csvlint::CsvDateFormatError
|
316
|
+
value.delete("format")
|
317
|
+
warnings << :invalid_date_format
|
318
|
+
end
|
319
|
+
else
|
320
|
+
value.delete("format")
|
321
|
+
warnings << :invalid_date_format
|
322
|
+
end
|
323
|
+
end
|
324
|
+
end
|
325
|
+
return value, warnings, :inherited
|
326
|
+
},
|
327
|
+
"required" => boolean_property(:inherited),
|
328
|
+
"ordered" => boolean_property(:inherited),
|
329
|
+
"aboutUrl" => string_property(:inherited),
|
330
|
+
"propertyUrl" => string_property(:inherited),
|
331
|
+
"valueUrl" => string_property(:inherited),
|
332
|
+
"textDirection" => lambda { |value, base_url, lang|
|
333
|
+
value = value.to_sym
|
334
|
+
return value, nil, :inherited if [:ltr, :rtl, :auto, :inherit].include? value
|
335
|
+
return :inherit, :invalid_value, :inherited
|
336
|
+
},
|
337
|
+
# column level properties
|
338
|
+
"virtual" => boolean_property(:column),
|
339
|
+
"titles" => natural_language_property(:column),
|
340
|
+
"name" => lambda { |value, base_url, lang|
|
341
|
+
return value, nil, :column if value.instance_of?(String) && value =~ NAME_REGEXP
|
342
|
+
return nil, :invalid_value, :column
|
343
|
+
},
|
344
|
+
# table level properties
|
345
|
+
"transformations" => lambda { |value, base_url, lang|
|
346
|
+
transformations = []
|
347
|
+
warnings = []
|
348
|
+
if value.instance_of? Array
|
349
|
+
value.each_with_index do |transformation,i|
|
350
|
+
if transformation.instance_of? Hash
|
351
|
+
transformation = transformation.clone
|
352
|
+
transformation.each do |p,v|
|
353
|
+
if p == "@id"
|
354
|
+
raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@id"), "@id starts with _:" if v =~ /^_:/
|
355
|
+
elsif p == "@type"
|
356
|
+
raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@type"), "@type of transformation is not 'Template'" if v != 'Template'
|
357
|
+
elsif p == "url"
|
358
|
+
elsif p == "titles"
|
359
|
+
else
|
360
|
+
v, warning, type = check_property(p, v, base_url, lang)
|
361
|
+
unless type == :transformation && (warning.nil? || warning.empty?)
|
362
|
+
value.delete(p)
|
363
|
+
warnings << :invalid_property unless type == :transformation
|
364
|
+
warnings += Array(warning)
|
365
|
+
end
|
366
|
+
end
|
367
|
+
end
|
368
|
+
transformations << transformation
|
369
|
+
else
|
370
|
+
warnings << :invalid_transformation
|
371
|
+
end
|
372
|
+
end
|
373
|
+
else
|
374
|
+
warnings << :invalid_value
|
375
|
+
end
|
376
|
+
return transformations, warnings, :table
|
377
|
+
},
|
378
|
+
"tableDirection" => lambda { |value, base_url, lang|
|
379
|
+
value = value.to_sym
|
380
|
+
return value, nil, :table if [:ltr, :rtl, :auto].include? value
|
381
|
+
return :auto, :invalid_value, :table
|
382
|
+
},
|
383
|
+
"tableSchema" => lambda { |value, base_url, lang|
|
384
|
+
schema_base_url = base_url
|
385
|
+
schema_lang = lang
|
386
|
+
if value.instance_of? String
|
387
|
+
schema_url = URI.join(base_url, value).to_s
|
388
|
+
schema_base_url = schema_url
|
389
|
+
schema_ref = schema_url.start_with?("file:") ? File.new(schema_url[5..-1]) : schema_url
|
390
|
+
schema = JSON.parse( open(schema_ref).read )
|
391
|
+
schema["@id"] = schema["@id"] ? URI.join(schema_url, schema["@id"]).to_s : schema_url
|
392
|
+
if schema["@context"]
|
393
|
+
if schema["@context"].instance_of?(Array) && schema["@context"].length > 1
|
394
|
+
schema_base_url = schema["@context"][1]["@base"] ? URI.join(schema_base_url, schema["@context"][1]["@base"]).to_s : schema_base_url
|
395
|
+
schema_lang = schema["@context"][1]["@language"] || schema_lang
|
396
|
+
end
|
397
|
+
schema.delete("@context")
|
398
|
+
end
|
399
|
+
elsif value.instance_of? Hash
|
400
|
+
schema = value.clone
|
401
|
+
else
|
402
|
+
return {}, :invalid_value, :table
|
403
|
+
end
|
404
|
+
warnings = []
|
405
|
+
schema.each do |p,v|
|
406
|
+
if p == "@id"
|
407
|
+
raise Csvlint::Csvw::MetadataError.new("tableSchema.@id"), "@id starts with _:" if v =~ /^_:/
|
408
|
+
elsif p == "@type"
|
409
|
+
raise Csvlint::Csvw::MetadataError.new("tableSchema.@type"), "@type of schema is not 'Schema'" if v != 'Schema'
|
410
|
+
else
|
411
|
+
v, warning, type = check_property(p, v, schema_base_url, schema_lang)
|
412
|
+
if (type == :schema || type == :inherited) && (warning.nil? || warning.empty?)
|
413
|
+
schema[p] = v
|
414
|
+
else
|
415
|
+
schema.delete(p)
|
416
|
+
warnings << :invalid_property unless (type == :schema || type == :inherited)
|
417
|
+
warnings += Array(warning)
|
418
|
+
end
|
419
|
+
end
|
420
|
+
end
|
421
|
+
return schema, warnings, :table
|
422
|
+
},
|
423
|
+
"url" => link_property(:table),
|
424
|
+
"dialect" => lambda { |value, base_url, lang|
|
425
|
+
if value.instance_of? Hash
|
426
|
+
value = value.clone
|
427
|
+
warnings = []
|
428
|
+
value.each do |p,v|
|
429
|
+
if p == "@id"
|
430
|
+
raise Csvlint::Csvw::MetadataError.new("dialect.@id"), "@id starts with _:" if v =~ /^_:/
|
431
|
+
elsif p == "@type"
|
432
|
+
raise Csvlint::Csvw::MetadataError.new("dialect.@type"), "@type of dialect is not 'Dialect'" if v != 'Dialect'
|
433
|
+
else
|
434
|
+
v, warning, type = check_property(p, v, base_url, lang)
|
435
|
+
if type == :dialect && (warning.nil? || warning.empty?)
|
436
|
+
value[p] = v
|
437
|
+
else
|
438
|
+
value.delete(p)
|
439
|
+
warnings << :invalid_property unless type == :dialect
|
440
|
+
warnings += Array(warning)
|
441
|
+
end
|
442
|
+
end
|
443
|
+
end
|
444
|
+
return value, warnings, :table
|
445
|
+
else
|
446
|
+
return {}, :invalid_value, :table
|
447
|
+
end
|
448
|
+
},
|
449
|
+
# dialect properties
|
450
|
+
"commentPrefix" => string_property(:dialect),
|
451
|
+
"delimiter" => string_property(:dialect),
|
452
|
+
"doubleQuote" => boolean_property(:dialect),
|
453
|
+
"encoding" => lambda { |value, base_url, lang|
|
454
|
+
return value, nil, :dialect if VALID_ENCODINGS.include? value
|
455
|
+
return nil, :invalid_value, :dialect
|
456
|
+
},
|
457
|
+
"header" => boolean_property(:dialect),
|
458
|
+
"headerRowCount" => numeric_property(:dialect),
|
459
|
+
"lineTerminators" => array_property(:dialect),
|
460
|
+
"quoteChar" => string_property(:dialect),
|
461
|
+
"skipBlankRows" => boolean_property(:dialect),
|
462
|
+
"skipColumns" => numeric_property(:dialect),
|
463
|
+
"skipInitialSpace" => boolean_property(:dialect),
|
464
|
+
"skipRows" => numeric_property(:dialect),
|
465
|
+
"trim" => lambda { |value, base_url, lang|
|
466
|
+
value = :true if value == true || value == "true"
|
467
|
+
value = :false if value == false || value == "false"
|
468
|
+
value = :start if value == "start"
|
469
|
+
value = :end if value == "end"
|
470
|
+
return value, nil, :dialect if [:true, :false, :start, :end].include? value
|
471
|
+
return true, :invalid_value, :dialect
|
472
|
+
},
|
473
|
+
# schema properties
|
474
|
+
"columns" => lambda { |value, base_url, lang| return value, nil, :schema },
|
475
|
+
"primaryKey" => column_reference_property(:schema),
|
476
|
+
"foreignKeys" => lambda { |value, base_url, lang|
|
477
|
+
foreign_keys = []
|
478
|
+
warnings = []
|
479
|
+
if value.instance_of? Array
|
480
|
+
value.each_with_index do |foreign_key,i|
|
481
|
+
if foreign_key.instance_of? Hash
|
482
|
+
foreign_key = foreign_key.clone
|
483
|
+
foreign_key.each do |p,v|
|
484
|
+
v, warning, type = check_property(p, v, base_url, lang)
|
485
|
+
if type == :foreign_key && (warning.nil? || warning.empty?)
|
486
|
+
foreign_key[p] = v
|
487
|
+
elsif p =~ /:/
|
488
|
+
raise Csvlint::Csvw::MetadataError.new("foreignKey.#{p}"), "foreignKey includes a prefixed (common) property"
|
489
|
+
else
|
490
|
+
foreign_key.delete(p)
|
491
|
+
warnings << :invalid_property unless type == :foreign_key
|
492
|
+
warnings += Array(warning)
|
493
|
+
end
|
494
|
+
end
|
495
|
+
foreign_keys << foreign_key
|
496
|
+
else
|
497
|
+
warnings << :invalid_foreign_key
|
498
|
+
end
|
499
|
+
end
|
500
|
+
else
|
501
|
+
warnings << :invalid_value
|
502
|
+
end
|
503
|
+
return foreign_keys, warnings, :schema
|
504
|
+
},
|
505
|
+
"rowTitles" => column_reference_property(:schema),
|
506
|
+
# transformation properties
|
507
|
+
"targetFormat" => lambda { |value, base_url, lang| return value, nil, :transformation },
|
508
|
+
"scriptFormat" => lambda { |value, base_url, lang| return value, nil, :transformation },
|
509
|
+
"source" => lambda { |value, base_url, lang| return value, nil, :transformation },
|
510
|
+
# foreignKey properties
|
511
|
+
"columnReference" => column_reference_property(:foreign_key),
|
512
|
+
"reference" => lambda { |value, base_url, lang|
|
513
|
+
if value.instance_of? Hash
|
514
|
+
value = value.clone
|
515
|
+
warnings = []
|
516
|
+
value.each do |p,v|
|
517
|
+
if ["resource", "schemaReference", "columnReference"].include? p
|
518
|
+
v, warning, type = check_property(p, v, base_url, lang)
|
519
|
+
if warning.nil? || warning.empty?
|
520
|
+
value[p] = v
|
521
|
+
else
|
522
|
+
value.delete(p)
|
523
|
+
warnings += Array(warning)
|
524
|
+
end
|
525
|
+
elsif p =~ /:/
|
526
|
+
raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.#{p}"), "foreignKey reference includes a prefixed (common) property"
|
527
|
+
else
|
528
|
+
value.delete(p)
|
529
|
+
warnings << :invalid_property
|
530
|
+
end
|
531
|
+
end
|
532
|
+
raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.columnReference"), "foreignKey reference columnReference is missing" unless value["columnReference"]
|
533
|
+
raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference does not have either resource or schemaReference" unless value["resource"] || value["schemaReference"]
|
534
|
+
raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference has both resource and schemaReference" if value["resource"] && value["schemaReference"]
|
535
|
+
return value, warnings, :foreign_key
|
536
|
+
else
|
537
|
+
raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference is not an object"
|
538
|
+
end
|
539
|
+
},
|
540
|
+
# foreignKey reference properties
|
541
|
+
"resource" => lambda { |value, base_url, lang| return value, nil, :foreign_key_reference },
|
542
|
+
"schemaReference" => lambda { |value, base_url, lang|
|
543
|
+
return URI.join(base_url, value).to_s, nil, :foreign_key_reference
|
544
|
+
}
|
545
|
+
}
|
546
|
+
|
547
|
+
# Prefix => namespace URI table for the vocabulary prefixes recognised by
# this checker.
NAMESPACES = {
  "dcat"    => "http://www.w3.org/ns/dcat#",
  "qb"      => "http://purl.org/linked-data/cube#",
  "grddl"   => "http://www.w3.org/2003/g/data-view#",
  "ma"      => "http://www.w3.org/ns/ma-ont#",
  "org"     => "http://www.w3.org/ns/org#",
  "owl"     => "http://www.w3.org/2002/07/owl#",
  "prov"    => "http://www.w3.org/ns/prov#",
  "rdf"     => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
  "rdfa"    => "http://www.w3.org/ns/rdfa#",
  "rdfs"    => "http://www.w3.org/2000/01/rdf-schema#",
  "rif"     => "http://www.w3.org/2007/rif#",
  "rr"      => "http://www.w3.org/ns/r2rml#",
  "sd"      => "http://www.w3.org/ns/sparql-service-description#",
  "skos"    => "http://www.w3.org/2004/02/skos/core#",
  "skosxl"  => "http://www.w3.org/2008/05/skos-xl#",
  "wdr"     => "http://www.w3.org/2007/05/powder#",
  "void"    => "http://rdfs.org/ns/void#",
  "wdrs"    => "http://www.w3.org/2007/05/powder-s#",
  "xhv"     => "http://www.w3.org/1999/xhtml/vocab#",
  "xml"     => "http://www.w3.org/XML/1998/namespace",
  "xsd"     => "http://www.w3.org/2001/XMLSchema#",
  "cc"      => "http://creativecommons.org/ns#",
  "ctag"    => "http://commontag.org/ns#",
  "dc"      => "http://purl.org/dc/terms/",
  "dcterms" => "http://purl.org/dc/terms/",
  "dc11"    => "http://purl.org/dc/elements/1.1/",
  "foaf"    => "http://xmlns.com/foaf/0.1/",
  "gr"      => "http://purl.org/goodrelations/v1#",
  "ical"    => "http://www.w3.org/2002/12/cal/icaltzd#",
  "og"      => "http://ogp.me/ns#",
  "rev"     => "http://purl.org/stuff/rev#",
  "sioc"    => "http://rdfs.org/sioc/ns#",
  "v"       => "http://rdf.data-vocabulary.org/#",
  "vcard"   => "http://www.w3.org/2006/vcard/ns#",
  "schema"  => "http://schema.org/"
}
|
584
|
+
|
585
|
+
# Pieces of the BCP 47 language-tag grammar, kept as strings so they can be
# composed into the full pattern below. The named groups label the part of
# the tag each piece matches.
BCP47_REGULAR_REGEXP = "(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)"
BCP47_IRREGULAR_REGEXP = "(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)"
BCP47_GRANDFATHERED_REGEXP = "(?<grandfathered>#{BCP47_IRREGULAR_REGEXP}|#{BCP47_REGULAR_REGEXP})"
BCP47_PRIVATE_USE_REGEXP = "(?<privateUse>x(-[A-Za-z0-9]{1,8})+)"
BCP47_SINGLETON_REGEXP = "[0-9A-WY-Za-wy-z]"
BCP47_EXTENSION_REGEXP = "(?<extension>#{BCP47_SINGLETON_REGEXP}(-[A-Za-z0-9]{2,8})+)"
BCP47_VARIANT_REGEXP = "(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})"
BCP47_REGION_REGEXP = "(?<region>[A-Za-z]{2}|[0-9]{3})"
BCP47_SCRIPT_REGEXP = "(?<script>[A-Za-z]{4})"
BCP47_EXTLANG_REGEXP = "(?<extlang>[A-Za-z]{3}(-[A-Za-z]{3}){0,2})"
BCP47_LANGUAGE_REGEXP = "(?<language>([A-Za-z]{2,3}(-#{BCP47_EXTLANG_REGEXP})?)|[A-Za-z]{4}|[A-Za-z]{5,8})"
BCP47_LANGTAG_REGEXP = "(#{BCP47_LANGUAGE_REGEXP}(-#{BCP47_SCRIPT_REGEXP})?(-#{BCP47_REGION_REGEXP})?(-#{BCP47_VARIANT_REGEXP})*(-#{BCP47_EXTENSION_REGEXP})*(-#{BCP47_PRIVATE_USE_REGEXP})?)"
# Anchor with \A and \z rather than ^ and $: in Ruby the latter match at
# every line boundary, so a multi-line string containing one valid line
# (or a tag followed by a newline) would otherwise be accepted.
BCP47_LANGUAGETAG_REGEXP = "\\A(#{BCP47_GRANDFATHERED_REGEXP}|#{BCP47_LANGTAG_REGEXP}|#{BCP47_PRIVATE_USE_REGEXP})\\z"
# Compiled matcher for a complete language tag.
BCP47_REGEXP = Regexp.new(BCP47_LANGUAGETAG_REGEXP)
|
599
|
+
|
600
|
+
# Pattern for a valid name: it starts with an ASCII letter, digit or
# upper-case percent-encoded octet, followed by any number of those
# characters or underscores. Anchored with \A and \z so the whole string
# must match; ^ and $ would accept any single matching line of a
# multi-line string.
NAME_REGEXP = /\A([A-Za-z0-9]|(%[A-F0-9][A-F0-9]))([A-Za-z0-9_]|(%[A-F0-9][A-F0-9]))*\z/
|
601
|
+
|
602
|
+
# Names listed as built-in @type values.
BUILT_IN_TYPES = %w[TableGroup Table Schema Column Dialect Template Datatype]
|
603
|
+
|
604
|
+
# Datatype URIs grouped under the "regexp format" heading: three RDF/CSVW
# literal types followed by the listed XML Schema types.
REGEXP_FORMAT_DATATYPES = [
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "http://www.w3.org/ns/csvw#JSON"
] + %w[
  anyAtomicType anyURI base64Binary duration dayTimeDuration
  yearMonthDuration hexBinary QName string normalizedString token
  language Name NMTOKEN
].map { |local_name| "http://www.w3.org/2001/XMLSchema##{local_name}" }
|
623
|
+
|
624
|
+
# Datatype URIs grouped as string datatypes.
STRING_DATATYPES = [
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "http://www.w3.org/ns/csvw#JSON"
] + %w[string normalizedString token language Name NMTOKEN].map { |local_name|
  "http://www.w3.org/2001/XMLSchema##{local_name}"
}
|
635
|
+
|
636
|
+
# Datatype URIs grouped as binary datatypes.
BINARY_DATATYPES = %w[base64Binary hexBinary].map { |local_name|
  "http://www.w3.org/2001/XMLSchema##{local_name}"
}
|
640
|
+
|
641
|
+
# Datatype URIs grouped under the "numeric format" heading.
NUMERIC_FORMAT_DATATYPES = %w[
  decimal integer long int short byte
  nonNegativeInteger positiveInteger
  unsignedLong unsignedInt unsignedShort unsignedByte
  nonPositiveInteger negativeInteger
  double float
].map { |local_name| "http://www.w3.org/2001/XMLSchema##{local_name}" }
|
659
|
+
|
660
|
+
# Datatype URIs grouped under the "date format" heading.
DATE_FORMAT_DATATYPES = %w[date dateTime dateTimeStamp time].map { |local_name|
  "http://www.w3.org/2001/XMLSchema##{local_name}"
}
|
666
|
+
|
667
|
+
# Short datatype name => datatype URI.
# The first seven entries are aliases whose short name differs from the
# URI's local name; every remaining entry maps an XML Schema local name to
# the URI formed by appending it to the XML Schema namespace.
BUILT_IN_DATATYPES = {
  "number" => "http://www.w3.org/2001/XMLSchema#double",
  "binary" => "http://www.w3.org/2001/XMLSchema#base64Binary",
  "datetime" => "http://www.w3.org/2001/XMLSchema#dateTime",
  "any" => "http://www.w3.org/2001/XMLSchema#anyAtomicType",
  "xml" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "html" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "json" => "http://www.w3.org/ns/csvw#JSON"
}.merge(
  Hash[
    %w[
      anyAtomicType anyURI base64Binary boolean date dateTime dateTimeStamp
      decimal integer long int short byte
      nonNegativeInteger positiveInteger
      unsignedLong unsignedInt unsignedShort unsignedByte
      nonPositiveInteger negativeInteger
      double duration dayTimeDuration yearMonthDuration float
      gDay gMonth gMonthDay gYear gYearMonth
      hexBinary QName string normalizedString token language Name NMTOKEN
      time
    ].map { |local_name| [local_name, "http://www.w3.org/2001/XMLSchema##{local_name}"] }
  ]
)
|
716
|
+
|
717
|
+
# Encoding names accepted for the dialect "encoding" property; the lookup
# in that property is an exact string match against this list.
VALID_ENCODINGS = %w[
  utf-8
  ibm866
  iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6 iso-8859-7
  iso-8859-8 iso-8859-8-i iso-8859-10 iso-8859-13 iso-8859-14 iso-8859-15
  iso-8859-16
  koi8-r koi8-u
  macintosh
  windows-874 windows-1250 windows-1251 windows-1252 windows-1253
  windows-1254 windows-1255 windows-1256 windows-1257 windows-1258
  x-mac-cyrillic
  gb18030 hz-gb-2312 big5
  euc-jp iso-2022-jp shift_jis
  euc-kr
  replacement
  utf-16be utf-16le
  x-user-defined
]
|
759
|
+
end
|
760
|
+
end
|
761
|
+
end
|