wjordan213-csvlint 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitattributes +2 -0
- data/.gitignore +28 -0
- data/.ruby-version +1 -0
- data/.travis.yml +32 -0
- data/CHANGELOG.md +361 -0
- data/Gemfile +7 -0
- data/LICENSE.md +22 -0
- data/README.md +328 -0
- data/Rakefile +17 -0
- data/bin/create_schema +32 -0
- data/bin/csvlint +10 -0
- data/features/check_format.feature +46 -0
- data/features/cli.feature +210 -0
- data/features/csv_options.feature +35 -0
- data/features/csvupload.feature +145 -0
- data/features/csvw_schema_validation.feature +127 -0
- data/features/fixtures/cr-line-endings.csv +0 -0
- data/features/fixtures/crlf-line-endings.csv +0 -0
- data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
- data/features/fixtures/inconsistent-line-endings.csv +0 -0
- data/features/fixtures/invalid-byte-sequence.csv +0 -0
- data/features/fixtures/invalid_many_rows.csv +0 -0
- data/features/fixtures/lf-line-endings.csv +0 -0
- data/features/fixtures/spreadsheet.xls +0 -0
- data/features/fixtures/spreadsheet.xlsx +0 -0
- data/features/fixtures/title-row.csv +0 -0
- data/features/fixtures/valid.csv +0 -0
- data/features/fixtures/valid_many_rows.csv +0 -0
- data/features/fixtures/windows-line-endings.csv +0 -0
- data/features/information.feature +22 -0
- data/features/parse_csv.feature +90 -0
- data/features/schema_validation.feature +105 -0
- data/features/sources.feature +17 -0
- data/features/step_definitions/cli_steps.rb +11 -0
- data/features/step_definitions/csv_options_steps.rb +24 -0
- data/features/step_definitions/information_steps.rb +13 -0
- data/features/step_definitions/parse_csv_steps.rb +42 -0
- data/features/step_definitions/schema_validation_steps.rb +33 -0
- data/features/step_definitions/sources_steps.rb +7 -0
- data/features/step_definitions/validation_errors_steps.rb +90 -0
- data/features/step_definitions/validation_info_steps.rb +22 -0
- data/features/step_definitions/validation_warnings_steps.rb +60 -0
- data/features/support/aruba.rb +56 -0
- data/features/support/env.rb +26 -0
- data/features/support/load_tests.rb +114 -0
- data/features/support/webmock.rb +1 -0
- data/features/validation_errors.feature +147 -0
- data/features/validation_info.feature +16 -0
- data/features/validation_warnings.feature +86 -0
- data/lib/csvlint.rb +27 -0
- data/lib/csvlint/cli.rb +165 -0
- data/lib/csvlint/csvw/column.rb +359 -0
- data/lib/csvlint/csvw/date_format.rb +182 -0
- data/lib/csvlint/csvw/metadata_error.rb +13 -0
- data/lib/csvlint/csvw/number_format.rb +211 -0
- data/lib/csvlint/csvw/property_checker.rb +761 -0
- data/lib/csvlint/csvw/table.rb +204 -0
- data/lib/csvlint/csvw/table_group.rb +165 -0
- data/lib/csvlint/error_collector.rb +27 -0
- data/lib/csvlint/error_message.rb +15 -0
- data/lib/csvlint/field.rb +196 -0
- data/lib/csvlint/schema.rb +92 -0
- data/lib/csvlint/validate.rb +599 -0
- data/lib/csvlint/version.rb +3 -0
- data/spec/csvw/column_spec.rb +112 -0
- data/spec/csvw/date_format_spec.rb +49 -0
- data/spec/csvw/number_format_spec.rb +417 -0
- data/spec/csvw/table_group_spec.rb +143 -0
- data/spec/csvw/table_spec.rb +90 -0
- data/spec/field_spec.rb +252 -0
- data/spec/schema_spec.rb +211 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/validator_spec.rb +619 -0
- data/wjordan213_csvlint.gemspec +46 -0
- metadata +490 -0
@@ -0,0 +1,182 @@
|
|
1
|
+
module Csvlint
|
2
|
+
module Csvw
|
3
|
+
# Validates and parses date/time strings, either against one of the XML
# Schema date/time datatypes (when no pattern is given) or against a
# pattern built from the field symbols listed in FIELDS (yyyy, MM, dd, HH,
# mm, ss, S, X, x ...).
class DateFormat

  attr_reader :pattern

  # pattern  - a date/time pattern string such as "dd.MM.yyyy", or nil to
  #            use the default lexical form of +datatype+.
  # datatype - the XML Schema datatype URI; only consulted when pattern is nil.
  #
  # Raises Csvw::DateFormatError when the pattern contains date field symbols
  # that are not listed in FIELDS.
  def initialize(pattern, datatype=nil)
    @pattern = pattern

    if @pattern.nil?
      @regexp = DEFAULT_REGEXP[datatype]
      @type = datatype
      return
    end

    # Strip everything we understand; any date-symbol letter left over is unsupported.
    # Longest symbols go first so that "MM" is not consumed as two "M"s.
    test_pattern = pattern.clone
    test_pattern.gsub!(/S+/, "")
    FIELDS.keys.sort_by{|f| -f.length}.each do |field|
      test_pattern.gsub!(field, "")
    end
    raise Csvw::DateFormatError, "unrecognised date field symbols in date format" if test_pattern =~ /[GyYuUrQqMLlwWdDFgEecahHKkjJmsSAzZOvVXx]/

    # Try the precompiled tables first: date, then time, then dateTime.
    @regexp = DATE_PATTERN_REGEXP[@pattern]
    @type = @regexp.nil? ? "http://www.w3.org/2001/XMLSchema#time" : "http://www.w3.org/2001/XMLSchema#date"
    @regexp = @regexp || TIME_PATTERN_REGEXP[@pattern]
    @type = @regexp.nil? ? "http://www.w3.org/2001/XMLSchema#dateTime" : @type
    @regexp = @regexp || DATE_TIME_PATTERN_REGEXP[@pattern]

    @regexp = build_custom_regexp if @regexp.nil?
  end

  # Returns true when +value+ is lexically valid for this format, else false.
  def match(value)
    value =~ @regexp ? true : false
  end

  # Parses +value+ according to this format.
  #
  # Returns a Date for date formats, a DateTime for dateTime formats, and for
  # every other type a Hash of the captured fields (integers, except
  # "second" which is a Float and "timezone" which is normalised to "+hh:mm").
  # Returns nil when the value does not match or names an impossible date.
  def parse(value)
    m = @regexp.match(value)
    return nil if m.nil?

    case @type
    when "http://www.w3.org/2001/XMLSchema#date"
      begin
        return Date.new(m["year"].to_i, m["month"].to_i, m["day"].to_i)
      rescue ArgumentError
        return nil
      end
    when "http://www.w3.org/2001/XMLSchema#dateTime"
      begin
        seconds = m.names.include?("second") ? m["second"].to_f : 0
        offset = m.names.include?("timezone") && m["timezone"] ? m["timezone"] : ''
        return DateTime.new(m["year"].to_i, m["month"].to_i, m["day"].to_i, m["hour"].to_i, m["minute"].to_i, seconds, offset)
      rescue ArgumentError
        return nil
      end
    else
      fields = {}
      m.names.each do |field|
        next if m[field].nil?
        case field
        when "timezone"
          tz = m["timezone"]
          tz = "+00:00" if tz == 'Z'
          tz += ':00' if tz.length == 3                        # "+01"   -> "+01:00"
          tz = "#{tz[0..2]}:#{tz[3..4]}" unless tz =~ /:/      # "+0100" -> "+01:00"
          fields["timezone"] = tz
        when "second"
          fields["second"] = m["second"].to_f
        else
          fields[field] = m[field].to_i
        end
      end
      return fields
    end
  end

  # Regexp fragment for each supported field symbol. These are interpolated
  # into larger regexps, so the named groups end up in the final MatchData.
  FIELDS = {
    "yyyy" => /(?<year>-?([1-9][0-9]{3,}|0[0-9]{3}))/,
    "MM" => /(?<month>0[1-9]|1[0-2])/,
    "M" => /(?<month>[1-9]|1[0-2])/,
    "dd" => /(?<day>0[1-9]|[12][0-9]|3[01])/,
    "d" => /(?<day>[1-9]|[12][0-9]|3[01])/,
    "HH" => /(?<hour>[01][0-9]|2[0-3])/,
    "mm" => /(?<minute>[0-5][0-9])/,
    "ss" => /([0-6][0-9])/,
    "X" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
    "XX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
    "XXX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
    "x" => /(?<timezone>[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
    "xx" => /(?<timezone>[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
    "xxx" => /(?<timezone>[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
  }

  # Precompiled date-only patterns. The "." separators are written as "\\."
  # so that they match a literal dot rather than any character.
  DATE_PATTERN_REGEXP = {
    "yyyy-MM-dd" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}$"),
    "yyyyMMdd" => Regexp.new("^#{FIELDS["yyyy"]}#{FIELDS["MM"]}#{FIELDS["dd"]}$"),
    "dd-MM-yyyy" => Regexp.new("^#{FIELDS["dd"]}-#{FIELDS["MM"]}-#{FIELDS["yyyy"]}$"),
    "d-M-yyyy" => Regexp.new("^#{FIELDS["d"]}-#{FIELDS["M"]}-#{FIELDS["yyyy"]}$"),
    "MM-dd-yyyy" => Regexp.new("^#{FIELDS["MM"]}-#{FIELDS["dd"]}-#{FIELDS["yyyy"]}$"),
    "M-d-yyyy" => Regexp.new("^#{FIELDS["M"]}-#{FIELDS["d"]}-#{FIELDS["yyyy"]}$"),
    "dd/MM/yyyy" => Regexp.new("^#{FIELDS["dd"]}/#{FIELDS["MM"]}/#{FIELDS["yyyy"]}$"),
    "d/M/yyyy" => Regexp.new("^#{FIELDS["d"]}/#{FIELDS["M"]}/#{FIELDS["yyyy"]}$"),
    "MM/dd/yyyy" => Regexp.new("^#{FIELDS["MM"]}/#{FIELDS["dd"]}/#{FIELDS["yyyy"]}$"),
    "M/d/yyyy" => Regexp.new("^#{FIELDS["M"]}/#{FIELDS["d"]}/#{FIELDS["yyyy"]}$"),
    "dd.MM.yyyy" => Regexp.new("^#{FIELDS["dd"]}\\.#{FIELDS["MM"]}\\.#{FIELDS["yyyy"]}$"),
    "d.M.yyyy" => Regexp.new("^#{FIELDS["d"]}\\.#{FIELDS["M"]}\\.#{FIELDS["yyyy"]}$"),
    "MM.dd.yyyy" => Regexp.new("^#{FIELDS["MM"]}\\.#{FIELDS["dd"]}\\.#{FIELDS["yyyy"]}$"),
    "M.d.yyyy" => Regexp.new("^#{FIELDS["M"]}\\.#{FIELDS["d"]}\\.#{FIELDS["yyyy"]}$")
  }

  # Precompiled time-only patterns.
  TIME_PATTERN_REGEXP = {
    "HH:mm:ss" => Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]})$"),
    "HHmmss" => Regexp.new("^#{FIELDS["HH"]}#{FIELDS["mm"]}(?<second>#{FIELDS["ss"]})$"),
    "HH:mm" => Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}$"),
    "HHmm" => Regexp.new("^#{FIELDS["HH"]}#{FIELDS["mm"]}$")
  }

  # Precompiled combined date and time patterns.
  DATE_TIME_PATTERN_REGEXP = {
    "yyyy-MM-ddTHH:mm:ss" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]})$"),
    "yyyy-MM-ddTHH:mm" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}$")
  }

  # Regexps used when no pattern is supplied, keyed by datatype URI.
  # Fractional seconds use "\\." so only a literal dot introduces them.
  DEFAULT_REGEXP = {
    "http://www.w3.org/2001/XMLSchema#date" =>
      Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
    "http://www.w3.org/2001/XMLSchema#dateTime" =>
      Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\\.[0-9]+)?)#{FIELDS["XXX"]}?$"),
    "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
      Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\\.[0-9]+)?)#{FIELDS["XXX"]}$"),
    "http://www.w3.org/2001/XMLSchema#gDay" =>
      Regexp.new("^---#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
    "http://www.w3.org/2001/XMLSchema#gMonth" =>
      Regexp.new("^--#{FIELDS["MM"]}#{FIELDS["XXX"]}?$"),
    "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
      Regexp.new("^--#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
    "http://www.w3.org/2001/XMLSchema#gYear" =>
      Regexp.new("^#{FIELDS["yyyy"]}#{FIELDS["XXX"]}?$"),
    "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
      Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}#{FIELDS["XXX"]}?$"),
    "http://www.w3.org/2001/XMLSchema#time" =>
      Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\\.[0-9]+)?)#{FIELDS["XXX"]}?$")
  }

  private

  # Builds a regexp for a pattern that is not in any precompiled table by
  # substituting each field symbol with its FIELDS fragment, and sets @type
  # from the presence of "HH" and "yyyy" in the pattern.
  #
  # NOTE(review): apart from the fractional-seconds dot, literal characters
  # of the pattern are copied into the regexp unescaped.
  def build_custom_regexp
    regexp = @pattern

    @type = "http://www.w3.org/2001/XMLSchema#date" if !(regexp =~ /HH/) && regexp =~ /yyyy/
    @type = "http://www.w3.org/2001/XMLSchema#time" if regexp =~ /HH/ && !(regexp =~ /yyyy/)
    @type = "http://www.w3.org/2001/XMLSchema#dateTime" if regexp =~ /HH/ && regexp =~ /yyyy/

    regexp = regexp.sub("HH", FIELDS["HH"].to_s)
    regexp = regexp.sub("mm", FIELDS["mm"].to_s)
    if @pattern =~ /ss\.S+/
      max_fractional_seconds = @pattern.split(".")[-1].length
      # Block form keeps the backslash out of sub's replacement-escape handling.
      regexp = regexp.sub(/ss\.S+$/) { "(?<second>#{FIELDS["ss"]}(\\.[0-9]{1,#{max_fractional_seconds}})?)" }
    else
      regexp = regexp.sub("ss", "(?<second>#{FIELDS["ss"]})")
    end

    if regexp =~ /yyyy/
      regexp = regexp.sub("yyyy", FIELDS["yyyy"].to_s)
      regexp = regexp.sub("MM", FIELDS["MM"].to_s)
      regexp = regexp.sub("M", FIELDS["M"].to_s)
      regexp = regexp.sub("dd", FIELDS["dd"].to_s)
      # Only a "d" followed by a separator is a day symbol; this skips the
      # "d" inside already substituted group names such as "<day>".
      regexp = regexp.sub(/d(?=[-T \/\.])/, FIELDS["d"].to_s)
    end

    regexp = regexp.sub("XXX", FIELDS["XXX"].to_s)
    regexp = regexp.sub("XX", FIELDS["XX"].to_s)
    regexp = regexp.sub("X", FIELDS["X"].to_s)
    regexp = regexp.sub("xxx", FIELDS["xxx"].to_s)
    regexp = regexp.sub("xx", FIELDS["xx"].to_s)
    # The lookahead skips the "x" of "(?-mix:" emitted by Regexp#to_s.
    regexp = regexp.sub(/x(?!:)/, FIELDS["x"].to_s)

    Regexp.new("^#{regexp}$")
  end

end
|
177
|
+
|
178
|
+
# Raised by DateFormat when a date pattern contains unrecognised field symbols.
class DateFormatError < StandardError; end
|
181
|
+
end
|
182
|
+
end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
module Csvlint
|
2
|
+
module Csvw
|
3
|
+
# Validates and parses numeric strings, either in the default lexical form
# (optional sign, digits, optional fraction/exponent, optional % or ‰, or
# NaN/INF/-INF) or against a number pattern made of "0", "#", "E", the
# grouping separator and the decimal separator, with an optional literal
# prefix and suffix.
class NumberFormat

  attr_reader :pattern, :prefix, :numeric_part, :suffix, :grouping_separator, :decimal_separator, :primary_grouping_size, :secondary_grouping_size, :fractional_grouping_size

  # pattern            - number pattern such as "#,##0.00", or nil for the default form.
  # grouping_separator - defaults to "," when a pattern is given, otherwise nil.
  # decimal_separator  - defaults to ".".
  #
  # Raises Csvw::NumberFormatError when the pattern has no numeric part.
  def initialize(pattern=nil, grouping_separator=nil, decimal_separator=".")
    @pattern = pattern
    @grouping_separator = grouping_separator || (@pattern.nil? ? nil : ",")
    @decimal_separator = decimal_separator || "."
    if pattern.nil?
      @regexp = Regexp.new("^(([-+]?[0-9]+(#{Regexp.escape(@decimal_separator)}[0-9]+)?([Ee][-+]?[0-9]+)?[%‰]?)|NaN|INF|-INF)$")
    else
      # Split the pattern into literal prefix, numeric part and literal suffix.
      numeric_part_regexp = Regexp.new("(?<numeric_part>([0#Ee]|#{Regexp.escape(@grouping_separator)}|#{Regexp.escape(@decimal_separator)})+)")
      number_format_regexp = Regexp.new("^(?<prefix>.*?)#{numeric_part_regexp}(?<suffix>.*?)$")
      match = number_format_regexp.match(pattern)
      raise Csvw::NumberFormatError, "invalid number format" if match.nil?

      @prefix = match["prefix"]
      @numeric_part = match["numeric_part"]
      @suffix = match["suffix"]

      parts = @numeric_part.split("E")
      mantissa_part = parts[0]
      exponent_part = parts[1] || ""
      mantissa_parts = mantissa_part.split(@decimal_separator)
      # raise Csvw::NumberFormatError, "more than two decimal separators in number format" if parts.length > 2
      integer_part = mantissa_parts[0]
      fractional_part = mantissa_parts[1] || ""

      @integer_pattern = exponent_part == "" && fractional_part == ""

      # "0" marks a required digit, "#" an optional one.
      min_integer_digits = integer_part.gsub(@grouping_separator, "").gsub("#", "").length
      min_fraction_digits = fractional_part.gsub(@grouping_separator, "").gsub("#", "").length
      max_fraction_digits = fractional_part.gsub(@grouping_separator, "").length
      min_exponent_digits = exponent_part.gsub("#", "").length
      max_exponent_digits = exponent_part.length

      # Group sizes come from the groups after the first grouping separator;
      # a missing group yields 0 (or the primary size for the secondary size).
      integer_parts = integer_part.split(@grouping_separator)[1..-1] || []
      @primary_grouping_size = integer_parts[-1].to_s.length
      secondary_group = integer_parts[-2]
      @secondary_grouping_size = secondary_group ? secondary_group.length : @primary_grouping_size

      fractional_parts = fractional_part.split(@grouping_separator)[0..-2] || []
      @fractional_grouping_size = fractional_parts[0].to_s.length

      numeric_part_regexp = "[-+]?"

      if @primary_grouping_size == 0
        integer_regexp = "[0-9]*[0-9]{#{min_integer_digits}}"
      else
        leading_regexp = "([0-9]{0,#{@secondary_grouping_size - 1}}#{Regexp.escape(@grouping_separator)})?"
        secondary_groups = "([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)})*"
        if min_integer_digits > @primary_grouping_size
          remaining_req_digits = min_integer_digits - @primary_grouping_size
          req_secondary_groups = remaining_req_digits / @secondary_grouping_size > 0 ? "([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)}){#{remaining_req_digits / @secondary_grouping_size}}" : ""
          if remaining_req_digits % @secondary_grouping_size > 0
            final_req_digits = "[0-9]{#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
            final_opt_digits = "[0-9]{0,#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
            integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{remaining_req_digits % @secondary_grouping_size}}#{Regexp.escape(@grouping_separator)}#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
          else
            integer_regexp = "(#{leading_regexp}#{secondary_groups})?#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
          end
        else
          final_req_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{#{@primary_grouping_size - min_integer_digits}}" : ""
          final_opt_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{0,#{@primary_grouping_size - min_integer_digits}}" : ""
          integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{min_integer_digits}}"
        end
      end

      numeric_part_regexp += integer_regexp

      if max_fraction_digits > 0
        if @fractional_grouping_size == 0
          fractional_regexp = ""
          fractional_regexp += "[0-9]{#{min_fraction_digits}}" if min_fraction_digits > 0
          fractional_regexp += "[0-9]{0,#{max_fraction_digits - min_fraction_digits}}" unless min_fraction_digits == max_fraction_digits
          fractional_regexp = "#{Regexp.escape(@decimal_separator)}#{fractional_regexp}"
          fractional_regexp = "(#{fractional_regexp})?" if min_fraction_digits == 0
          numeric_part_regexp += fractional_regexp
        else
          fractional_regexp = ""

          if min_fraction_digits > 0
            if min_fraction_digits >= @fractional_grouping_size
              # first group of required digits - something like "[0-9]{3}"
              fractional_regexp += "[0-9]{#{@fractional_grouping_size}}"
              # additional groups of required digits - something like "(,[0-9]{3}){1}"
              fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){#{min_fraction_digits / @fractional_grouping_size - 1}}" if min_fraction_digits / @fractional_grouping_size > 1
              fractional_regexp += "#{Regexp.escape(@grouping_separator)}" if min_fraction_digits % @fractional_grouping_size > 0
            end
            # additional required digits - something like ",[0-9]{1}"
            fractional_regexp += "[0-9]{#{min_fraction_digits % @fractional_grouping_size}}" if min_fraction_digits % @fractional_grouping_size > 0

            opt_fractional_digits = max_fraction_digits - min_fraction_digits
            if opt_fractional_digits > 0
              fractional_regexp += "("

              if min_fraction_digits % @fractional_grouping_size > 0
                # optional fractional digits to complete the group
                fractional_regexp += "[0-9]{0,#{[opt_fractional_digits, @fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)].min}}"
                fractional_regexp += "|"
                fractional_regexp += "[0-9]{#{[opt_fractional_digits, @fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)].min}}"
              else
                fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{@fractional_grouping_size}})?"
                fractional_regexp += "|"
                fractional_regexp += "#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}"
              end

              remaining_opt_fractional_digits = opt_fractional_digits - (@fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size))
              if remaining_opt_fractional_digits > 0
                if remaining_opt_fractional_digits % @fractional_grouping_size > 0
                  # optional fraction digits in groups
                  fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{remaining_opt_fractional_digits / @fractional_grouping_size}}" if remaining_opt_fractional_digits > @fractional_grouping_size
                  # remaining optional fraction digits
                  fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{remaining_opt_fractional_digits % @fractional_grouping_size}})?"
                else
                  # optional fraction digits in groups
                  fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{(remaining_opt_fractional_digits / @fractional_grouping_size) - 1}}" if remaining_opt_fractional_digits > @fractional_grouping_size
                  # remaining optional fraction digits
                  fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{@fractional_grouping_size}})?"
                end

                # optional fraction digits in groups
                fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{(remaining_opt_fractional_digits / @fractional_grouping_size) - 1}}" if remaining_opt_fractional_digits > @fractional_grouping_size
                # remaining optional fraction digits
                fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{remaining_opt_fractional_digits % @fractional_grouping_size}})?" if remaining_opt_fractional_digits % @fractional_grouping_size > 0
              end
              fractional_regexp += ")"
            end
          elsif max_fraction_digits % @fractional_grouping_size > 0
            # optional fractional digits in groups
            fractional_regexp += "([0-9]{#{@fractional_grouping_size}}#{Regexp.escape(@grouping_separator)}){0,#{max_fraction_digits / @fractional_grouping_size}}"
            # remaining optional fraction digits
            fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{max_fraction_digits % @fractional_grouping_size}})?" if max_fraction_digits % @fractional_grouping_size > 0
          else
            fractional_regexp += "([0-9]{#{@fractional_grouping_size}}#{Regexp.escape(@grouping_separator)}){0,#{(max_fraction_digits / @fractional_grouping_size) - 1}}" if max_fraction_digits > @fractional_grouping_size
            fractional_regexp += "[0-9]{1,#{@fractional_grouping_size}}"
          end
          fractional_regexp = "#{Regexp.escape(@decimal_separator)}#{fractional_regexp}"
          fractional_regexp = "(#{fractional_regexp})?" if min_fraction_digits == 0
          numeric_part_regexp += fractional_regexp
        end
      end

      if max_exponent_digits > 0
        numeric_part_regexp += "E"
        numeric_part_regexp += "[0-9]{0,#{max_exponent_digits - min_exponent_digits}}" unless max_exponent_digits == min_exponent_digits
        numeric_part_regexp += "[0-9]{#{min_exponent_digits}}" unless min_exponent_digits == 0
      end

      # Prefix and suffix are literal text from the pattern, so both are escaped.
      @regexp = Regexp.new("^(?<prefix>#{Regexp.escape(@prefix)})(?<numeric_part>#{numeric_part_regexp})(?<suffix>#{Regexp.escape(@suffix)})$")
    end
  end

  # Returns true when +value+ is lexically valid for this format, else false.
  def match(value)
    value =~ @regexp ? true : false
  end

  # Parses +value+ into a number without modifying the string passed in.
  #
  # Without a pattern: returns an Integer for plain integers, a Float for
  # everything else (values ending in % or ‰ are divided by 100 or 1000;
  # NaN, INF and -INF map to the Float constants), or nil if the value is
  # invalid. With a pattern: returns an Integer when the pattern has neither
  # a fractional nor an exponent part, otherwise a Float, or nil if the value
  # does not match.
  def parse(value)
    if @pattern.nil?
      unless @grouping_separator.nil?
        separator = Regexp.escape(@grouping_separator)
        # A leading or doubled grouping separator is never valid.
        return nil if value =~ Regexp.new("((^#{separator})|#{separator}{2})")
        value = value.gsub(@grouping_separator, "")
      end

      if value =~ INTEGER_REGEXP
        case value[-1]
        when "%"
          value.to_f / 100
        when "‰"
          value.to_f / 1000
        else
          value.to_i
        end
      elsif value =~ @regexp
        case value
        when "NaN"
          Float::NAN
        when "INF"
          Float::INFINITY
        when "-INF"
          -Float::INFINITY
        else
          case value[-1]
          when "%"
            to_float(value) / 100
          when "‰"
            to_float(value) / 1000
          else
            to_float(value)
          end
        end
      else
        nil
      end
    else
      match = @regexp.match(value)
      return nil if match.nil?
      number = match["numeric_part"].gsub(@grouping_separator, "")
      return number.to_i if @integer_pattern
      to_float(number)
    end
  end

  # Default lexical form of an integer, optionally followed by % or ‰.
  INTEGER_REGEXP = /^[-+]?[0-9]+[%‰]?$/

  private

  # Converts +text+ to a Float, first rewriting the configured decimal
  # separator to "." because String#to_f only understands ".".
  def to_float(text)
    text.sub(@decimal_separator, ".").to_f
  end

end
|
206
|
+
|
207
|
+
# Raised by NumberFormat when a number pattern has no recognisable numeric part.
class NumberFormatError < StandardError; end
|
210
|
+
end
|
211
|
+
end
|
@@ -0,0 +1,761 @@
|
|
1
|
+
module Csvlint
|
2
|
+
module Csvw
|
3
|
+
class PropertyChecker
|
4
|
+
|
5
|
+
class << self
|
6
|
+
|
7
|
+
# Validates a single metadata property.
#
# Returns a three-element array [value, warnings, type]:
# * for a property listed in PROPERTIES, whatever its checker lambda returns;
# * for a "prefix:name" property whose prefix is in NAMESPACES, the checked
#   common-property value, its warnings and :annotation;
# * otherwise the untouched value, :invalid_property and nil.
def check_property(property, value, base_url, lang)
  return PROPERTIES[property].call(value, base_url, lang) if PROPERTIES.include?(property)

  prefixed = property =~ /^([a-z]+):/ && NAMESPACES.include?(property.split(":")[0])
  return [value, :invalid_property, nil] unless prefixed

  checked, warnings = check_common_property_value(value, base_url, lang)
  [checked, warnings, :annotation]
end
|
17
|
+
|
18
|
+
private
|
19
|
+
# Checks the value of a common (namespaced) property.
#
# Non-Hash values are returned unchanged with nil warnings. For a Hash, a
# shallow copy is checked key by key: "@context", "@list" and "@set" are
# rejected, "@type", "@id", "@value" and "@language" are validated, any other
# key starting with "@" is rejected, and nested Hashes are checked
# recursively. Returns [checked_copy, warnings].
#
# Raises Csvlint::Csvw::MetadataError for every rejected construct.
def check_common_property_value(value, base_url, lang)
  return value, nil unless Hash === value

  value = value.clone
  warnings = []
  value.each do |key, entry|
    case key
    when "@context"
      raise Csvlint::Csvw::MetadataError.new(key), "common property has @context property"
    when "@list"
      raise Csvlint::Csvw::MetadataError.new(key), "common property has @list property"
    when "@set"
      raise Csvlint::Csvw::MetadataError.new(key), "common property has @set property"
    when "@type"
      # With a @value the type must be a built-in datatype, without one a
      # built-in type; a known namespace prefix is accepted in both cases.
      recognised = value["@value"] ? BUILT_IN_DATATYPES.include?(entry) : BUILT_IN_TYPES.include?(entry)
      recognised ||= entry =~ /^([a-z]+):/ && NAMESPACES.include?(entry.split(":")[0])
      unless recognised
        # must be an absolute URI
        absolute = begin
          !URI(entry).scheme.nil?
        rescue
          false
        end
        raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @type (#{entry})" unless absolute
      end
    when "@id"
      unless base_url.nil?
        begin
          entry = URI.join(base_url, entry)
        rescue
          raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @id (#{entry})"
        end
      end
    when "@value"
      raise Csvlint::Csvw::MetadataError.new(), "common property with @value has both @language and @type" if value["@type"] && value["@language"]
      extras = value.except("@type").except("@language").except("@value")
      raise Csvlint::Csvw::MetadataError.new(), "common property with @value has properties other than @language or @type" unless extras.empty?
    when "@language"
      raise Csvlint::Csvw::MetadataError.new(), "common property with @language lacks a @value" unless value["@value"]
      raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @language (#{entry})" unless entry =~ BCP47_LANGUAGE_REGEXP || entry.nil?
    else
      if key[0] == "@"
        raise Csvlint::Csvw::MetadataError.new(), "common property has property other than @id, @type, @value or @language beginning with @ (#{key})"
      end
    end

    if entry.instance_of? Hash
      entry, nested_warnings = check_common_property_value(entry, base_url, lang)
      warnings += Array(nested_warnings)
    end
    value[key] = entry
  end
  return value, warnings
end
|
74
|
+
|
75
|
+
# Converts a range facet of a datatype description to a parsed value.
#
# value    - the datatype description Hash; modified in place.
# property - the facet key to convert.
# datatype - the base datatype, used to decide how the facet is parsed.
#
# For date/time datatypes the facet is parsed with the default DateFormat for
# that datatype: on success the parsed value replaces the string, on failure
# the facet is removed and [:invalid_<property>] is returned. Numeric
# datatypes are left untouched. Returns an array of warning symbols (empty
# when there is nothing to report or the facet is absent).
#
# Raises Csvlint::Csvw::MetadataError when the facet is present on a datatype
# that is neither numeric nor date/time.
def convert_value_facet(value, property, datatype)
  return [] unless value[property]

  if DATE_FORMAT_DATATYPES.include?(datatype)
    parsed = Csvlint::Csvw::DateFormat.new(nil, datatype).parse(value[property])
    if parsed.nil?
      value.delete(property)
      # No leading ":" inside the string - to_sym already produces a Symbol.
      return ["invalid_#{property}".to_sym]
    end
    value[property] = parsed
    []
  elsif NUMERIC_FORMAT_DATATYPES.include?(datatype)
    []
  else
    raise Csvlint::Csvw::MetadataError.new("datatype.#{property}"), "#{property} is only allowed for numeric, date/time and duration types"
  end
end
|
95
|
+
|
96
|
+
# Builds a checker for properties whose value must be an Array.
# The checker returns [value, nil, type] for an Array and
# [false, :invalid_value, type] for anything else.
def array_property(type)
  lambda do |value, base_url, lang|
    if value.instance_of?(Array)
      [value, nil, type]
    else
      [false, :invalid_value, type]
    end
  end
end
|
102
|
+
|
103
|
+
# Builds a checker for properties whose value must be true or false.
# The checker returns [value, nil, type] for a boolean and
# [false, :invalid_value, type] for anything else.
def boolean_property(type)
  lambda do |value, base_url, lang|
    is_boolean = value == true || value == false
    is_boolean ? [value, nil, type] : [false, :invalid_value, type]
  end
end
|
109
|
+
|
110
|
+
# Builds a checker for properties whose value must be a String.
# The checker returns [value, nil, type] for a String and
# ["", :invalid_value, type] for anything else.
def string_property(type)
  lambda do |value, base_url, lang|
    if value.instance_of?(String)
      [value, nil, type]
    else
      ["", :invalid_value, type]
    end
  end
end
|
116
|
+
|
117
|
+
# Builds a checker for properties whose value must be a non-negative Integer.
# The checker returns [value, nil, type] when valid and
# [nil, :invalid_value, type] otherwise.
def numeric_property(type)
  lambda do |value, base_url, lang|
    non_negative_integer = value.kind_of?(Integer) && value >= 0
    non_negative_integer ? [value, nil, type] : [nil, :invalid_value, type]
  end
end
|
123
|
+
|
124
|
+
# Builds a checker for link properties.
#
# For a String the checker returns [uri, nil, type], where uri is the value
# parsed as a URI, resolved against base_url when one is given. For any other
# value it returns [base_url, :invalid_value, type].
#
# The checker raises Csvlint::Csvw::MetadataError when the value's string
# form starts with "_:".
def link_property(type)
  lambda do |value, base_url, lang|
    raise Csvlint::Csvw::MetadataError.new(), "URL #{value} starts with _:" if value.to_s =~ /^_:/
    next [base_url, :invalid_value, type] unless value.instance_of?(String)

    resolved = base_url.nil? ? URI(value) : URI.join(base_url, value)
    [resolved, nil, type]
  end
end
|
131
|
+
|
132
|
+
# Builds a checker for properties whose value must be a language tag.
#
# The checker returns [value, nil, type] when the value matches BCP47_REGEXP
# and [nil, :invalid_value, type] otherwise. Values that are not text
# (numbers, arrays, hashes, booleans, nil) are reported as invalid instead of
# being matched against the regexp, because such objects do not implement
# =~ on every Ruby version.
def language_property(type)
  lambda do |value, base_url, lang|
    textual = value.is_a?(String) || value.is_a?(Symbol)
    if textual && value =~ BCP47_REGEXP
      [value, nil, type]
    else
      [nil, :invalid_value, type]
    end
  end
end
|
138
|
+
|
139
|
+
# Builds a checker for natural-language properties and normalises their
# value to a Hash of language => array of strings.
#
# * String: returns [{ lang => [value] }, nil, type].
# * Array:  keeps the String items under lang; every other item adds an
#           :invalid_value warning.
# * Hash:   works on a copy; keys matching BCP47_REGEXP keep their String
#           items (other items add :invalid_value), other keys are dropped
#           with an :invalid_language warning; an empty result adds
#           :invalid_value.
# * Anything else: returns [{}, :invalid_value, type].
def natural_language_property(type)
  lambda do |value, base_url, lang|
    warnings = []
    # Keeps the String items and records one warning per rejected item.
    only_strings = lambda do |items|
      items.select do |item|
        item.instance_of?(String).tap { |ok| warnings << :invalid_value unless ok }
      end
    end

    if value.instance_of? String
      [{ lang => [ value ] }, nil, type]
    elsif value.instance_of? Array
      [{ lang => only_strings.call(value) }, warnings, type]
    elsif value.instance_of? Hash
      titles = value.clone
      value.each do |language, entries|
        if language =~ BCP47_REGEXP
          titles[language] = only_strings.call(Array(entries))
        else
          titles.delete(language)
          warnings << :invalid_language
        end
      end
      warnings << :invalid_value if titles.empty?
      [titles, warnings, type]
    else
      [{}, :invalid_value, type]
    end
  end
end
|
179
|
+
|
180
|
+
# Builds a checker for column-reference properties.
#
# The returned lambda is called with (value, base_url, lang) and returns
# [Array(value), nil, type]: a single reference is wrapped in an array, an
# array is passed through, and no warning is ever produced.
def column_reference_property(type)
  return lambda { |value, base_url, lang|
    return Array(value), nil, type
  }
end
|
185
|
+
|
186
|
+
|
187
|
+
end
|
188
|
+
|
189
|
+
# Maps each known metadata property name to its checker lambda.
#
# Every checker is called with (value, base_url, lang) and returns
# [normalised_value, warnings, type], where warnings is nil, a single Symbol
# or an Array of Symbols, and type is the property category
# (:context, :common, :inherited, :column, :table, :dialect, :schema,
# :transformation, :foreign_key or :foreign_key_reference).
# Some checkers raise Csvlint::Csvw::MetadataError for unrecoverable problems.
PROPERTIES = {
  # context properties
  "@language" => language_property(:context),
  "@base" => link_property(:context),
  # common properties
  "@id" => link_property(:common),
  "notes" => array_property(:common),
  "suppressOutput" => boolean_property(:common),
  # inherited properties
  "null" => lambda { |value, base_url, lang|
    case value
    when String
      return [value], nil, :inherited
    when Array
      values = []
      warnings = []
      value.each do |v|
        if v.instance_of? String
          values << v
        else
          warnings << :invalid_value
        end
      end
      return values, warnings, :inherited
    else
      return [""], :invalid_value, :inherited
    end
  },
  "default" => string_property(:inherited),
  "separator" => lambda { |value, base_url, lang|
    return value, nil, :inherited if value.instance_of?(String) || value.nil?
    return nil, :invalid_value, :inherited
  },
  "lang" => language_property(:inherited),
  "datatype" => lambda { |value, base_url, lang|
    # Work on a copy so the caller's metadata is not modified.
    value = value.clone
    warnings = []
    if value.instance_of? Hash
      if value["@id"]
        raise Csvlint::Csvw::MetadataError.new("datatype.@id"), "datatype @id must not be the id of a built-in datatype (#{value["@id"]})" if BUILT_IN_DATATYPES.values.include?(value["@id"])
        v,w,t = PROPERTIES["@id"].call(value["@id"], base_url, lang)
        unless w.nil?
          warnings << w
          value.delete("@id")
        end
      end

      if value["base"]
        if BUILT_IN_DATATYPES.include? value["base"]
          value["base"] = BUILT_IN_DATATYPES[value["base"]]
        else
          value["base"] = BUILT_IN_DATATYPES["string"]
          warnings << :invalid_datatype_base
        end
      else
        value["base"] = BUILT_IN_DATATYPES["string"]
      end
    elsif BUILT_IN_DATATYPES.include? value
      value = { "@id" => BUILT_IN_DATATYPES[value] }
    else
      value = { "@id" => BUILT_IN_DATATYPES["string"] }
      warnings << :invalid_value
    end

    # Length facets are only accepted for string and binary based datatypes.
    unless STRING_DATATYPES.include?(value["base"]) || BINARY_DATATYPES.include?(value["base"])
      raise Csvlint::Csvw::MetadataError.new("datatype.length"), "datatypes based on #{value["base"]} cannot have a length facet" if value["length"]
      raise Csvlint::Csvw::MetadataError.new("datatype.minLength"), "datatypes based on #{value["base"]} cannot have a minLength facet" if value["minLength"]
      raise Csvlint::Csvw::MetadataError.new("datatype.maxLength"), "datatypes based on #{value["base"]} cannot have a maxLength facet" if value["maxLength"]
    end

    # "minimum" / "maximum" are stored under "minInclusive" / "maxInclusive".
    if value["minimum"]
      value["minInclusive"] = value["minimum"]
      value.delete("minimum")
    end
    if value["maximum"]
      value["maxInclusive"] = value["maximum"]
      value.delete("maximum")
    end

    warnings += convert_value_facet(value, "minInclusive", value["base"])
    warnings += convert_value_facet(value, "minExclusive", value["base"])
    warnings += convert_value_facet(value, "maxInclusive", value["base"])
    warnings += convert_value_facet(value, "maxExclusive", value["base"])

    raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both minimum/minInclusive (#{value["minInclusive"]}) and minExclusive (#{value["minExclusive"]})" if value["minInclusive"] && value["minExclusive"]
    raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both maximum/maxInclusive (#{value["maxInclusive"]}) and maxExclusive (#{value["maxExclusive"]})" if value["maxInclusive"] && value["maxExclusive"]
    raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{value["minInclusive"]}) cannot be more than maxInclusive (#{value["maxInclusive"]})" if value["minInclusive"] && value["maxInclusive"] && value["minInclusive"] > value["maxInclusive"]
    raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{value["minInclusive"]}) cannot be more than or equal to maxExclusive (#{value["maxExclusive"]})" if value["minInclusive"] && value["maxExclusive"] && value["minInclusive"] >= value["maxExclusive"]
    raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{value["minExclusive"]}) cannot be more than maxExclusive (#{value["maxExclusive"]})" if value["minExclusive"] && value["maxExclusive"] && value["minExclusive"] > value["maxExclusive"]
    raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{value["minExclusive"]}) cannot be more than or equal to maxInclusive (#{value["maxInclusive"]})" if value["minExclusive"] && value["maxInclusive"] && value["minExclusive"] >= value["maxInclusive"]

    raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be less than minLength (#{value["minLength"]})" if value["length"] && value["minLength"] && value["length"] < value["minLength"]
    raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be more than maxLength (#{value["maxLength"]})" if value["length"] && value["maxLength"] && value["length"] > value["maxLength"]
    raise Csvlint::Csvw::MetadataError.new(""), "datatype minLength (#{value["minLength"]}) cannot be more than maxLength (#{value["maxLength"]})" if value["minLength"] && value["maxLength"] && value["minLength"] > value["maxLength"]

    # The interpretation of "format" depends on the base datatype.
    if value["format"]
      if REGEXP_FORMAT_DATATYPES.include?(value["base"])
        begin
          value["format"] = Regexp.new(value["format"])
        rescue RegexpError
          value.delete("format")
          warnings << :invalid_regex
        end
      elsif NUMERIC_FORMAT_DATATYPES.include?(value["base"])
        value["format"] = { "pattern" => value["format"] } if value["format"].instance_of? String
        begin
          value["format"] = Csvlint::Csvw::NumberFormat.new(value["format"]["pattern"], value["format"]["groupChar"], value["format"]["decimalChar"] || ".")
        rescue Csvlint::Csvw::NumberFormatError
          value["format"] = Csvlint::Csvw::NumberFormat.new(nil, value["format"]["groupChar"], value["format"]["decimalChar"] || ".")
          warnings << :invalid_number_format
        end
      elsif value["base"] == "http://www.w3.org/2001/XMLSchema#boolean"
        if value["format"].instance_of? String
          value["format"] = value["format"].split("|")
          unless value["format"].length == 2
            value.delete("format")
            warnings << :invalid_boolean_format
          end
        else
          value.delete("format")
          warnings << :invalid_boolean_format
        end
      elsif DATE_FORMAT_DATATYPES.include?(value["base"])
        if value["format"].instance_of? String
          begin
            value["format"] = Csvlint::Csvw::DateFormat.new(value["format"])
          # NOTE(review): the number-format branch rescues Csvlint::Csvw::NumberFormatError;
          # confirm that Csvlint::CsvDateFormatError is the intended constant here.
          rescue Csvlint::CsvDateFormatError
            value.delete("format")
            warnings << :invalid_date_format
          end
        else
          value.delete("format")
          warnings << :invalid_date_format
        end
      end
    end
    return value, warnings, :inherited
  },
  "required" => boolean_property(:inherited),
  "ordered" => boolean_property(:inherited),
  "aboutUrl" => string_property(:inherited),
  "propertyUrl" => string_property(:inherited),
  "valueUrl" => string_property(:inherited),
  "textDirection" => lambda { |value, base_url, lang|
    # Only convert values that can become symbols; anything else is reported as invalid.
    value = value.to_sym if value.respond_to?(:to_sym)
    return value, nil, :inherited if [:ltr, :rtl, :auto, :inherit].include? value
    return :inherit, :invalid_value, :inherited
  },
  # column level properties
  "virtual" => boolean_property(:column),
  "titles" => natural_language_property(:column),
  "name" => lambda { |value, base_url, lang|
    return value, nil, :column if value.instance_of?(String) && value =~ NAME_REGEXP
    return nil, :invalid_value, :column
  },
  # table level properties
  "transformations" => lambda { |value, base_url, lang|
    transformations = []
    warnings = []
    if value.instance_of? Array
      value.each_with_index do |transformation,i|
        if transformation.instance_of? Hash
          transformation = transformation.clone
          transformation.each do |p,v|
            if p == "@id"
              raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@id"), "@id starts with _:" if v =~ /^_:/
            elsif p == "@type"
              raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@type"), "@type of transformation is not 'Template'" if v != 'Template'
            elsif p == "url"
            elsif p == "titles"
            else
              v, warning, type = check_property(p, v, base_url, lang)
              unless type == :transformation && (warning.nil? || warning.empty?)
                # Remove the offending property from this transformation
                # (not from the enclosing array of transformations).
                transformation.delete(p)
                warnings << :invalid_property unless type == :transformation
                warnings += Array(warning)
              end
            end
          end
          transformations << transformation
        else
          warnings << :invalid_transformation
        end
      end
    else
      warnings << :invalid_value
    end
    return transformations, warnings, :table
  },
  "tableDirection" => lambda { |value, base_url, lang|
    # Only convert values that can become symbols; anything else is reported as invalid.
    value = value.to_sym if value.respond_to?(:to_sym)
    return value, nil, :table if [:ltr, :rtl, :auto].include? value
    return :auto, :invalid_value, :table
  },
  "tableSchema" => lambda { |value, base_url, lang|
    schema_base_url = base_url
    schema_lang = lang
    if value.instance_of? String
      # A string value is a reference to an external schema document: load it.
      schema_url = URI.join(base_url, value).to_s
      schema_base_url = schema_url
      # Read and close the source instead of leaving file/IO handles open.
      # NOTE(review): opening a non-file URL through `open` presumably relies on
      # open-uri being loaded elsewhere -- confirm.
      schema_json = if schema_url.start_with?("file:")
        File.read(schema_url[5..-1])
      else
        open(schema_url) { |io| io.read }
      end
      schema = JSON.parse( schema_json )
      schema["@id"] = schema["@id"] ? URI.join(schema_url, schema["@id"]).to_s : schema_url
      if schema["@context"]
        if schema["@context"].instance_of?(Array) && schema["@context"].length > 1
          schema_base_url = schema["@context"][1]["@base"] ? URI.join(schema_base_url, schema["@context"][1]["@base"]).to_s : schema_base_url
          schema_lang = schema["@context"][1]["@language"] || schema_lang
        end
        schema.delete("@context")
      end
    elsif value.instance_of? Hash
      schema = value.clone
    else
      return {}, :invalid_value, :table
    end
    warnings = []
    schema.each do |p,v|
      if p == "@id"
        raise Csvlint::Csvw::MetadataError.new("tableSchema.@id"), "@id starts with _:" if v =~ /^_:/
      elsif p == "@type"
        raise Csvlint::Csvw::MetadataError.new("tableSchema.@type"), "@type of schema is not 'Schema'" if v != 'Schema'
      else
        v, warning, type = check_property(p, v, schema_base_url, schema_lang)
        if (type == :schema || type == :inherited) && (warning.nil? || warning.empty?)
          schema[p] = v
        else
          schema.delete(p)
          warnings << :invalid_property unless (type == :schema || type == :inherited)
          warnings += Array(warning)
        end
      end
    end
    return schema, warnings, :table
  },
  "url" => link_property(:table),
  "dialect" => lambda { |value, base_url, lang|
    if value.instance_of? Hash
      value = value.clone
      warnings = []
      value.each do |p,v|
        if p == "@id"
          raise Csvlint::Csvw::MetadataError.new("dialect.@id"), "@id starts with _:" if v =~ /^_:/
        elsif p == "@type"
          raise Csvlint::Csvw::MetadataError.new("dialect.@type"), "@type of dialect is not 'Dialect'" if v != 'Dialect'
        else
          v, warning, type = check_property(p, v, base_url, lang)
          if type == :dialect && (warning.nil? || warning.empty?)
            value[p] = v
          else
            value.delete(p)
            warnings << :invalid_property unless type == :dialect
            warnings += Array(warning)
          end
        end
      end
      return value, warnings, :table
    else
      return {}, :invalid_value, :table
    end
  },
  # dialect properties
  "commentPrefix" => string_property(:dialect),
  "delimiter" => string_property(:dialect),
  "doubleQuote" => boolean_property(:dialect),
  "encoding" => lambda { |value, base_url, lang|
    return value, nil, :dialect if VALID_ENCODINGS.include? value
    return nil, :invalid_value, :dialect
  },
  "header" => boolean_property(:dialect),
  "headerRowCount" => numeric_property(:dialect),
  "lineTerminators" => array_property(:dialect),
  "quoteChar" => string_property(:dialect),
  "skipBlankRows" => boolean_property(:dialect),
  "skipColumns" => numeric_property(:dialect),
  "skipInitialSpace" => boolean_property(:dialect),
  "skipRows" => numeric_property(:dialect),
  "trim" => lambda { |value, base_url, lang|
    value = :true if value == true || value == "true"
    value = :false if value == false || value == "false"
    value = :start if value == "start"
    value = :end if value == "end"
    return value, nil, :dialect if [:true, :false, :start, :end].include? value
    return true, :invalid_value, :dialect
  },
  # schema properties
  "columns" => lambda { |value, base_url, lang| return value, nil, :schema },
  "primaryKey" => column_reference_property(:schema),
  "foreignKeys" => lambda { |value, base_url, lang|
    foreign_keys = []
    warnings = []
    if value.instance_of? Array
      value.each_with_index do |foreign_key,i|
        if foreign_key.instance_of? Hash
          foreign_key = foreign_key.clone
          foreign_key.each do |p,v|
            v, warning, type = check_property(p, v, base_url, lang)
            if type == :foreign_key && (warning.nil? || warning.empty?)
              foreign_key[p] = v
            elsif p =~ /:/
              raise Csvlint::Csvw::MetadataError.new("foreignKey.#{p}"), "foreignKey includes a prefixed (common) property"
            else
              foreign_key.delete(p)
              warnings << :invalid_property unless type == :foreign_key
              warnings += Array(warning)
            end
          end
          foreign_keys << foreign_key
        else
          warnings << :invalid_foreign_key
        end
      end
    else
      warnings << :invalid_value
    end
    return foreign_keys, warnings, :schema
  },
  "rowTitles" => column_reference_property(:schema),
  # transformation properties
  "targetFormat" => lambda { |value, base_url, lang| return value, nil, :transformation },
  "scriptFormat" => lambda { |value, base_url, lang| return value, nil, :transformation },
  "source" => lambda { |value, base_url, lang| return value, nil, :transformation },
  # foreignKey properties
  "columnReference" => column_reference_property(:foreign_key),
  "reference" => lambda { |value, base_url, lang|
    if value.instance_of? Hash
      value = value.clone
      warnings = []
      value.each do |p,v|
        if ["resource", "schemaReference", "columnReference"].include? p
          v, warning, type = check_property(p, v, base_url, lang)
          if warning.nil? || warning.empty?
            value[p] = v
          else
            value.delete(p)
            warnings += Array(warning)
          end
        elsif p =~ /:/
          raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.#{p}"), "foreignKey reference includes a prefixed (common) property"
        else
          value.delete(p)
          warnings << :invalid_property
        end
      end
      raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.columnReference"), "foreignKey reference columnReference is missing" unless value["columnReference"]
      raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference does not have either resource or schemaReference" unless value["resource"] || value["schemaReference"]
      raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference has both resource and schemaReference" if value["resource"] && value["schemaReference"]
      return value, warnings, :foreign_key
    else
      raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference is not an object"
    end
  },
  # foreignKey reference properties
  "resource" => lambda { |value, base_url, lang| return value, nil, :foreign_key_reference },
  "schemaReference" => lambda { |value, base_url, lang|
    return URI.join(base_url, value).to_s, nil, :foreign_key_reference
  }
}.freeze
|
546
|
+
|
547
|
+
# Namespace prefixes and the namespace URLs they expand to.
NAMESPACES = {
  "dcat" => "http://www.w3.org/ns/dcat#",
  "qb" => "http://purl.org/linked-data/cube#",
  "grddl" => "http://www.w3.org/2003/g/data-view#",
  "ma" => "http://www.w3.org/ns/ma-ont#",
  "org" => "http://www.w3.org/ns/org#",
  "owl" => "http://www.w3.org/2002/07/owl#",
  "prov" => "http://www.w3.org/ns/prov#",
  "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
  "rdfa" => "http://www.w3.org/ns/rdfa#",
  "rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
  "rif" => "http://www.w3.org/2007/rif#",
  "rr" => "http://www.w3.org/ns/r2rml#",
  "sd" => "http://www.w3.org/ns/sparql-service-description#",
  "skos" => "http://www.w3.org/2004/02/skos/core#",
  "skosxl" => "http://www.w3.org/2008/05/skos-xl#",
  "wdr" => "http://www.w3.org/2007/05/powder#",
  "void" => "http://rdfs.org/ns/void#",
  "wdrs" => "http://www.w3.org/2007/05/powder-s#",
  "xhv" => "http://www.w3.org/1999/xhtml/vocab#",
  "xml" => "http://www.w3.org/XML/1998/namespace",
  "xsd" => "http://www.w3.org/2001/XMLSchema#",
  "cc" => "http://creativecommons.org/ns#",
  "ctag" => "http://commontag.org/ns#",
  "dc" => "http://purl.org/dc/terms/",
  "dcterms" => "http://purl.org/dc/terms/",
  "dc11" => "http://purl.org/dc/elements/1.1/",
  "foaf" => "http://xmlns.com/foaf/0.1/",
  "gr" => "http://purl.org/goodrelations/v1#",
  "ical" => "http://www.w3.org/2002/12/cal/icaltzd#",
  "og" => "http://ogp.me/ns#",
  "rev" => "http://purl.org/stuff/rev#",
  "sioc" => "http://rdfs.org/sioc/ns#",
  "v" => "http://rdf.data-vocabulary.org/#",
  "vcard" => "http://www.w3.org/2006/vcard/ns#",
  "schema" => "http://schema.org/"
}.freeze
|
584
|
+
|
585
|
+
# Building blocks for a regular expression that recognises language tags.
# Each piece is a regexp source string; they are combined into BCP47_REGEXP below.
BCP47_REGULAR_REGEXP = "(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)"
BCP47_IRREGULAR_REGEXP = "(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)"
BCP47_GRANDFATHERED_REGEXP = "(?<grandfathered>" + BCP47_IRREGULAR_REGEXP + "|" + BCP47_REGULAR_REGEXP + ")"
BCP47_PRIVATE_USE_REGEXP = "(?<privateUse>x(-[A-Za-z0-9]{1,8})+)"
BCP47_SINGLETON_REGEXP = "[0-9A-WY-Za-wy-z]"
BCP47_EXTENSION_REGEXP = "(?<extension>" + BCP47_SINGLETON_REGEXP + "(-[A-Za-z0-9]{2,8})+)"
BCP47_VARIANT_REGEXP = "(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})"
BCP47_REGION_REGEXP = "(?<region>[A-Za-z]{2}|[0-9]{3})"
BCP47_SCRIPT_REGEXP = "(?<script>[A-Za-z]{4})"
BCP47_EXTLANG_REGEXP = "(?<extlang>[A-Za-z]{3}(-[A-Za-z]{3}){0,2})"
BCP47_LANGUAGE_REGEXP = "(?<language>([A-Za-z]{2,3}(-" + BCP47_EXTLANG_REGEXP + ")?)|[A-Za-z]{4}|[A-Za-z]{5,8})"
BCP47_LANGTAG_REGEXP = "(" + BCP47_LANGUAGE_REGEXP + "(-" + BCP47_SCRIPT_REGEXP + ")?" + "(-" + BCP47_REGION_REGEXP + ")?" + "(-" + BCP47_VARIANT_REGEXP + ")*" + "(-" + BCP47_EXTENSION_REGEXP + ")*" + "(-" + BCP47_PRIVATE_USE_REGEXP + ")?" + ")"
# \A and \z anchor the whole string; ^ and $ would accept a valid tag on any
# single line of a multi-line value.
BCP47_LANGUAGETAG_REGEXP = "\\A(" + BCP47_GRANDFATHERED_REGEXP + "|" + BCP47_LANGTAG_REGEXP + "|" + BCP47_PRIVATE_USE_REGEXP + ")\\z"
BCP47_REGEXP = Regexp.new(BCP47_LANGUAGETAG_REGEXP)
|
599
|
+
|
600
|
+
# Pattern for the "name" property: the first character is an ASCII letter or
# digit (or a %XX escape); later characters may also be underscores.
# Anchored with \A and \z so the whole value must match, not just one line of it.
NAME_REGEXP = /\A([A-Za-z0-9]|(%[A-F0-9][A-F0-9]))([A-Za-z0-9_]|(%[A-F0-9][A-F0-9]))*\z/
|
601
|
+
|
602
|
+
# The built-in type names.
BUILT_IN_TYPES = ["TableGroup", "Table", "Schema", "Column", "Dialect", "Template", "Datatype"].freeze
|
603
|
+
|
604
|
+
# Base datatypes whose "format" is compiled as a regular expression.
REGEXP_FORMAT_DATATYPES = [
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "http://www.w3.org/ns/csvw#JSON",
  "http://www.w3.org/2001/XMLSchema#anyAtomicType",
  "http://www.w3.org/2001/XMLSchema#anyURI",
  "http://www.w3.org/2001/XMLSchema#base64Binary",
  "http://www.w3.org/2001/XMLSchema#duration",
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration",
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration",
  "http://www.w3.org/2001/XMLSchema#hexBinary",
  "http://www.w3.org/2001/XMLSchema#QName",
  "http://www.w3.org/2001/XMLSchema#string",
  "http://www.w3.org/2001/XMLSchema#normalizedString",
  "http://www.w3.org/2001/XMLSchema#token",
  "http://www.w3.org/2001/XMLSchema#language",
  "http://www.w3.org/2001/XMLSchema#Name",
  "http://www.w3.org/2001/XMLSchema#NMTOKEN"
].freeze
|
623
|
+
|
624
|
+
# String-based base datatypes (these may carry length facets).
STRING_DATATYPES = [
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "http://www.w3.org/ns/csvw#JSON",
  "http://www.w3.org/2001/XMLSchema#string",
  "http://www.w3.org/2001/XMLSchema#normalizedString",
  "http://www.w3.org/2001/XMLSchema#token",
  "http://www.w3.org/2001/XMLSchema#language",
  "http://www.w3.org/2001/XMLSchema#Name",
  "http://www.w3.org/2001/XMLSchema#NMTOKEN"
].freeze
|
635
|
+
|
636
|
+
# Binary base datatypes (these may carry length facets).
BINARY_DATATYPES = [
  "http://www.w3.org/2001/XMLSchema#base64Binary",
  "http://www.w3.org/2001/XMLSchema#hexBinary"
].freeze
|
640
|
+
|
641
|
+
# Base datatypes whose "format" is interpreted as a number format.
NUMERIC_FORMAT_DATATYPES = [
  "http://www.w3.org/2001/XMLSchema#decimal",
  "http://www.w3.org/2001/XMLSchema#integer",
  "http://www.w3.org/2001/XMLSchema#long",
  "http://www.w3.org/2001/XMLSchema#int",
  "http://www.w3.org/2001/XMLSchema#short",
  "http://www.w3.org/2001/XMLSchema#byte",
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
  "http://www.w3.org/2001/XMLSchema#positiveInteger",
  "http://www.w3.org/2001/XMLSchema#unsignedLong",
  "http://www.w3.org/2001/XMLSchema#unsignedInt",
  "http://www.w3.org/2001/XMLSchema#unsignedShort",
  "http://www.w3.org/2001/XMLSchema#unsignedByte",
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger",
  "http://www.w3.org/2001/XMLSchema#negativeInteger",
  "http://www.w3.org/2001/XMLSchema#double",
  "http://www.w3.org/2001/XMLSchema#float"
].freeze
|
659
|
+
|
660
|
+
# Base datatypes whose "format" is interpreted as a date/time format.
DATE_FORMAT_DATATYPES = [
  "http://www.w3.org/2001/XMLSchema#date",
  "http://www.w3.org/2001/XMLSchema#dateTime",
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp",
  "http://www.w3.org/2001/XMLSchema#time"
].freeze
|
666
|
+
|
667
|
+
# Short names of the built-in datatypes mapped to their full datatype URLs.
BUILT_IN_DATATYPES = {
  "number" => "http://www.w3.org/2001/XMLSchema#double",
  "binary" => "http://www.w3.org/2001/XMLSchema#base64Binary",
  "datetime" => "http://www.w3.org/2001/XMLSchema#dateTime",
  "any" => "http://www.w3.org/2001/XMLSchema#anyAtomicType",
  "xml" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "html" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "json" => "http://www.w3.org/ns/csvw#JSON",
  "anyAtomicType" => "http://www.w3.org/2001/XMLSchema#anyAtomicType",
  "anyURI" => "http://www.w3.org/2001/XMLSchema#anyURI",
  "base64Binary" => "http://www.w3.org/2001/XMLSchema#base64Binary",
  "boolean" => "http://www.w3.org/2001/XMLSchema#boolean",
  "date" => "http://www.w3.org/2001/XMLSchema#date",
  "dateTime" => "http://www.w3.org/2001/XMLSchema#dateTime",
  "dateTimeStamp" => "http://www.w3.org/2001/XMLSchema#dateTimeStamp",
  "decimal" => "http://www.w3.org/2001/XMLSchema#decimal",
  "integer" => "http://www.w3.org/2001/XMLSchema#integer",
  "long" => "http://www.w3.org/2001/XMLSchema#long",
  "int" => "http://www.w3.org/2001/XMLSchema#int",
  "short" => "http://www.w3.org/2001/XMLSchema#short",
  "byte" => "http://www.w3.org/2001/XMLSchema#byte",
  "nonNegativeInteger" => "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
  "positiveInteger" => "http://www.w3.org/2001/XMLSchema#positiveInteger",
  "unsignedLong" => "http://www.w3.org/2001/XMLSchema#unsignedLong",
  "unsignedInt" => "http://www.w3.org/2001/XMLSchema#unsignedInt",
  "unsignedShort" => "http://www.w3.org/2001/XMLSchema#unsignedShort",
  "unsignedByte" => "http://www.w3.org/2001/XMLSchema#unsignedByte",
  "nonPositiveInteger" => "http://www.w3.org/2001/XMLSchema#nonPositiveInteger",
  "negativeInteger" => "http://www.w3.org/2001/XMLSchema#negativeInteger",
  "double" => "http://www.w3.org/2001/XMLSchema#double",
  "duration" => "http://www.w3.org/2001/XMLSchema#duration",
  "dayTimeDuration" => "http://www.w3.org/2001/XMLSchema#dayTimeDuration",
  "yearMonthDuration" => "http://www.w3.org/2001/XMLSchema#yearMonthDuration",
  "float" => "http://www.w3.org/2001/XMLSchema#float",
  "gDay" => "http://www.w3.org/2001/XMLSchema#gDay",
  "gMonth" => "http://www.w3.org/2001/XMLSchema#gMonth",
  "gMonthDay" => "http://www.w3.org/2001/XMLSchema#gMonthDay",
  "gYear" => "http://www.w3.org/2001/XMLSchema#gYear",
  "gYearMonth" => "http://www.w3.org/2001/XMLSchema#gYearMonth",
  "hexBinary" => "http://www.w3.org/2001/XMLSchema#hexBinary",
  "QName" => "http://www.w3.org/2001/XMLSchema#QName",
  "string" => "http://www.w3.org/2001/XMLSchema#string",
  "normalizedString" => "http://www.w3.org/2001/XMLSchema#normalizedString",
  "token" => "http://www.w3.org/2001/XMLSchema#token",
  "language" => "http://www.w3.org/2001/XMLSchema#language",
  "Name" => "http://www.w3.org/2001/XMLSchema#Name",
  "NMTOKEN" => "http://www.w3.org/2001/XMLSchema#NMTOKEN",
  "time" => "http://www.w3.org/2001/XMLSchema#time"
}.freeze
|
716
|
+
|
717
|
+
# Encoding names accepted by the "encoding" dialect property.
VALID_ENCODINGS = [
  "utf-8",
  "ibm866",
  "iso-8859-2",
  "iso-8859-3",
  "iso-8859-4",
  "iso-8859-5",
  "iso-8859-6",
  "iso-8859-7",
  "iso-8859-8",
  "iso-8859-8-i",
  "iso-8859-10",
  "iso-8859-13",
  "iso-8859-14",
  "iso-8859-15",
  "iso-8859-16",
  "koi8-r",
  "koi8-u",
  "macintosh",
  "windows-874",
  "windows-1250",
  "windows-1251",
  "windows-1252",
  "windows-1253",
  "windows-1254",
  "windows-1255",
  "windows-1256",
  "windows-1257",
  "windows-1258",
  "x-mac-cyrillic",
  "gb18030",
  "hz-gb-2312",
  "big5",
  "euc-jp",
  "iso-2022-jp",
  "shift_jis",
  "euc-kr",
  "replacement",
  "utf-16be",
  "utf-16le",
  "x-user-defined"
].freeze
|
759
|
+
end
|
760
|
+
end
|
761
|
+
end
|