wjordan213-csvlint 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,182 @@
1
module Csvlint
  module Csvw
    # Matches and parses date/time cell values, either against the default
    # lexical form of an XML Schema date/time datatype (when no pattern is
    # given) or against a date format pattern such as "dd/MM/yyyy" or
    # "HH:mm:ss.SSS".
    #
    # All generated regular expressions are anchored with \A and \z so that a
    # value must match in its entirety; a valid first line followed by other
    # lines is rejected.
    class DateFormat

      attr_reader :pattern

      XSD_DATE = "http://www.w3.org/2001/XMLSchema#date"
      XSD_TIME = "http://www.w3.org/2001/XMLSchema#time"
      XSD_DATE_TIME = "http://www.w3.org/2001/XMLSchema#dateTime"

      # Regular expression fragment for each supported pattern field.
      FIELDS = {
        "yyyy" => /(?<year>-?([1-9][0-9]{3,}|0[0-9]{3}))/,
        "MM" => /(?<month>0[1-9]|1[0-2])/,
        "M" => /(?<month>[1-9]|1[0-2])/,
        "dd" => /(?<day>0[1-9]|[12][0-9]|3[01])/,
        "d" => /(?<day>[1-9]|[12][0-9]|3[01])/,
        "HH" => /(?<hour>[01][0-9]|2[0-3])/,
        "mm" => /(?<minute>[0-5][0-9])/,
        "ss" => /([0-6][0-9])/,
        "X" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
        "XX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
        "XXX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
        "x" => /(?<timezone>[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
        "xx" => /(?<timezone>[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
        "xxx" => /(?<timezone>[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
      }.freeze

      # Joins regexp fragments (Regexp or String) into one expression that
      # must match the whole value.
      def self.anchored(*parts)
        Regexp.new("\\A#{parts.join}\\z")
      end
      private_class_method :anchored

      # Whole seconds, and seconds with an optional fractional part. The
      # backslash is doubled so that the regexp receives a literal "\." and
      # not "." (which would match any character).
      SECOND = "(?<second>#{FIELDS["ss"]})"
      SECOND_WITH_FRACTION = "(?<second>#{FIELDS["ss"]}(\\.[0-9]+)?)"
      OPTIONAL_TIMEZONE = "#{FIELDS["XXX"]}?"
      DOT = "\\."

      DATE_PATTERN_REGEXP = {
        "yyyy-MM-dd" => anchored(FIELDS["yyyy"], "-", FIELDS["MM"], "-", FIELDS["dd"]),
        "yyyyMMdd" => anchored(FIELDS["yyyy"], FIELDS["MM"], FIELDS["dd"]),
        "dd-MM-yyyy" => anchored(FIELDS["dd"], "-", FIELDS["MM"], "-", FIELDS["yyyy"]),
        "d-M-yyyy" => anchored(FIELDS["d"], "-", FIELDS["M"], "-", FIELDS["yyyy"]),
        "MM-dd-yyyy" => anchored(FIELDS["MM"], "-", FIELDS["dd"], "-", FIELDS["yyyy"]),
        "M-d-yyyy" => anchored(FIELDS["M"], "-", FIELDS["d"], "-", FIELDS["yyyy"]),
        "dd/MM/yyyy" => anchored(FIELDS["dd"], "/", FIELDS["MM"], "/", FIELDS["yyyy"]),
        "d/M/yyyy" => anchored(FIELDS["d"], "/", FIELDS["M"], "/", FIELDS["yyyy"]),
        "MM/dd/yyyy" => anchored(FIELDS["MM"], "/", FIELDS["dd"], "/", FIELDS["yyyy"]),
        "M/d/yyyy" => anchored(FIELDS["M"], "/", FIELDS["d"], "/", FIELDS["yyyy"]),
        "dd.MM.yyyy" => anchored(FIELDS["dd"], DOT, FIELDS["MM"], DOT, FIELDS["yyyy"]),
        "d.M.yyyy" => anchored(FIELDS["d"], DOT, FIELDS["M"], DOT, FIELDS["yyyy"]),
        "MM.dd.yyyy" => anchored(FIELDS["MM"], DOT, FIELDS["dd"], DOT, FIELDS["yyyy"]),
        "M.d.yyyy" => anchored(FIELDS["M"], DOT, FIELDS["d"], DOT, FIELDS["yyyy"])
      }.freeze

      TIME_PATTERN_REGEXP = {
        "HH:mm:ss" => anchored(FIELDS["HH"], ":", FIELDS["mm"], ":", SECOND),
        "HHmmss" => anchored(FIELDS["HH"], FIELDS["mm"], SECOND),
        "HH:mm" => anchored(FIELDS["HH"], ":", FIELDS["mm"]),
        "HHmm" => anchored(FIELDS["HH"], FIELDS["mm"])
      }.freeze

      DATE_TIME_PATTERN_REGEXP = {
        "yyyy-MM-ddTHH:mm:ss" => anchored(FIELDS["yyyy"], "-", FIELDS["MM"], "-", FIELDS["dd"], "T", FIELDS["HH"], ":", FIELDS["mm"], ":", SECOND),
        "yyyy-MM-ddTHH:mm" => anchored(FIELDS["yyyy"], "-", FIELDS["MM"], "-", FIELDS["dd"], "T", FIELDS["HH"], ":", FIELDS["mm"])
      }.freeze

      # Default lexical forms used when no explicit pattern is supplied.
      DEFAULT_REGEXP = {
        "http://www.w3.org/2001/XMLSchema#date" =>
          anchored(FIELDS["yyyy"], "-", FIELDS["MM"], "-", FIELDS["dd"], OPTIONAL_TIMEZONE),
        "http://www.w3.org/2001/XMLSchema#dateTime" =>
          anchored(FIELDS["yyyy"], "-", FIELDS["MM"], "-", FIELDS["dd"], "T", FIELDS["HH"], ":", FIELDS["mm"], ":", SECOND_WITH_FRACTION, OPTIONAL_TIMEZONE),
        "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
          anchored(FIELDS["yyyy"], "-", FIELDS["MM"], "-", FIELDS["dd"], "T", FIELDS["HH"], ":", FIELDS["mm"], ":", SECOND_WITH_FRACTION, FIELDS["XXX"]),
        "http://www.w3.org/2001/XMLSchema#gDay" =>
          anchored("---", FIELDS["dd"], OPTIONAL_TIMEZONE),
        "http://www.w3.org/2001/XMLSchema#gMonth" =>
          anchored("--", FIELDS["MM"], OPTIONAL_TIMEZONE),
        "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
          anchored("--", FIELDS["MM"], "-", FIELDS["dd"], OPTIONAL_TIMEZONE),
        "http://www.w3.org/2001/XMLSchema#gYear" =>
          anchored(FIELDS["yyyy"], OPTIONAL_TIMEZONE),
        "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
          anchored(FIELDS["yyyy"], "-", FIELDS["MM"], OPTIONAL_TIMEZONE),
        "http://www.w3.org/2001/XMLSchema#time" =>
          anchored(FIELDS["HH"], ":", FIELDS["mm"], ":", SECOND_WITH_FRACTION, OPTIONAL_TIMEZONE)
      }.freeze

      # @param pattern [String, nil] date format pattern; when nil the default
      #   lexical form for +datatype+ is used
      # @param datatype [String, nil] datatype URI, only consulted when
      #   +pattern+ is nil
      # @raise [Csvlint::Csvw::DateFormatError] if the pattern contains field
      #   symbols that are not supported
      def initialize(pattern, datatype=nil)
        @pattern = pattern

        if @pattern.nil?
          @regexp = DEFAULT_REGEXP[datatype]
          @type = datatype
          return
        end

        validate_field_symbols(@pattern)

        if (@regexp = DATE_PATTERN_REGEXP[@pattern])
          @type = XSD_DATE
        elsif (@regexp = TIME_PATTERN_REGEXP[@pattern])
          @type = XSD_TIME
        elsif (@regexp = DATE_TIME_PATTERN_REGEXP[@pattern])
          @type = XSD_DATE_TIME
        else
          @type = custom_type(@pattern)
          @regexp = custom_regexp(@pattern)
        end
      end

      # @return [Boolean] whether +value+ conforms to the format
      def match(value)
        value =~ @regexp ? true : false
      end

      # Parses +value+ according to the format.
      #
      # @return [Date] for date formats
      # @return [DateTime] for date-time formats
      # @return [Hash] of the matched components for every other type
      # @return [nil] if the value does not match, names an impossible
      #   calendar date, or no regexp is known for the datatype
      def parse(value)
        return nil if @regexp.nil?
        m = @regexp.match(value)
        return nil if m.nil?

        case @type
        when XSD_DATE
          begin
            Date.new(m["year"].to_i, m["month"].to_i, m["day"].to_i)
          rescue ArgumentError
            nil
          end
        when XSD_DATE_TIME
          seconds = m.names.include?("second") ? m["second"].to_f : 0
          offset = m.names.include?("timezone") && m["timezone"] ? m["timezone"] : ''
          begin
            DateTime.new(m["year"].to_i, m["month"].to_i, m["day"].to_i, m["hour"].to_i, m["minute"].to_i, seconds, offset)
          rescue ArgumentError
            nil
          end
        else
          components(m)
        end
      end

      private

      # Raises if, once all known fields are removed, the pattern still
      # contains letters reserved for date fields.
      def validate_field_symbols(pattern)
        leftover = pattern.gsub(/S+/, "")
        FIELDS.keys.sort_by { |f| -f.length }.each do |field|
          leftover = leftover.gsub(field, "")
        end
        raise Csvw::DateFormatError, "unrecognised date field symbols in date format" if leftover =~ /[GyYuUrQqMLlwWdDFgEecahHKkjJmsSAzZOvVXx]/
      end

      # Picks the value type for a pattern that is not in the lookup tables.
      # Patterns with neither hours nor a year fall back to dateTime.
      def custom_type(pattern)
        has_time = pattern =~ /HH/
        has_date = pattern =~ /yyyy/
        return XSD_TIME if has_time && !has_date
        return XSD_DATE if has_date && !has_time
        XSD_DATE_TIME
      end

      # Builds a regexp for a pattern that is not in the lookup tables by
      # substituting each field with its fragment.
      # NOTE: literal characters in the pattern are not regexp-escaped, so
      # a "." used as a separator in a custom pattern matches any character.
      def custom_regexp(pattern)
        regexp = pattern.sub("HH", FIELDS["HH"].to_s)
        regexp = regexp.sub("mm", FIELDS["mm"].to_s)

        fraction = pattern[/ss\.(S+)/, 1]
        if fraction
          # Block form: the replacement contains a backslash that must not
          # be interpreted as a substitution escape.
          regexp = regexp.sub(/ss\.S+/) { "(?<second>#{FIELDS["ss"]}(\\.[0-9]{1,#{fraction.length}})?)" }
        else
          regexp = regexp.sub("ss", "(?<second>#{FIELDS["ss"]})")
        end

        if pattern =~ /yyyy/
          regexp = regexp.sub("yyyy", FIELDS["yyyy"].to_s)
          regexp = regexp.sub("MM", FIELDS["MM"].to_s)
          regexp = regexp.sub("M", FIELDS["M"].to_s)
          regexp = regexp.sub("dd", FIELDS["dd"].to_s)
          # only a "d" followed by a separator is a day field; other "d"s
          # belong to group names already inserted (e.g. "second")
          regexp = regexp.sub(/d(?=[-T \/\.])/, FIELDS["d"].to_s)
        end

        regexp = regexp.sub("XXX", FIELDS["XXX"].to_s)
        regexp = regexp.sub("XX", FIELDS["XX"].to_s)
        regexp = regexp.sub("X", FIELDS["X"].to_s)
        regexp = regexp.sub("xxx", FIELDS["xxx"].to_s)
        regexp = regexp.sub("xx", FIELDS["xx"].to_s)
        # skip the "x" of the "(?-mix:" prefix of fragments already inserted
        regexp = regexp.sub(/x(?!:)/, FIELDS["x"].to_s)

        Regexp.new("\\A#{regexp}\\z")
      end

      # Converts the named captures of a match into a Hash: the timezone is
      # normalised to "+hh:mm", seconds become a Float, everything else an
      # Integer. Groups that did not participate are omitted.
      def components(m)
        result = {}
        m.names.each do |field|
          next if m[field].nil?
          case field
          when "timezone"
            tz = m["timezone"]
            tz = "+00:00" if tz == 'Z'
            tz += ':00' if tz.length == 3
            tz = "#{tz[0..2]}:#{tz[3..4]}" unless tz =~ /:/
            result["timezone"] = tz
          when "second"
            result["second"] = m["second"].to_f
          else
            result[field] = m[field].to_i
          end
        end
        result
      end

    end

    # Raised when a date format pattern uses unsupported field symbols.
    class DateFormatError < StandardError

    end
  end
end
@@ -0,0 +1,13 @@
1
module Csvlint
  module Csvw
    # Error raised for invalid CSVW metadata. It carries the path of the
    # offending property (if known) alongside the usual exception message.
    class MetadataError < StandardError

      # Path of the metadata property the error relates to, or nil.
      attr_reader :path

      # @param path [Object, nil] location of the offending property
      def initialize(path=nil)
        @path = path
      end

    end
  end
end
@@ -0,0 +1,211 @@
1
module Csvlint
  module Csvw
    # Matches and parses numeric cell values, either in the default numeric
    # lexical form (no pattern) or according to a number format pattern such
    # as "#,##0.00", optionally surrounded by a literal prefix and suffix.
    #
    # Value regexps are anchored with \A and \z so the whole value must match.
    class NumberFormat

      attr_reader :pattern, :prefix, :numeric_part, :suffix, :grouping_separator, :decimal_separator, :primary_grouping_size, :secondary_grouping_size, :fractional_grouping_size

      INTEGER_REGEXP = /\A[-+]?[0-9]+[%‰]?\z/

      # @param pattern [String, nil] number format pattern, or nil for the
      #   default numeric form
      # @param grouping_separator [String, nil] defaults to "," when a
      #   pattern is given and to none otherwise
      # @param decimal_separator [String, nil] defaults to "."
      # @raise [Csvlint::Csvw::NumberFormatError] if the pattern has no
      #   numeric part
      def initialize(pattern=nil, grouping_separator=nil, decimal_separator=".")
        @pattern = pattern
        @grouping_separator = grouping_separator || (@pattern.nil? ? nil : ",")
        @decimal_separator = decimal_separator || "."
        dec = Regexp.escape(@decimal_separator)

        if pattern.nil?
          @regexp = Regexp.new("\\A(([-+]?[0-9]+(#{dec}[0-9]+)?([Ee][-+]?[0-9]+)?[%‰]?)|NaN|INF|-INF)\\z")
          return
        end

        sep = Regexp.escape(@grouping_separator)

        numeric_part_regexp = Regexp.new("(?<numeric_part>([0#Ee]|#{sep}|#{dec})+)")
        number_format_regexp = Regexp.new("^(?<prefix>.*?)#{numeric_part_regexp}(?<suffix>.*?)$")
        match = number_format_regexp.match(pattern)
        raise Csvw::NumberFormatError, "invalid number format" if match.nil?

        @prefix = match["prefix"]
        @numeric_part = match["numeric_part"]
        @suffix = match["suffix"]

        parts = @numeric_part.split("E")
        mantissa_part = parts[0]
        exponent_part = parts[1] || ""
        mantissa_parts = mantissa_part.split(@decimal_separator)
        integer_part = mantissa_parts[0]
        fractional_part = mantissa_parts[1] || ""

        @integer_pattern = exponent_part == "" && fractional_part == ""

        min_integer_digits = integer_part.gsub(@grouping_separator, "").gsub("#", "").length
        min_fraction_digits = fractional_part.gsub(@grouping_separator, "").gsub("#", "").length
        max_fraction_digits = fractional_part.gsub(@grouping_separator, "").length
        min_exponent_digits = exponent_part.gsub("#", "").length
        max_exponent_digits = exponent_part.length

        # Groups after the first separator determine the grouping sizes.
        integer_parts = integer_part.split(@grouping_separator)[1..-1]
        @primary_grouping_size = group_length(integer_parts, -1, 0)
        @secondary_grouping_size = group_length(integer_parts, -2, @primary_grouping_size)

        fractional_parts = fractional_part.split(@grouping_separator)[0..-2]
        @fractional_grouping_size = group_length(fractional_parts, 0, 0)

        numeric_regexp = "[-+]?"
        numeric_regexp += integer_regexp(min_integer_digits, sep)
        numeric_regexp += fractional_regexp(min_fraction_digits, max_fraction_digits, sep, dec)

        if max_exponent_digits > 0
          numeric_regexp += "E"
          numeric_regexp += "[0-9]{0,#{max_exponent_digits - min_exponent_digits}}" unless max_exponent_digits == min_exponent_digits
          numeric_regexp += "[0-9]{#{min_exponent_digits}}" unless min_exponent_digits == 0
        end

        # Prefix and suffix are literal text from the pattern, so both are
        # escaped before being embedded in the regexp.
        @regexp = Regexp.new("\\A(?<prefix>#{Regexp.escape(@prefix)})(?<numeric_part>#{numeric_regexp})(?<suffix>#{Regexp.escape(@suffix)})\\z")
      end

      # @return [Boolean] whether +value+ conforms to the format
      def match(value)
        value =~ @regexp ? true : false
      end

      # Parses +value+ into a number without modifying the string passed in.
      #
      # @return [Integer, Float] the parsed number (percent and per-mille
      #   values are scaled when no pattern is set)
      # @return [nil] if the value does not conform to the format
      def parse(value)
        if @pattern.nil?
          unless @grouping_separator.nil?
            sep = Regexp.escape(@grouping_separator)
            # a leading or doubled grouping separator is never valid
            return nil if value =~ Regexp.new("((^#{sep})|#{sep}{2})")
            value = value.gsub(@grouping_separator, "")
          end

          if value =~ INTEGER_REGEXP
            case value[-1]
            when "%"
              return value.to_f / 100
            when "‰"
              return value.to_f / 1000
            else
              return value.to_i
            end
          elsif value =~ @regexp
            case value
            when "NaN"
              return Float::NAN
            when "INF"
              return Float::INFINITY
            when "-INF"
              return -Float::INFINITY
            else
              case value[-1]
              when "%"
                return value.to_f / 100
              when "‰"
                return value.to_f / 1000
              else
                return value.to_f
              end
            end
          else
            return nil
          end
        else
          match = @regexp.match(value)
          return nil if match.nil?
          number = match["numeric_part"].gsub(@grouping_separator, "")
          return number.to_i if @integer_pattern
          return number.to_f
        end
      end

      private

      # Length of parts[index], or +default+ when the list or entry is missing.
      def group_length(parts, index, default)
        part = parts && parts[index]
        part ? part.length : default
      end

      # Regexp source for the integer part, honouring the grouping sizes.
      def integer_regexp(min_integer_digits, sep)
        return "[0-9]*[0-9]{#{min_integer_digits}}" if @primary_grouping_size == 0

        leading_regexp = "([0-9]{0,#{@secondary_grouping_size - 1}}#{sep})?"
        secondary_groups = "([0-9]{#{@secondary_grouping_size}}#{sep})*"
        if min_integer_digits > @primary_grouping_size
          remaining_req_digits = min_integer_digits - @primary_grouping_size
          req_secondary_groups = remaining_req_digits / @secondary_grouping_size > 0 ? "([0-9]{#{@secondary_grouping_size}}#{sep}){#{remaining_req_digits / @secondary_grouping_size}}" : ""
          if remaining_req_digits % @secondary_grouping_size > 0
            final_req_digits = "[0-9]{#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
            final_opt_digits = "[0-9]{0,#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
            "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{remaining_req_digits % @secondary_grouping_size}}#{sep}#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
          else
            "(#{leading_regexp}#{secondary_groups})?#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
          end
        else
          final_req_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{#{@primary_grouping_size - min_integer_digits}}" : ""
          final_opt_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{0,#{@primary_grouping_size - min_integer_digits}}" : ""
          "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{min_integer_digits}}"
        end
      end

      # Regexp source for the decimal separator and fractional digits, or an
      # empty string when the pattern has no fractional part.
      def fractional_regexp(min_fraction_digits, max_fraction_digits, sep, dec)
        return "" unless max_fraction_digits > 0

        fractional_regexp = ""

        if @fractional_grouping_size == 0
          fractional_regexp += "[0-9]{#{min_fraction_digits}}" if min_fraction_digits > 0
          fractional_regexp += "[0-9]{0,#{max_fraction_digits - min_fraction_digits}}" unless min_fraction_digits == max_fraction_digits
        elsif min_fraction_digits > 0
          if min_fraction_digits >= @fractional_grouping_size
            # first group of required digits - something like "[0-9]{3}"
            fractional_regexp += "[0-9]{#{@fractional_grouping_size}}"
            # additional groups of required digits - something like "(,[0-9]{3}){1}"
            fractional_regexp += "(#{sep}[0-9]{#{@fractional_grouping_size}}){#{min_fraction_digits / @fractional_grouping_size - 1}}" if min_fraction_digits / @fractional_grouping_size > 1
            fractional_regexp += "#{sep}" if min_fraction_digits % @fractional_grouping_size > 0
          end
          # additional required digits - something like ",[0-9]{1}"
          fractional_regexp += "[0-9]{#{min_fraction_digits % @fractional_grouping_size}}" if min_fraction_digits % @fractional_grouping_size > 0

          opt_fractional_digits = max_fraction_digits - min_fraction_digits
          if opt_fractional_digits > 0
            fractional_regexp += "("

            if min_fraction_digits % @fractional_grouping_size > 0
              # optional fractional digits to complete the group
              fractional_regexp += "[0-9]{0,#{[opt_fractional_digits, @fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)].min}}"
              fractional_regexp += "|"
              fractional_regexp += "[0-9]{#{[opt_fractional_digits, @fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)].min}}"
            else
              fractional_regexp += "(#{sep}[0-9]{1,#{@fractional_grouping_size}})?"
              fractional_regexp += "|"
              fractional_regexp += "#{sep}[0-9]{#{@fractional_grouping_size}}"
            end

            remaining_opt_fractional_digits = opt_fractional_digits - (@fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size))
            if remaining_opt_fractional_digits > 0
              if remaining_opt_fractional_digits % @fractional_grouping_size > 0
                # optional fraction digits in groups
                fractional_regexp += "(#{sep}[0-9]{#{@fractional_grouping_size}}){0,#{remaining_opt_fractional_digits / @fractional_grouping_size}}" if remaining_opt_fractional_digits > @fractional_grouping_size
                # remaining optional fraction digits
                fractional_regexp += "(#{sep}[0-9]{1,#{remaining_opt_fractional_digits % @fractional_grouping_size}})?"
              else
                # optional fraction digits in groups
                fractional_regexp += "(#{sep}[0-9]{#{@fractional_grouping_size}}){0,#{(remaining_opt_fractional_digits / @fractional_grouping_size) - 1}}" if remaining_opt_fractional_digits > @fractional_grouping_size
                # remaining optional fraction digits
                fractional_regexp += "(#{sep}[0-9]{1,#{@fractional_grouping_size}})?"
              end

              # NOTE(review): the next two additions repeat the optional
              # groups appended just above; kept as in the original -
              # confirm whether the repetition is intended.
              fractional_regexp += "(#{sep}[0-9]{#{@fractional_grouping_size}}){0,#{(remaining_opt_fractional_digits / @fractional_grouping_size) - 1}}" if remaining_opt_fractional_digits > @fractional_grouping_size
              fractional_regexp += "(#{sep}[0-9]{1,#{remaining_opt_fractional_digits % @fractional_grouping_size}})?" if remaining_opt_fractional_digits % @fractional_grouping_size > 0
            end
            fractional_regexp += ")"
          end
        elsif max_fraction_digits % @fractional_grouping_size > 0
          # optional fractional digits in groups
          fractional_regexp += "([0-9]{#{@fractional_grouping_size}}#{sep}){0,#{max_fraction_digits / @fractional_grouping_size}}"
          # remaining optional fraction digits
          fractional_regexp += "(#{sep}[0-9]{1,#{max_fraction_digits % @fractional_grouping_size}})?" if max_fraction_digits % @fractional_grouping_size > 0
        else
          fractional_regexp += "([0-9]{#{@fractional_grouping_size}}#{sep}){0,#{(max_fraction_digits / @fractional_grouping_size) - 1}}" if max_fraction_digits > @fractional_grouping_size
          fractional_regexp += "[0-9]{1,#{@fractional_grouping_size}}"
        end

        fractional_regexp = "#{dec}#{fractional_regexp}"
        # the whole fractional part is optional when no digit is required
        fractional_regexp = "(#{fractional_regexp})?" if min_fraction_digits == 0
        fractional_regexp
      end

    end

    # Raised when a number format pattern cannot be interpreted.
    class NumberFormatError < StandardError

    end
  end
end
@@ -0,0 +1,761 @@
1
+ module Csvlint
2
+ module Csvw
3
+ class PropertyChecker
4
+
5
+ class << self
6
+
7
# Validates a single metadata property.
#
# Known properties are delegated to their checker in PROPERTIES. Properties
# of the form "prefix:name" whose prefix is listed in NAMESPACES are treated
# as annotations and validated as common property values. Anything else is
# reported as an invalid property.
#
# @return [Array] the (possibly normalised) value, the warning(s), and the
#   property type
def check_property(property, value, base_url, lang)
  return PROPERTIES[property].call(value, base_url, lang) if PROPERTIES.include?(property)

  namespaced = property =~ /^([a-z]+):/ && NAMESPACES.include?(property.split(":")[0])
  return value, :invalid_property, nil unless namespaced

  checked, warnings = check_common_property_value(value, base_url, lang)
  return checked, warnings, :annotation
end
17
+
18
+ private
19
# Validates the value of a common (annotation) property.
#
# Non-Hash values are returned untouched with no warnings. For a Hash, a
# copy is checked key by key: disallowed keywords raise a MetadataError,
# "@id" is resolved against base_url when one is given, and nested Hash
# values are checked recursively.
#
# @return [Array] the checked value and a list of warnings (nil for
#   non-Hash values)
# @raise [Csvlint::Csvw::MetadataError] when the value breaks the rules for
#   common properties
def check_common_property_value(value, base_url, lang)
  return value, nil unless value.is_a?(Hash)

  checked = value.clone
  warnings = []
  checked.each do |key, val|
    case key
    when "@context", "@list", "@set"
      raise Csvlint::Csvw::MetadataError.new(key), "common property has #{key} property"
    when "@type"
      recognised = (checked["@value"] && BUILT_IN_DATATYPES.include?(val)) ||
                   (!checked["@value"] && BUILT_IN_TYPES.include?(val)) ||
                   (val =~ /^([a-z]+):/ && NAMESPACES.include?(val.split(":")[0]))
      unless recognised
        # must be an absolute URI
        begin
          raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @type (#{val})" if URI(val).scheme.nil?
        rescue
          raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @type (#{val})"
        end
      end
    when "@id"
      unless base_url.nil?
        begin
          val = URI.join(base_url, val)
        rescue
          raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @id (#{val})"
        end
      end
    when "@value"
      raise Csvlint::Csvw::MetadataError.new(), "common property with @value has both @language and @type" if checked["@type"] && checked["@language"]
      raise Csvlint::Csvw::MetadataError.new(), "common property with @value has properties other than @language or @type" unless checked.except("@type", "@language", "@value").empty?
    when "@language"
      raise Csvlint::Csvw::MetadataError.new(), "common property with @language lacks a @value" unless checked["@value"]
      # NOTE(review): the other language checks visible in this file use
      # BCP47_REGEXP - confirm BCP47_LANGUAGE_REGEXP is the intended constant.
      raise Csvlint::Csvw::MetadataError.new(), "common property has invalid @language (#{val})" unless val =~ BCP47_LANGUAGE_REGEXP || val.nil?
    else
      if key[0] == "@"
        raise Csvlint::Csvw::MetadataError.new(), "common property has property other than @id, @type, @value or @language beginning with @ (#{key})"
      end
    end

    if val.instance_of? Hash
      val, nested_warnings = check_common_property_value(val, base_url, lang)
      warnings.concat(Array(nested_warnings))
    end
    checked[key] = val
  end
  return checked, warnings
end
74
+
75
# Converts a min/max value facet of a datatype description in place.
#
# For date/time datatypes the facet is parsed with the datatype's default
# DateFormat; an unparseable facet is removed from +value+ and reported as
# an :invalid_<property> warning. Facets on numeric datatypes are left as
# they are.
#
# @param value [Hash] datatype description, modified in place
# @param property [String] facet name, e.g. "minInclusive"
# @param datatype [String] base datatype of the description
# @return [Array<Symbol>] warnings (empty when the facet is absent or valid)
# @raise [Csvlint::Csvw::MetadataError] if the facet is present on a
#   datatype that is neither date/time nor numeric
def convert_value_facet(value, property, datatype)
  return [] unless value[property]

  if DATE_FORMAT_DATATYPES.include?(datatype)
    parsed = Csvlint::Csvw::DateFormat.new(nil, datatype).parse(value[property])
    if parsed.nil?
      value.delete(property)
      # No leading colon inside the string: to_sym would otherwise produce
      # the symbol :":invalid_..." instead of :invalid_...
      return ["invalid_#{property}".to_sym]
    end
    value[property] = parsed
    []
  elsif NUMERIC_FORMAT_DATATYPES.include?(datatype)
    []
  else
    raise Csvlint::Csvw::MetadataError.new("datatype.#{property}"), "#{property} is only allowed for numeric, date/time and duration types"
  end
end
95
+
96
# Builds a checker that accepts only Array values.
# The checker returns [value, nil, type] for an Array and
# [false, :invalid_value, type] for anything else.
def array_property(type)
  lambda do |value, _base_url, _lang|
    if value.instance_of?(Array)
      [value, nil, type]
    else
      [false, :invalid_value, type]
    end
  end
end
102
+
103
# Builds a checker that accepts only the values true and false.
# The checker returns [value, nil, type] for a boolean and
# [false, :invalid_value, type] for anything else.
def boolean_property(type)
  lambda do |value, _base_url, _lang|
    is_boolean = value == true || value == false
    is_boolean ? [value, nil, type] : [false, :invalid_value, type]
  end
end
109
+
110
# Builds a checker that accepts only String values.
# The checker returns [value, nil, type] for a String and
# ["", :invalid_value, type] for anything else.
def string_property(type)
  lambda do |value, _base_url, _lang|
    if value.instance_of?(String)
      [value, nil, type]
    else
      ["", :invalid_value, type]
    end
  end
end
116
+
117
# Builds a checker that accepts only non-negative Integers.
# The checker returns [value, nil, type] for such a value and
# [nil, :invalid_value, type] for anything else.
def numeric_property(type)
  lambda do |value, _base_url, _lang|
    non_negative_integer = value.kind_of?(Integer) && value >= 0
    non_negative_integer ? [value, nil, type] : [nil, :invalid_value, type]
  end
end
123
+
124
# Builds a checker for link (URL) properties.
# The checker rejects values starting with "_:" by raising a MetadataError.
# A String is returned as a URI, resolved against base_url when one is
# given. Any other value yields [base_url, :invalid_value, type].
def link_property(type)
  lambda do |value, base_url, _lang|
    raise Csvlint::Csvw::MetadataError.new(), "URL #{value} starts with _:" if value.to_s =~ /^_:/

    if value.instance_of?(String)
      resolved = base_url.nil? ? URI(value) : URI.join(base_url, value)
      [resolved, nil, type]
    else
      [base_url, :invalid_value, type]
    end
  end
end
131
+
132
# Builds a checker for language-tag properties.
# The checker returns [value, nil, type] when the value matches
# BCP47_REGEXP and [nil, :invalid_value, type] otherwise.
def language_property(type)
  lambda do |value, _base_url, _lang|
    value =~ BCP47_REGEXP ? [value, nil, type] : [nil, :invalid_value, type]
  end
end
138
+
139
# Builds a checker for natural-language properties (such as titles).
#
# The checker normalises the value to a Hash of language => list of strings:
# * a String becomes { lang => [value] }
# * an Array keeps its String entries under lang, with one :invalid_value
#   warning per rejected entry
# * a Hash keeps the keys matching BCP47_REGEXP (filtering each list the
#   same way) and adds an :invalid_language warning per rejected key, plus
#   :invalid_value if nothing is left
# * anything else yields an empty Hash and :invalid_value
def natural_language_property(type)
  # Splits titles into the String entries and one warning per other entry.
  filter_titles = lambda do |titles|
    strings, rejected = titles.partition { |title| title.instance_of?(String) }
    [strings, rejected.map { :invalid_value }]
  end

  lambda do |value, _base_url, lang|
    if value.instance_of?(String)
      [{ lang => [ value ] }, nil, type]
    elsif value.instance_of?(Array)
      titles, warnings = filter_titles.call(value)
      [{ lang => titles }, warnings, type]
    elsif value.instance_of?(Hash)
      warnings = []
      by_language = value.each_with_object({}) do |(language, titles), kept|
        if language =~ BCP47_REGEXP
          kept[language], rejected = filter_titles.call(Array(titles))
          warnings.concat(rejected)
        else
          warnings << :invalid_language
        end
      end
      warnings << :invalid_value if by_language.empty?
      [by_language, warnings, type]
    else
      [{}, :invalid_value, type]
    end
  end
end
179
+
180
# Builds a checker for column-reference properties.
# The checker always succeeds, wrapping the value in an Array (via
# Kernel#Array) so a single reference and a list are handled alike.
def column_reference_property(type)
  lambda do |value, _base_url, _lang|
    [Array(value), nil, type]
  end
end
185
+
186
+
187
+ end
188
+
189
+ PROPERTIES = {
190
+ # context properties
191
+ "@language" => language_property(:context),
192
+ "@base" => link_property(:context),
193
+ # common properties
194
+ "@id" => link_property(:common),
195
+ "notes" => array_property(:common),
196
+ "suppressOutput" => boolean_property(:common),
197
+ # inherited properties
198
+ "null" => lambda { |value, base_url, lang|
199
+ case value
200
+ when String
201
+ return [value], nil, :inherited
202
+ when Array
203
+ values = []
204
+ warnings = []
205
+ value.each do |v|
206
+ if v.instance_of? String
207
+ values << v
208
+ else
209
+ warnings << :invalid_value
210
+ end
211
+ end
212
+ return values, warnings, :inherited
213
+ else
214
+ return [""], :invalid_value, :inherited
215
+ end
216
+ },
217
+ "default" => string_property(:inherited),
218
+ "separator" => lambda { |value, base_url, lang|
219
+ return value, nil, :inherited if value.instance_of?(String) || value.nil?
220
+ return nil, :invalid_value, :inherited
221
+ },
222
+ "lang" => language_property(:inherited),
223
+ "datatype" => lambda { |value, base_url, lang|
224
+ value = value.clone
225
+ warnings = []
226
+ if value.instance_of? Hash
227
+ if value["@id"]
228
+ raise Csvlint::Csvw::MetadataError.new("datatype.@id"), "datatype @id must not be the id of a built-in datatype (#{value["@id"]})" if BUILT_IN_DATATYPES.values.include?(value["@id"])
229
+ v,w,t = PROPERTIES["@id"].call(value["@id"], base_url, lang)
230
+ unless w.nil?
231
+ warnings << w
232
+ value.delete("@id")
233
+ end
234
+ end
235
+
236
+ if value["base"]
237
+ if BUILT_IN_DATATYPES.include? value["base"]
238
+ value["base"] = BUILT_IN_DATATYPES[value["base"]]
239
+ else
240
+ value["base"] = BUILT_IN_DATATYPES["string"]
241
+ warnings << :invalid_datatype_base
242
+ end
243
+ else
244
+ value["base"] = BUILT_IN_DATATYPES["string"]
245
+ end
246
+ elsif BUILT_IN_DATATYPES.include? value
247
+ value = { "@id" => BUILT_IN_DATATYPES[value] }
248
+ else
249
+ value = { "@id" => BUILT_IN_DATATYPES["string"] }
250
+ warnings << :invalid_value
251
+ end
252
+
253
+ unless STRING_DATATYPES.include?(value["base"]) || BINARY_DATATYPES.include?(value["base"])
254
+ raise Csvlint::Csvw::MetadataError.new("datatype.length"), "datatypes based on #{value["base"]} cannot have a length facet" if value["length"]
255
+ raise Csvlint::Csvw::MetadataError.new("datatype.minLength"), "datatypes based on #{value["base"]} cannot have a minLength facet" if value["minLength"]
256
+ raise Csvlint::Csvw::MetadataError.new("datatype.maxLength"), "datatypes based on #{value["base"]} cannot have a maxLength facet" if value["maxLength"]
257
+ end
258
+
259
+ if value["minimum"]
260
+ value["minInclusive"] = value["minimum"]
261
+ value.delete("minimum")
262
+ end
263
+ if value["maximum"]
264
+ value["maxInclusive"] = value["maximum"]
265
+ value.delete("maximum")
266
+ end
267
+
268
+ warnings += convert_value_facet(value, "minInclusive", value["base"])
269
+ warnings += convert_value_facet(value, "minExclusive", value["base"])
270
+ warnings += convert_value_facet(value, "maxInclusive", value["base"])
271
+ warnings += convert_value_facet(value, "maxExclusive", value["base"])
272
+
273
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both minimum/minInclusive (#{value["minInclusive"]}) and minExclusive (#{value["minExclusive"]}" if value["minInclusive"] && value["minExclusive"]
274
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both maximum/maxInclusive (#{value["maxInclusive"]}) and maxExclusive (#{value["maxExclusive"]}" if value["maxInclusive"] && value["maxExclusive"]
275
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{value["minInclusive"]}) cannot be more than maxInclusive (#{value["maxInclusive"]}" if value["minInclusive"] && value["maxInclusive"] && value["minInclusive"] > value["maxInclusive"]
276
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{value["minInclusive"]}) cannot be more than or equal to maxExclusive (#{value["maxExclusive"]}" if value["minInclusive"] && value["maxExclusive"] && value["minInclusive"] >= value["maxExclusive"]
277
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{value["minExclusive"]}) cannot be more than or equal to maxExclusive (#{value["maxExclusive"]}" if value["minExclusive"] && value["maxExclusive"] && value["minExclusive"] > value["maxExclusive"]
278
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{value["minExclusive"]}) cannot be more than maxInclusive (#{value["maxInclusive"]}" if value["minExclusive"] && value["maxInclusive"] && value["minExclusive"] >= value["maxInclusive"]
279
+
280
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be less than minLength (#{value["minLength"]}" if value["length"] && value["minLength"] && value["length"] < value["minLength"]
281
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be more than maxLength (#{value["maxLength"]}" if value["length"] && value["maxLength"] && value["length"] > value["maxLength"]
282
+ raise Csvlint::Csvw::MetadataError.new(""), "datatype minLength (#{value["minLength"]}) cannot be more than maxLength (#{value["maxLength"]}" if value["minLength"] && value["maxLength"] && value["minLength"] > value["maxLength"]
283
+
284
+ if value["format"]
285
+ if REGEXP_FORMAT_DATATYPES.include?(value["base"])
286
+ begin
287
+ value["format"] = Regexp.new(value["format"])
288
+ rescue RegexpError
289
+ value.delete("format")
290
+ warnings << :invalid_regex
291
+ end
292
+ elsif NUMERIC_FORMAT_DATATYPES.include?(value["base"])
293
+ value["format"] = { "pattern" => value["format"] } if value["format"].instance_of? String
294
+ begin
295
+ value["format"] = Csvlint::Csvw::NumberFormat.new(value["format"]["pattern"], value["format"]["groupChar"], value["format"]["decimalChar"] || ".")
296
+ rescue Csvlint::Csvw::NumberFormatError
297
+ value["format"] = Csvlint::Csvw::NumberFormat.new(nil, value["format"]["groupChar"], value["format"]["decimalChar"] || ".")
298
+ warnings << :invalid_number_format
299
+ end
300
+ elsif value["base"] == "http://www.w3.org/2001/XMLSchema#boolean"
301
+ if value["format"].instance_of? String
302
+ value["format"] = value["format"].split("|")
303
+ unless value["format"].length == 2
304
+ value.delete("format")
305
+ warnings << :invalid_boolean_format
306
+ end
307
+ else
308
+ value.delete("format")
309
+ warnings << :invalid_boolean_format
310
+ end
311
+ elsif DATE_FORMAT_DATATYPES.include?(value["base"])
312
+ if value["format"].instance_of? String
313
+ begin
314
+ value["format"] = Csvlint::Csvw::DateFormat.new(value["format"])
315
+ rescue Csvlint::CsvDateFormatError
316
+ value.delete("format")
317
+ warnings << :invalid_date_format
318
+ end
319
+ else
320
+ value.delete("format")
321
+ warnings << :invalid_date_format
322
+ end
323
+ end
324
+ end
325
+ return value, warnings, :inherited
326
+ },
327
+ "required" => boolean_property(:inherited),
328
+ "ordered" => boolean_property(:inherited),
329
+ "aboutUrl" => string_property(:inherited),
330
+ "propertyUrl" => string_property(:inherited),
331
+ "valueUrl" => string_property(:inherited),
332
+ "textDirection" => lambda { |value, base_url, lang|
333
+ value = value.to_sym
334
+ return value, nil, :inherited if [:ltr, :rtl, :auto, :inherit].include? value
335
+ return :inherit, :invalid_value, :inherited
336
+ },
337
+ # column level properties
338
+ "virtual" => boolean_property(:column),
339
+ "titles" => natural_language_property(:column),
340
+ "name" => lambda { |value, base_url, lang|
341
+ return value, nil, :column if value.instance_of?(String) && value =~ NAME_REGEXP
342
+ return nil, :invalid_value, :column
343
+ },
344
+ # table level properties
345
+ "transformations" => lambda { |value, base_url, lang|
346
+ transformations = []
347
+ warnings = []
348
+ if value.instance_of? Array
349
+ value.each_with_index do |transformation,i|
350
+ if transformation.instance_of? Hash
351
+ transformation = transformation.clone
352
+ transformation.each do |p,v|
353
+ if p == "@id"
354
+ raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@id"), "@id starts with _:" if v =~ /^_:/
355
+ elsif p == "@type"
356
+ raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@type"), "@type of transformation is not 'Template'" if v != 'Template'
357
+ elsif p == "url"
358
+ elsif p == "titles"
359
+ else
360
+ v, warning, type = check_property(p, v, base_url, lang)
361
+ unless type == :transformation && (warning.nil? || warning.empty?)
362
+ value.delete(p)
363
+ warnings << :invalid_property unless type == :transformation
364
+ warnings += Array(warning)
365
+ end
366
+ end
367
+ end
368
+ transformations << transformation
369
+ else
370
+ warnings << :invalid_transformation
371
+ end
372
+ end
373
+ else
374
+ warnings << :invalid_value
375
+ end
376
+ return transformations, warnings, :table
377
+ },
378
+ "tableDirection" => lambda { |value, base_url, lang|
379
+ value = value.to_sym
380
+ return value, nil, :table if [:ltr, :rtl, :auto].include? value
381
+ return :auto, :invalid_value, :table
382
+ },
383
+ "tableSchema" => lambda { |value, base_url, lang|
384
+ schema_base_url = base_url
385
+ schema_lang = lang
386
+ if value.instance_of? String
387
+ schema_url = URI.join(base_url, value).to_s
388
+ schema_base_url = schema_url
389
+ schema_ref = schema_url.start_with?("file:") ? File.new(schema_url[5..-1]) : schema_url
390
+ schema = JSON.parse( open(schema_ref).read )
391
+ schema["@id"] = schema["@id"] ? URI.join(schema_url, schema["@id"]).to_s : schema_url
392
+ if schema["@context"]
393
+ if schema["@context"].instance_of?(Array) && schema["@context"].length > 1
394
+ schema_base_url = schema["@context"][1]["@base"] ? URI.join(schema_base_url, schema["@context"][1]["@base"]).to_s : schema_base_url
395
+ schema_lang = schema["@context"][1]["@language"] || schema_lang
396
+ end
397
+ schema.delete("@context")
398
+ end
399
+ elsif value.instance_of? Hash
400
+ schema = value.clone
401
+ else
402
+ return {}, :invalid_value, :table
403
+ end
404
+ warnings = []
405
+ schema.each do |p,v|
406
+ if p == "@id"
407
+ raise Csvlint::Csvw::MetadataError.new("tableSchema.@id"), "@id starts with _:" if v =~ /^_:/
408
+ elsif p == "@type"
409
+ raise Csvlint::Csvw::MetadataError.new("tableSchema.@type"), "@type of schema is not 'Schema'" if v != 'Schema'
410
+ else
411
+ v, warning, type = check_property(p, v, schema_base_url, schema_lang)
412
+ if (type == :schema || type == :inherited) && (warning.nil? || warning.empty?)
413
+ schema[p] = v
414
+ else
415
+ schema.delete(p)
416
+ warnings << :invalid_property unless (type == :schema || type == :inherited)
417
+ warnings += Array(warning)
418
+ end
419
+ end
420
+ end
421
+ return schema, warnings, :table
422
+ },
423
+ "url" => link_property(:table),
424
+ "dialect" => lambda { |value, base_url, lang|
425
+ if value.instance_of? Hash
426
+ value = value.clone
427
+ warnings = []
428
+ value.each do |p,v|
429
+ if p == "@id"
430
+ raise Csvlint::Csvw::MetadataError.new("dialect.@id"), "@id starts with _:" if v =~ /^_:/
431
+ elsif p == "@type"
432
+ raise Csvlint::Csvw::MetadataError.new("dialect.@type"), "@type of dialect is not 'Dialect'" if v != 'Dialect'
433
+ else
434
+ v, warning, type = check_property(p, v, base_url, lang)
435
+ if type == :dialect && (warning.nil? || warning.empty?)
436
+ value[p] = v
437
+ else
438
+ value.delete(p)
439
+ warnings << :invalid_property unless type == :dialect
440
+ warnings += Array(warning)
441
+ end
442
+ end
443
+ end
444
+ return value, warnings, :table
445
+ else
446
+ return {}, :invalid_value, :table
447
+ end
448
+ },
449
+ # dialect properties
450
+ "commentPrefix" => string_property(:dialect),
451
+ "delimiter" => string_property(:dialect),
452
+ "doubleQuote" => boolean_property(:dialect),
453
+ "encoding" => lambda { |value, base_url, lang|
454
+ return value, nil, :dialect if VALID_ENCODINGS.include? value
455
+ return nil, :invalid_value, :dialect
456
+ },
457
+ "header" => boolean_property(:dialect),
458
+ "headerRowCount" => numeric_property(:dialect),
459
+ "lineTerminators" => array_property(:dialect),
460
+ "quoteChar" => string_property(:dialect),
461
+ "skipBlankRows" => boolean_property(:dialect),
462
+ "skipColumns" => numeric_property(:dialect),
463
+ "skipInitialSpace" => boolean_property(:dialect),
464
+ "skipRows" => numeric_property(:dialect),
465
+ "trim" => lambda { |value, base_url, lang|
466
+ value = :true if value == true || value == "true"
467
+ value = :false if value == false || value == "false"
468
+ value = :start if value == "start"
469
+ value = :end if value == "end"
470
+ return value, nil, :dialect if [:true, :false, :start, :end].include? value
471
+ return true, :invalid_value, :dialect
472
+ },
473
+ # schema properties
474
+ "columns" => lambda { |value, base_url, lang| return value, nil, :schema },
475
+ "primaryKey" => column_reference_property(:schema),
476
+ "foreignKeys" => lambda { |value, base_url, lang|
477
+ foreign_keys = []
478
+ warnings = []
479
+ if value.instance_of? Array
480
+ value.each_with_index do |foreign_key,i|
481
+ if foreign_key.instance_of? Hash
482
+ foreign_key = foreign_key.clone
483
+ foreign_key.each do |p,v|
484
+ v, warning, type = check_property(p, v, base_url, lang)
485
+ if type == :foreign_key && (warning.nil? || warning.empty?)
486
+ foreign_key[p] = v
487
+ elsif p =~ /:/
488
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.#{p}"), "foreignKey includes a prefixed (common) property"
489
+ else
490
+ foreign_key.delete(p)
491
+ warnings << :invalid_property unless type == :foreign_key
492
+ warnings += Array(warning)
493
+ end
494
+ end
495
+ foreign_keys << foreign_key
496
+ else
497
+ warnings << :invalid_foreign_key
498
+ end
499
+ end
500
+ else
501
+ warnings << :invalid_value
502
+ end
503
+ return foreign_keys, warnings, :schema
504
+ },
505
+ "rowTitles" => column_reference_property(:schema),
506
+ # transformation properties
507
+ "targetFormat" => lambda { |value, base_url, lang| return value, nil, :transformation },
508
+ "scriptFormat" => lambda { |value, base_url, lang| return value, nil, :transformation },
509
+ "source" => lambda { |value, base_url, lang| return value, nil, :transformation },
510
+ # foreignKey properties
511
+ "columnReference" => column_reference_property(:foreign_key),
512
+ "reference" => lambda { |value, base_url, lang|
513
+ if value.instance_of? Hash
514
+ value = value.clone
515
+ warnings = []
516
+ value.each do |p,v|
517
+ if ["resource", "schemaReference", "columnReference"].include? p
518
+ v, warning, type = check_property(p, v, base_url, lang)
519
+ if warning.nil? || warning.empty?
520
+ value[p] = v
521
+ else
522
+ value.delete(p)
523
+ warnings += Array(warning)
524
+ end
525
+ elsif p =~ /:/
526
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.#{p}"), "foreignKey reference includes a prefixed (common) property"
527
+ else
528
+ value.delete(p)
529
+ warnings << :invalid_property
530
+ end
531
+ end
532
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.columnReference"), "foreignKey reference columnReference is missing" unless value["columnReference"]
533
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference does not have either resource or schemaReference" unless value["resource"] || value["schemaReference"]
534
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference has both resource and schemaReference" if value["resource"] && value["schemaReference"]
535
+ return value, warnings, :foreign_key
536
+ else
537
+ raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference is not an object"
538
+ end
539
+ },
540
+ # foreignKey reference properties
541
+ "resource" => lambda { |value, base_url, lang| return value, nil, :foreign_key_reference },
542
+ "schemaReference" => lambda { |value, base_url, lang|
543
+ return URI.join(base_url, value).to_s, nil, :foreign_key_reference
544
+ }
545
+ }
546
+
547
# Maps namespace prefixes to their namespace URLs.
NAMESPACES = {
  "dcat"    => "http://www.w3.org/ns/dcat#",
  "qb"      => "http://purl.org/linked-data/cube#",
  "grddl"   => "http://www.w3.org/2003/g/data-view#",
  "ma"      => "http://www.w3.org/ns/ma-ont#",
  "org"     => "http://www.w3.org/ns/org#",
  "owl"     => "http://www.w3.org/2002/07/owl#",
  "prov"    => "http://www.w3.org/ns/prov#",
  "rdf"     => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
  "rdfa"    => "http://www.w3.org/ns/rdfa#",
  "rdfs"    => "http://www.w3.org/2000/01/rdf-schema#",
  "rif"     => "http://www.w3.org/2007/rif#",
  "rr"      => "http://www.w3.org/ns/r2rml#",
  "sd"      => "http://www.w3.org/ns/sparql-service-description#",
  "skos"    => "http://www.w3.org/2004/02/skos/core#",
  "skosxl"  => "http://www.w3.org/2008/05/skos-xl#",
  "wdr"     => "http://www.w3.org/2007/05/powder#",
  "void"    => "http://rdfs.org/ns/void#",
  "wdrs"    => "http://www.w3.org/2007/05/powder-s#",
  "xhv"     => "http://www.w3.org/1999/xhtml/vocab#",
  "xml"     => "http://www.w3.org/XML/1998/namespace",
  "xsd"     => "http://www.w3.org/2001/XMLSchema#",
  "cc"      => "http://creativecommons.org/ns#",
  "ctag"    => "http://commontag.org/ns#",
  "dc"      => "http://purl.org/dc/terms/",
  "dcterms" => "http://purl.org/dc/terms/",
  "dc11"    => "http://purl.org/dc/elements/1.1/",
  "foaf"    => "http://xmlns.com/foaf/0.1/",
  "gr"      => "http://purl.org/goodrelations/v1#",
  "ical"    => "http://www.w3.org/2002/12/cal/icaltzd#",
  "og"      => "http://ogp.me/ns#",
  "rev"     => "http://purl.org/stuff/rev#",
  "sioc"    => "http://rdfs.org/sioc/ns#",
  "v"       => "http://rdf.data-vocabulary.org/#",
  "vcard"   => "http://www.w3.org/2006/vcard/ns#",
  "schema"  => "http://schema.org/"
}
584
+
585
# Building blocks (as regexp source strings) for matching language tags.
# They are combined below into BCP47_REGEXP.
BCP47_REGULAR_REGEXP = "(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)"
BCP47_IRREGULAR_REGEXP = "(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)"
BCP47_GRANDFATHERED_REGEXP = "(?<grandfathered>" + BCP47_IRREGULAR_REGEXP + "|" + BCP47_REGULAR_REGEXP + ")"
BCP47_PRIVATE_USE_REGEXP = "(?<privateUse>x(-[A-Za-z0-9]{1,8})+)"
BCP47_SINGLETON_REGEXP = "[0-9A-WY-Za-wy-z]"
BCP47_EXTENSION_REGEXP = "(?<extension>" + BCP47_SINGLETON_REGEXP + "(-[A-Za-z0-9]{2,8})+)"
BCP47_VARIANT_REGEXP = "(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})"
BCP47_REGION_REGEXP = "(?<region>[A-Za-z]{2}|[0-9]{3})"
BCP47_SCRIPT_REGEXP = "(?<script>[A-Za-z]{4})"
BCP47_EXTLANG_REGEXP = "(?<extlang>[A-Za-z]{3}(-[A-Za-z]{3}){0,2})"
BCP47_LANGUAGE_REGEXP = "(?<language>([A-Za-z]{2,3}(-" + BCP47_EXTLANG_REGEXP + ")?)|[A-Za-z]{4}|[A-Za-z]{5,8})"
BCP47_LANGTAG_REGEXP = "(" + BCP47_LANGUAGE_REGEXP + "(-" + BCP47_SCRIPT_REGEXP + ")?" + "(-" + BCP47_REGION_REGEXP + ")?" + "(-" + BCP47_VARIANT_REGEXP + ")*" + "(-" + BCP47_EXTENSION_REGEXP + ")*" + "(-" + BCP47_PRIVATE_USE_REGEXP + ")?" + ")"
# Anchored with \A / \z (whole string) rather than ^ / $ (per line), so a
# multi-line value containing one valid line is not accepted as a tag.
BCP47_LANGUAGETAG_REGEXP = "\\A(" + BCP47_GRANDFATHERED_REGEXP + "|" + BCP47_LANGTAG_REGEXP + "|" + BCP47_PRIVATE_USE_REGEXP + ")\\z"
BCP47_REGEXP = Regexp.new(BCP47_LANGUAGETAG_REGEXP)
599
+
600
# Pattern for column names: a letter, digit or %XX escape, followed by
# letters, digits, underscores or %XX escapes. Anchored with \A / \z so the
# whole string must match (^ / $ would accept any single matching line of a
# multi-line string).
NAME_REGEXP = /\A([A-Za-z0-9]|(%[A-F0-9][A-F0-9]))([A-Za-z0-9_]|(%[A-F0-9][A-F0-9]))*\z/
601
+
602
# Names of the built-in types.
BUILT_IN_TYPES = %w[TableGroup Table Schema Column Dialect Template Datatype]
603
+
604
# Datatype URLs whose "format" is compiled as a regular expression.
REGEXP_FORMAT_DATATYPES = [
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "http://www.w3.org/ns/csvw#JSON"
] + %w[
  anyAtomicType anyURI base64Binary duration dayTimeDuration yearMonthDuration
  hexBinary QName string normalizedString token language Name NMTOKEN
].map { |local_name| "http://www.w3.org/2001/XMLSchema#" + local_name }
623
+
624
# String-based datatype URLs; these may carry length facets.
STRING_DATATYPES = [
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "http://www.w3.org/ns/csvw#JSON"
] + %w[string normalizedString token language Name NMTOKEN].map { |local_name|
  "http://www.w3.org/2001/XMLSchema#" + local_name
}
635
+
636
# Binary datatype URLs; these may carry length facets.
BINARY_DATATYPES = %w[base64Binary hexBinary].map { |local_name|
  "http://www.w3.org/2001/XMLSchema#" + local_name
}
640
+
641
# Numeric datatype URLs whose "format" is interpreted as a number format.
NUMERIC_FORMAT_DATATYPES = %w[
  decimal integer long int short byte
  nonNegativeInteger positiveInteger
  unsignedLong unsignedInt unsignedShort unsignedByte
  nonPositiveInteger negativeInteger
  double float
].map { |local_name| "http://www.w3.org/2001/XMLSchema#" + local_name }
659
+
660
# Date/time datatype URLs whose "format" is interpreted as a date format.
DATE_FORMAT_DATATYPES = %w[date dateTime dateTimeStamp time].map { |local_name|
  "http://www.w3.org/2001/XMLSchema#" + local_name
}
666
+
667
# Maps built-in datatype names to datatype URLs. The first seven entries
# are aliases; every remaining name maps to the XML Schema URL with the
# same local name. Key order matches the original literal.
BUILT_IN_DATATYPES = {
  "number" => "http://www.w3.org/2001/XMLSchema#double",
  "binary" => "http://www.w3.org/2001/XMLSchema#base64Binary",
  "datetime" => "http://www.w3.org/2001/XMLSchema#dateTime",
  "any" => "http://www.w3.org/2001/XMLSchema#anyAtomicType",
  "xml" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
  "html" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
  "json" => "http://www.w3.org/ns/csvw#JSON"
}.merge(
  Hash[
    %w[
      anyAtomicType anyURI base64Binary boolean date dateTime dateTimeStamp
      decimal integer long int short byte
      nonNegativeInteger positiveInteger
      unsignedLong unsignedInt unsignedShort unsignedByte
      nonPositiveInteger negativeInteger
      double duration dayTimeDuration yearMonthDuration float
      gDay gMonth gMonthDay gYear gYearMonth
      hexBinary QName string normalizedString token language Name NMTOKEN time
    ].map { |local_name| [local_name, "http://www.w3.org/2001/XMLSchema#" + local_name] }
  ]
)
716
+
717
# Encoding names accepted for the dialect "encoding" property.
VALID_ENCODINGS = %w[
  utf-8
  ibm866
  iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6 iso-8859-7
  iso-8859-8 iso-8859-8-i iso-8859-10 iso-8859-13 iso-8859-14 iso-8859-15
  iso-8859-16
  koi8-r koi8-u
  macintosh
  windows-874
  windows-1250 windows-1251 windows-1252 windows-1253 windows-1254
  windows-1255 windows-1256 windows-1257 windows-1258
  x-mac-cyrillic
  gb18030 hz-gb-2312 big5
  euc-jp iso-2022-jp shift_jis
  euc-kr
  replacement
  utf-16be utf-16le
  x-user-defined
]
759
+ end
760
+ end
761
+ end