wjordan213-csvlint 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,86 @@
1
+ Feature: Validation warnings
2
+
3
+ Scenario: UTF-8 Encoding
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "col1","col2","col3"
7
+ "abc","2","3"
8
+ """
9
+ And it is encoded as "utf-8"
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if there are warnings
12
+ Then there should be 0 warnings
13
+
14
+ Scenario: ISO-8859-1 Encoding
15
+ Given I have a CSV with the following content:
16
+ """
17
+ "col1","col2","col3"
18
+ "1","2","3"
19
+ """
20
+ And it is encoded as "iso-8859-1"
21
+ And it is stored at the url "http://example.com/example1.csv"
22
+ When I ask if there are warnings
23
+ Then there should be 1 warnings
24
+
25
+ Scenario: Correct content type
26
+ Given I have a CSV with the following content:
27
+ """
28
+ "col1","col2","col3"
29
+ "abc","2","3"
30
+ """
31
+ And the content type is set to "text/csv"
32
+ And it is stored at the url "http://example.com/example1.csv"
33
+ And I ask if there are warnings
34
+ Then there should be 0 warnings
35
+
36
+ Scenario: No extension
37
+ Given I have a CSV with the following content:
38
+ """
39
+ "col1","col2","col3"
40
+ "abc","2","3"
41
+ """
42
+ And the content type is set to "text/csv"
43
+ And it is stored at the url "http://example.com/example1"
44
+ And I ask if there are warnings
45
+ Then there should be 0 warnings
46
+
47
+ Scenario: Allow query params after extension
48
+ Given I have a CSV with the following content:
49
+ """
50
+ "col1","col2","col3"
51
+ "abc","2","3"
52
+ """
53
+ And the content type is set to "text/csv"
54
+ And it is stored at the url "http://example.com/example1.csv?query=param"
55
+ And I ask if there are warnings
56
+ Then there should be 0 warnings
57
+
58
+ Scenario: User doesn't supply encoding
59
+ Given I have a CSV with the following content:
60
+ """
61
+ "col1","col2","col3"
62
+ "abc","2","3"
63
+ """
64
+ And it is stored at the url "http://example.com/example1.csv" with no character set
65
+ When I ask if there are warnings
66
+ Then there should be 1 warnings
67
+ And that warning should have the type "no_encoding"
68
+
69
+ Scenario: Title rows
70
+ Given I have a CSV file called "title-row.csv"
71
+ And it is stored at the url "http://example.com/example1.csv"
72
+ And I ask if there are warnings
73
+ Then there should be 1 warnings
74
+ And that warning should have the type "title_row"
75
+
76
+ Scenario: catch excel warnings (xls)
77
+ Given I parse a file called "spreadsheet.xls"
78
+ And I ask if there are warnings
79
+ Then there should be 1 warnings
80
+ And that warning should have the type "excel"
81
+
82
+ Scenario: catch excel warnings (xlsx)
83
+ Given I parse a file called "spreadsheet.xlsx"
84
+ And I ask if there are warnings
85
+ Then there should be 1 warnings
86
+ And that warning should have the type "excel"
data/lib/csvlint.rb ADDED
@@ -0,0 +1,27 @@
1
+ require 'csv'
2
+ require 'date'
3
+ require 'open-uri'
4
+ require 'set'
5
+ require 'tempfile'
6
+ require 'typhoeus'
7
+
8
+ require 'active_support/core_ext/date/conversions'
9
+ require 'active_support/core_ext/time/conversions'
10
+ require 'mime/types'
11
+ require 'open_uri_redirections'
12
+ require 'uri_template'
13
+
14
+ require 'csvlint/error_message'
15
+ require 'csvlint/error_collector'
16
+ require 'csvlint/validate'
17
+ require 'csvlint/field'
18
+
19
+ require 'csvlint/csvw/metadata_error'
20
+ require 'csvlint/csvw/number_format'
21
+ require 'csvlint/csvw/date_format'
22
+ require 'csvlint/csvw/property_checker'
23
+ require 'csvlint/csvw/column'
24
+ require 'csvlint/csvw/table'
25
+ require 'csvlint/csvw/table_group'
26
+
27
+ require 'csvlint/schema'
@@ -0,0 +1,165 @@
1
+ require 'csvlint'
2
+ require 'colorize'
3
+ require 'json'
4
+ require 'pp'
5
+ require 'thor'
6
+
7
+ module Csvlint
8
+ class Cli < Thor
9
+
10
# Default command: validates one CSV source (file path, URL or piped stdin),
# optionally against a schema, and exits with status 1 when it is invalid.
#
# source - optional file path or URL given on the command line.
desc "myfile.csv OR csvlint http://example.com/myfile.csv", "Supports validating CSV files to check their syntax and contents"
option :dump_errors, desc: "Pretty print error and warning objects.", type: :boolean, aliases: :d
option :schema, banner: "FILENAME OR URL", desc: "Schema file", aliases: :s
def validate(source = nil)
  source = read_source(source)
  @schema = get_schema(options[:schema]) if options[:schema]
  # With no explicit source, the tables named by the schema are validated.
  fetch_schema_tables(@schema, options) if source.nil?

  # The flag is declared above as :dump_errors. Reading options[:dump]
  # always produced nil, so the flag was silently ignored.
  valid = validate_csv(source, @schema, options[:dump_errors])
  exit 1 unless valid
end
21
+
22
# Prints the usage text of the :validate command.
def help
  command_owner = self.class
  command_owner.command_help(shell, :validate)
end
25
+
26
+ default_task :validate
27
+
28
+ private
29
+
30
# Turns the command-line argument into something the validator can consume.
#
# source - nil, an http(s) URL string, or the path of a local file.
#
# Returns:
#   * nil when no argument was given and stdin is a terminal,
#   * a StringIO holding the piped stdin contents,
#   * the URL string unchanged,
#   * an open File for a local path.
# Reports an error and exits (via return_error) when a local file does not
# exist, or when stdin cannot be read and no schema option was supplied.
def read_source(source)
  if source.nil?
    # Nothing to read when stdin is an interactive terminal.
    return nil if $stdin.tty?

    piped = begin
      StringIO.new($stdin.read)
    rescue StandardError
      nil
    end
    return_error "No CSV data to validate" if !options[:schema] && piped.nil?
    return piped
  end

  # Only a real http:// or https:// prefix marks a URL; a local file that is
  # merely named "http_log.csv" must still be opened from disk.
  return source if source =~ %r{\Ahttps?://}

  begin
    File.new(source)
  rescue Errno::ENOENT
    return_error "#{source} not found"
  end
end
50
+
51
# Loads the schema named by +schema+ through Csvlint::Schema.load_from_json.
#
# Reports an error and exits (via return_error) when the metadata is
# invalid, when the schema cannot be found, or when the loaded
# Csvlint::Schema carries the description "malformed".
#
# Returns the loaded schema object.
def get_schema(schema)
  loaded =
    begin
      Csvlint::Schema.load_from_json(schema, false)
    rescue Csvlint::Csvw::MetadataError => e
      where = e.path ? " at " + e.path : ""
      return_error "invalid metadata: #{e.message}#{where}"
    rescue OpenURI::HTTPError, Errno::ENOENT
      return_error "#{options[:schema]} not found"
    end

  malformed = loaded.class == Csvlint::Schema && loaded.description == "malformed"
  return_error "invalid metadata: malformed JSON" if malformed

  loaded
end
66
+
67
# Validates every table listed in a CSVW table group when no CSV source was
# given on the command line.
#
# schema  - expected to be a Csvlint::Csvw::TableGroup; anything else is
#           reported as "No CSV data to validate." and the process exits.
# options - the option hash of the current command.
#
# Exits with status 1 when any table is invalid or a local table file is
# missing.
def fetch_schema_tables(schema, options)
  unless schema.instance_of? Csvlint::Csvw::TableGroup
    return_error "No CSV data to validate."
  end

  valid = true
  schema.tables.keys.each do |source|
    # Only genuine http(s) URLs are passed on as strings; every other table
    # key is treated as a (possibly "file:"-prefixed) local path.
    unless source =~ %r{\Ahttps?://}
      begin
        source = source.sub("file:", "")
        source = File.new(source)
      rescue Errno::ENOENT
        return_error "#{source} not found"
      end
    end
    # The command declares the flag as :dump_errors; options[:dump] was
    # always nil.
    valid &= validate_csv(source, schema, options[:dump_errors])
  end

  exit 1 unless valid
end
85
+
86
# Prints one numbered error or warning line, optionally followed by a
# pretty-printed dump of the object.
#
# index - zero-based position of the message (printed one-based).
# error - object responding to row, column, type and content.
# dump  - when truthy, the error object is also pretty-printed.
# color - colour used when stdout is a terminal.
def print_error(index, error, dump, color)
  location = ""
  location += error.row.to_s if error.row
  location += "#{error.row ? "," : ""}#{error.column.to_s}" if error.column
  if error.row || error.column
    location = "#{error.row ? "Row" : "Column"}: #{location}"
  end

  output_string = "#{index+1}. "
  if error.column && @schema && @schema.class == Csvlint::Schema
    # A message may point at a column the schema does not describe (for
    # example a row with more cells than the schema has fields). Skip the
    # field-name prefix then instead of calling .name on nil.
    field = @schema.fields[error.column - 1]
    output_string += "#{field.name}: " unless field.nil?
  end
  output_string += "#{error.type}"
  output_string += ". #{location}" unless location.empty?
  output_string += ". #{error.content}" if error.content

  if $stdout.tty?
    puts output_string.colorize(color)
  else
    puts output_string
  end

  if dump
    pp error
  end
end
111
+
112
# Prints every message in +errors+ through print_error, in red.
#
# errors - collection of error or warning objects.
# dump   - forwarded to print_error.
def print_errors(errors, dump)
  return unless errors.size > 0

  errors.each_with_index do |error, position|
    print_error(position, error, dump, :red)
  end
end
117
+
118
# Prints +message+ (red when stdout is a terminal) and exits with status 1.
def return_error(message)
  puts($stdout.tty? ? message.colorize(:red) : message)
  exit 1
end
126
+
127
# Runs the validator over one source and prints the verdict, the errors and
# the warnings.
#
# source - URL string, File, or another IO-like object.
# schema - schema object handed to the validator (may be nil).
# dump   - forwarded to print_errors.
#
# Returns the validator's valid? result.
def validate_csv(source, schema, dump)
  @error_count = 0

  validator = Csvlint::Validator.new( source, {}, schema, { lambda: report_lines } )

  # Label used in the verdict line: the URL, the file path, or "CSV".
  csv =
    if source.instance_of?(String)
      source
    elsif source.instance_of?(File)
      source.path
    else
      "CSV"
    end

  verdict =
    if $stdout.tty?
      validator.valid? ? "VALID".green : "INVALID".red
    else
      validator.valid? ? "VALID" : "INVALID"
    end
  puts "\r\n#{csv} is #{verdict}"

  print_errors(validator.errors, dump)
  print_errors(validator.warnings, dump)

  validator.valid?
end
151
+
152
# Builds the per-row progress callback given to the validator: it prints a
# red "!" when the row's error count exceeds the previously recorded count
# and a green "." otherwise, then records the new count in @error_count.
def report_lines
  lambda do |row|
    current = row.errors.count
    marker = current > @error_count ? "!".red : ".".green
    print marker
    @error_count = current
  end
end
163
+
164
+ end
165
+ end
@@ -0,0 +1,359 @@
1
+ module Csvlint
2
+ module Csvw
3
+ class Column
4
+ include Csvlint::ErrorCollector
5
+
6
+ attr_reader :id, :about_url, :datatype, :default, :lang, :name, :null, :number, :ordered, :property_url, :required, :separator, :source_number, :suppress_output, :text_direction, :titles, :value_url, :virtual, :annotations
7
+
8
# Builds a column description.
#
# number - the column's number.
# name   - the column's name.
# The keyword arguments populate the readers of the same name;
# source_number falls back to +number+ when it is not supplied, and
# +warnings+ are appended to the collector's warnings after reset.
def initialize(number, name, id: nil, about_url: nil, datatype: { "@id" => "http://www.w3.org/2001/XMLSchema#string" }, default: "", lang: "und", null: [""], ordered: false, property_url: nil, required: false, separator: nil, source_number: nil, suppress_output: false, text_direction: :inherit, titles: {}, value_url: nil, virtual: false, annotations: [], warnings: [])
  # Identity
  @number, @name, @id = number, name, id
  @source_number = source_number || number

  # URL templates
  @about_url, @property_url, @value_url = about_url, property_url, value_url

  # Value handling
  @datatype, @default, @null = datatype, default, null
  @required, @separator, @ordered = required, separator, ordered

  # Presentation
  @lang, @text_direction, @titles = lang, text_direction, titles
  @suppress_output, @virtual = suppress_output, virtual

  @annotations = annotations

  reset
  @warnings += warnings
end
31
+
32
# Builds a Column from a column description hash.
#
# number               - the column's number.
# column_desc          - hash of property name => value.
# base_url, lang       - forwarded to PropertyChecker.check_property.
# inherited_properties - properties inherited from enclosing descriptions;
#                        cloned, so the caller's hash is not modified.
#
# Raises Csvlint::Csvw::MetadataError when "@type" is present and is not
# 'Column'. Warnings from the property checker, and properties of an
# unrecognised type, are collected and handed to the new column.
def self.from_json(number, column_desc, base_url=nil, lang="und", inherited_properties={})
  annotations = {}
  warnings = []
  column_properties = {}
  inherited_properties = inherited_properties.clone

  column_desc.each do |property, value|
    if property == "@type"
      next if value == 'Column'
      raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'"
    end

    v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
    unless warning.nil? || warning.empty?
      warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) }
    end

    case type
    when :annotation
      annotations[property] = v
    when :common, :column
      column_properties[property] = v
    when :inherited
      inherited_properties[property] = v
    else
      warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "column: #{property}", nil)
    end
  end

  self.new(
    number,
    column_properties["name"],
    id: column_properties["@id"],
    datatype: inherited_properties["datatype"] || { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
    lang: inherited_properties["lang"] || "und",
    null: inherited_properties["null"] || [""],
    property_url: column_desc["propertyUrl"],
    required: inherited_properties["required"] || false,
    separator: inherited_properties["separator"],
    titles: column_properties["titles"],
    virtual: column_properties["virtual"] || false,
    annotations: annotations,
    warnings: warnings
  )
end
70
+
71
# Checks +header+ against the column's titles whose language matches the
# column's language; records an :invalid_header error when none matches.
#
# Returns the collector's valid? result.
def validate_header(header)
  reset

  accepted = []
  if @titles
    accepted = @titles.map { |language, names| names if Column.languages_match(language, lang) }.flatten
  end

  unless accepted.include? header
    build_errors(:invalid_header, :schema, 1, @number, header, @titles)
  end

  valid?
end
77
+
78
# Validates one cell.
#
# string_value - raw cell text (nil is treated as "").
# row          - row number used when recording errors.
#
# The cell is parsed first. A null cell is only checked for being required;
# otherwise each parsed value goes through the required, format, length and
# value checks. Returns the collector's valid? result.
def validate(string_value, row=nil)
  reset
  values = parse(string_value || "", row)

  if values.nil?
    validate_required(values, row)
  else
    values.each do |value|
      validate_required(value, row)
      validate_format(value, row)
      validate_length(value, row)
      validate_value(value, row)
    end
  end

  valid?
end
91
+
92
# Converts raw cell text into a list of typed values.
#
# Returns nil when the text is one of the column's null markers. Otherwise
# the text is split on the separator (when one is set) and each piece is run
# through the parser registered for the datatype's "base" (or "@id"). A
# piece that yields a warning is recorded as an error and kept as the
# original string.
def parse(string_value, row=nil)
  return nil if null.include? string_value

  pieces = @separator.nil? ? [string_value] : string_value.split(@separator)

  pieces.map do |piece|
    parser = DATATYPE_PARSER[@datatype["base"] || @datatype["@id"]]
    value, warning = parser.call(piece, @datatype["format"])
    if warning.nil?
      value
    else
      build_errors(warning, :schema, row, @number, piece, @datatype)
      piece
    end
  end
end
107
+
108
+ private
109
+ class << self
110
+
111
# Returns a parser lambda for the date/time datatype +type+.
#
# The lambda takes (value, format). When format is nil a
# Csvlint::Csvw::DateFormat for +type+ is created. It answers
# [parsed, nil] on success and [nil, warning] when the format's parse
# returns nil.
def create_date_parser(type, warning)
  lambda do |value, format|
    date_format = format.nil? ? Csvlint::Csvw::DateFormat.new(nil, type) : format
    parsed = date_format.parse(value)
    parsed.nil? ? [nil, warning] : [parsed, nil]
  end
end
119
+
120
# Returns a parser lambda that accepts a value only when the WHOLE value
# matches +regexp+.
#
# regexp  - pattern describing the lexical form of the datatype.
# warning - symbol returned alongside nil when the value does not match.
#
# The lambda takes (value, format) and answers [value, nil] on a match and
# [nil, warning] otherwise. The pattern is wrapped in \A...\z because an
# unanchored match accepted any string that merely contained a valid form
# (e.g. "junkP1Yjunk" as a duration).
def create_regexp_based_parser(regexp, warning)
  whole_value = Regexp.new("\\A(?:#{regexp.source})\\z", regexp.options)
  return lambda { |value, format|
    return nil, warning unless value =~ whole_value
    return value, nil
  }
end
126
+
127
# Tells whether two language tags are compatible.
#
# Tags match when they are equal, when either is "und", or when one is a
# prefix of the other followed by "-" (e.g. "en" and "en-GB").
#
# A plain prefix test is used instead of interpolating a tag into a Regexp,
# so regexp metacharacters in a tag are compared literally.
#
# Returns true or false.
def languages_match(l1, l2)
  return true if l1 == l2 || l1 == "und" || l2 == "und"
  return false if l1.nil? || l2.nil?
  l1.start_with?("#{l2}-") || l2.start_with?("#{l1}-")
end
132
+ end
133
+
134
# Records a :required error when the column is required and +value+ is nil.
def validate_required(value, row)
  return unless @required && value.nil?

  build_errors(:required, :schema, row, number, value, { "required" => @required })
end
137
+
138
# Checks +value+ against the datatype's "length", "minLength" and
# "maxLength" facets, recording :min_length, :max_length and :length errors.
#
# For base64Binary the length is the padding-stripped text length * 3 / 4;
# for hexBinary it is the text length / 2; otherwise it is value.length.
def validate_length(value, row)
  exact = datatype["length"]
  minimum = datatype["minLength"]
  maximum = datatype["maxLength"]
  return unless exact || minimum || maximum

  type_ids = [datatype["@id"], datatype["base"]]
  length = value.length
  if type_ids.include?("http://www.w3.org/2001/XMLSchema#base64Binary")
    length = value.gsub(/==?$/,"").length * 3 / 4
  end
  if type_ids.include?("http://www.w3.org/2001/XMLSchema#hexBinary")
    length = value.length / 2
  end

  if minimum && length < minimum
    build_errors(:min_length, :schema, row, number, value, { "minLength" => minimum })
  end
  if maximum && length > maximum
    build_errors(:max_length, :schema, row, number, value, { "maxLength" => maximum })
  end
  if exact && length != exact
    build_errors(:length, :schema, row, number, value, { "length" => exact })
  end
end
149
+
150
# Records a :format error when the datatype declares a "format" and the
# validator registered for the datatype rejects +value+.
#
# The validator is looked up by the datatype's "base", falling back to its
# "@id" — the same key parse uses for DATATYPE_PARSER — so a datatype given
# only by "@id" no longer ends in a call on nil.
def validate_format(value, row)
  format = datatype["format"]
  return unless format

  checker = DATATYPE_FORMAT_VALIDATION[datatype["base"] || datatype["@id"]]
  unless checker.call(value, format)
    build_errors(:format, :schema, row, number, value, { "format" => format })
  end
end
155
+
156
# Checks +value+ against the datatype's range facets and records an error
# for each one it violates.
def validate_value(value, row)
  # facet name, error type, comparison that signals a violation
  [
    ["minInclusive", :min_inclusive, :<],
    ["maxInclusive", :max_inclusive, :>],
    ["minExclusive", :min_exclusive, :<=],
    ["maxExclusive", :max_exclusive, :>=]
  ].each do |facet, error_type, violated_when|
    limit = datatype[facet]
    next unless limit && value.public_send(violated_when, limit)

    build_errors(error_type, :schema, row, number, value, { facet => limit })
  end
end
162
+
163
# Format check that matches the value against the format with =~.
REGEXP_VALIDATION = ->(value, format) { value =~ format }

# Format check that accepts every value.
NO_ADDITIONAL_VALIDATION = ->(value, format) { true }

# Datatype URI => format check. Built from (namespace, local names, check)
# groups listed in the table's original key order.
rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
csvw_ns = "http://www.w3.org/ns/csvw#"
xsd_ns = "http://www.w3.org/2001/XMLSchema#"

DATATYPE_FORMAT_VALIDATION = [
  [rdf_ns, %w[XMLLiteral HTML], REGEXP_VALIDATION],
  [csvw_ns, %w[JSON], REGEXP_VALIDATION],
  [xsd_ns, %w[anyAtomicType anyURI base64Binary], REGEXP_VALIDATION],
  [xsd_ns, %w[boolean date dateTime dateTimeStamp decimal integer long int short byte
              nonNegativeInteger positiveInteger unsignedLong unsignedInt unsignedShort
              unsignedByte nonPositiveInteger negativeInteger double], NO_ADDITIONAL_VALIDATION],
  [xsd_ns, %w[duration dayTimeDuration yearMonthDuration], REGEXP_VALIDATION],
  [xsd_ns, %w[float gDay gMonth gMonthDay gYear gYearMonth], NO_ADDITIONAL_VALIDATION],
  [xsd_ns, %w[hexBinary QName string normalizedString token language Name NMTOKEN], REGEXP_VALIDATION],
  [xsd_ns, %w[time], NO_ADDITIONAL_VALIDATION]
].each_with_object({}) do |(namespace, local_names, check), table|
  local_names.each { |local_name| table[namespace + local_name] = check }
end
212
+
213
# Parser that accepts every value unchanged: answers [value, nil].
ALL_VALUES_VALID = ->(value, format) { [value, nil] }

# Parser for numbers: uses the supplied format, or a default
# Csvlint::Csvw::NumberFormat when none is given. Answers [number, nil] or
# [nil, :invalid_number].
NUMERIC_PARSER = lambda do |value, format|
  number_format = format.nil? ? Csvlint::Csvw::NumberFormat.new() : format
  parsed = number_format.parse(value)
  parsed.nil? ? [nil, :invalid_number] : [parsed, nil]
end

# Builds the parser of an integer type derived from another entry of
# DATATYPE_PARSER: the parent parser runs first (its failures are reported
# under this type's own warning), then +allowed+ decides whether the parsed
# number lies within this type's value space.
derived_integer = lambda do |parent_uri, warning, allowed|
  lambda do |value, format|
    v, w = DATATYPE_PARSER[parent_uri].call(value, format)
    return v, warning unless w.nil?
    return nil, warning unless allowed.call(v)
    return v, w
  end
end

xsd_integer = "http://www.w3.org/2001/XMLSchema#integer"
xsd_non_negative = "http://www.w3.org/2001/XMLSchema#nonNegativeInteger"

# Datatype URI => lambda taking (value, format) and answering
# [parsed_value, warning]; warning is nil on success.
DATATYPE_PARSER = {
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => ALL_VALUES_VALID,
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => ALL_VALUES_VALID,
  "http://www.w3.org/ns/csvw#JSON" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyURI" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#base64Binary" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#boolean" => lambda { |value, format|
    # Without a format the literals true/1 and false/0 are recognised;
    # with one, format[0] stands for true and format[1] for false.
    truthy, falsy =
      if format.nil?
        [["true", "1"], ["false", "0"]]
      else
        [[format[0]], [format[1]]]
      end
    return true, nil if truthy.include? value
    return false, nil if falsy.include? value
    return value, :invalid_boolean
  },
  "http://www.w3.org/2001/XMLSchema#date" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
  "http://www.w3.org/2001/XMLSchema#dateTime" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
  "http://www.w3.org/2001/XMLSchema#decimal" => lambda { |value, format|
    # Values containing "E", and the special values NaN/INF/-INF, are rejected.
    return nil, :invalid_decimal if value =~ /(E|^(NaN|INF|-INF)$)/
    return NUMERIC_PARSER.call(value, format)
  },
  "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, format|
    v, w = NUMERIC_PARSER.call(value, format)
    return v, :invalid_integer unless w.nil?
    return nil, :invalid_integer unless v.kind_of? Integer
    return v, w
  },
  "http://www.w3.org/2001/XMLSchema#long" =>
    derived_integer.call(xsd_integer, :invalid_long, ->(v) { v <= 9223372036854775807 && v >= -9223372036854775808 }),
  "http://www.w3.org/2001/XMLSchema#int" =>
    derived_integer.call(xsd_integer, :invalid_int, ->(v) { v <= 2147483647 && v >= -2147483648 }),
  "http://www.w3.org/2001/XMLSchema#short" =>
    derived_integer.call(xsd_integer, :invalid_short, ->(v) { v <= 32767 && v >= -32768 }),
  "http://www.w3.org/2001/XMLSchema#byte" =>
    derived_integer.call(xsd_integer, :invalid_byte, ->(v) { v <= 127 && v >= -128 }),
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" =>
    derived_integer.call(xsd_integer, :invalid_nonNegativeInteger, ->(v) { v >= 0 }),
  "http://www.w3.org/2001/XMLSchema#positiveInteger" =>
    derived_integer.call(xsd_integer, :invalid_positiveInteger, ->(v) { v > 0 }),
  "http://www.w3.org/2001/XMLSchema#unsignedLong" =>
    derived_integer.call(xsd_non_negative, :invalid_unsignedLong, ->(v) { v <= 18446744073709551615 }),
  "http://www.w3.org/2001/XMLSchema#unsignedInt" =>
    derived_integer.call(xsd_non_negative, :invalid_unsignedInt, ->(v) { v <= 4294967295 }),
  "http://www.w3.org/2001/XMLSchema#unsignedShort" =>
    derived_integer.call(xsd_non_negative, :invalid_unsignedShort, ->(v) { v <= 65535 }),
  "http://www.w3.org/2001/XMLSchema#unsignedByte" =>
    derived_integer.call(xsd_non_negative, :invalid_unsignedByte, ->(v) { v <= 255 }),
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" =>
    derived_integer.call(xsd_integer, :invalid_nonPositiveInteger, ->(v) { v <= 0 }),
  "http://www.w3.org/2001/XMLSchema#negativeInteger" =>
    derived_integer.call(xsd_integer, :invalid_negativeInteger, ->(v) { v < 0 }),
  "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
  # regular expressions here taken from XML Schema datatypes spec
  "http://www.w3.org/2001/XMLSchema#duration" =>
    create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration" =>
    create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration" =>
    create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
  "http://www.w3.org/2001/XMLSchema#gDay" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
  "http://www.w3.org/2001/XMLSchema#gMonth" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
  "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
  "http://www.w3.org/2001/XMLSchema#gYear" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
  "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
  "http://www.w3.org/2001/XMLSchema#hexBinary" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#QName" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#normalizedString" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#token" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#language" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#Name" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#NMTOKEN" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#time" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
}
357
+ end
358
+ end
359
+ end