wjordan213-csvlint 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,86 @@
1
+ Feature: Validation warnings
2
+
3
+ Scenario: UTF-8 Encoding
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "col1","col2","col3"
7
+ "abc","2","3"
8
+ """
9
+ And it is encoded as "utf-8"
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if there are warnings
12
+ Then there should be 0 warnings
13
+
14
+ Scenario: ISO-8859-1 Encoding
15
+ Given I have a CSV with the following content:
16
+ """
17
+ "col1","col2","col3"
18
+ "1","2","3"
19
+ """
20
+ And it is encoded as "iso-8859-1"
21
+ And it is stored at the url "http://example.com/example1.csv"
22
+ When I ask if there are warnings
23
+ Then there should be 1 warnings
24
+
25
+ Scenario: Correct content type
26
+ Given I have a CSV with the following content:
27
+ """
28
+ "col1","col2","col3"
29
+ "abc","2","3"
30
+ """
31
+ And the content type is set to "text/csv"
32
+ And it is stored at the url "http://example.com/example1.csv"
33
+ And I ask if there are warnings
34
+ Then there should be 0 warnings
35
+
36
+ Scenario: No extension
37
+ Given I have a CSV with the following content:
38
+ """
39
+ "col1","col2","col3"
40
+ "abc","2","3"
41
+ """
42
+ And the content type is set to "text/csv"
43
+ And it is stored at the url "http://example.com/example1"
44
+ And I ask if there are warnings
45
+ Then there should be 0 warnings
46
+
47
+ Scenario: Allow query params after extension
48
+ Given I have a CSV with the following content:
49
+ """
50
+ "col1","col2","col3"
51
+ "abc","2","3"
52
+ """
53
+ And the content type is set to "text/csv"
54
+ And it is stored at the url "http://example.com/example1.csv?query=param"
55
+ And I ask if there are warnings
56
+ Then there should be 0 warnings
57
+
58
+ Scenario: User doesn't supply encoding
59
+ Given I have a CSV with the following content:
60
+ """
61
+ "col1","col2","col3"
62
+ "abc","2","3"
63
+ """
64
+ And it is stored at the url "http://example.com/example1.csv" with no character set
65
+ When I ask if there are warnings
66
+ Then there should be 1 warnings
67
+ And that warning should have the type "no_encoding"
68
+
69
+ Scenario: Title rows
70
+ Given I have a CSV file called "title-row.csv"
71
+ And it is stored at the url "http://example.com/example1.csv"
72
+ And I ask if there are warnings
73
+ Then there should be 1 warnings
74
+ And that warning should have the type "title_row"
75
+
76
+ Scenario: Catch Excel warnings for .xls files
77
+ Given I parse a file called "spreadsheet.xls"
78
+ And I ask if there are warnings
79
+ Then there should be 1 warnings
80
+ And that warning should have the type "excel"
81
+
82
+ Scenario: Catch Excel warnings for .xlsx files
83
+ Given I parse a file called "spreadsheet.xlsx"
84
+ And I ask if there are warnings
85
+ Then there should be 1 warnings
86
+ And that warning should have the type "excel"
data/lib/csvlint.rb ADDED
@@ -0,0 +1,27 @@
1
+ require 'csv'
2
+ require 'date'
3
+ require 'open-uri'
4
+ require 'set'
5
+ require 'tempfile'
6
+ require 'typhoeus'
7
+
8
+ require 'active_support/core_ext/date/conversions'
9
+ require 'active_support/core_ext/time/conversions'
10
+ require 'mime/types'
11
+ require 'open_uri_redirections'
12
+ require 'uri_template'
13
+
14
+ require 'csvlint/error_message'
15
+ require 'csvlint/error_collector'
16
+ require 'csvlint/validate'
17
+ require 'csvlint/field'
18
+
19
+ require 'csvlint/csvw/metadata_error'
20
+ require 'csvlint/csvw/number_format'
21
+ require 'csvlint/csvw/date_format'
22
+ require 'csvlint/csvw/property_checker'
23
+ require 'csvlint/csvw/column'
24
+ require 'csvlint/csvw/table'
25
+ require 'csvlint/csvw/table_group'
26
+
27
+ require 'csvlint/schema'
@@ -0,0 +1,165 @@
1
+ require 'csvlint'
2
+ require 'colorize'
3
+ require 'json'
4
+ require 'pp'
5
+ require 'thor'
6
+
7
+ module Csvlint
8
# Thor-based command line interface for csvlint.
#
# Exposes a single task, +validate+ (also the default task). It validates a
# CSV supplied as a file path, an http(s) URL or piped standard input,
# optionally against a schema given with --schema. The process exits with
# status 1 when validation fails or an input cannot be found.
class Cli < Thor

  desc "myfile.csv OR csvlint http://example.com/myfile.csv", "Supports validating CSV files to check their syntax and contents"
  option :dump_errors, desc: "Pretty print error and warning objects.", type: :boolean, aliases: :d
  option :schema, banner: "FILENAME OR URL", desc: "Schema file", aliases: :s
  def validate(source = nil)
    source = read_source(source)
    @schema = get_schema(options[:schema]) if options[:schema]
    # With no explicit source, validate the tables listed in the schema.
    # NOTE(review): when every table is valid, control continues to the
    # validate_csv call below with a nil source - confirm this is intended.
    fetch_schema_tables(@schema, options) if source.nil?

    # The option is declared as :dump_errors; reading options[:dump] always
    # produced nil, so the flag was silently ignored.
    valid = validate_csv(source, @schema, options[:dump_errors])
    exit 1 unless valid
  end

  # Prints the usage of the validate task, whatever command help was asked for.
  def help
    self.class.command_help(shell, :validate)
  end

  default_task :validate

  private

  # True when +source+ is a String that starts with an http:// or https://
  # scheme. The scheme separator is required so local files whose names
  # merely begin with "http" (e.g. "https_data.csv") are still opened as files.
  def remote_source?(source)
    !!(source =~ %r{\Ahttps?://})
  end

  # Turns the command-line argument into something the validator can read.
  #
  # source - file path, http(s) URL, or nil.
  #
  # Returns the URL string unchanged, an open File for a path, a StringIO
  # holding piped stdin when no source was given, or nil when nothing is
  # available. Exits through return_error when a file does not exist, or
  # when stdin yields nothing and no schema was supplied.
  def read_source(source)
    if source.nil?
      # If no source is present, try reading from stdin
      if !$stdin.tty?
        source = StringIO.new($stdin.read) rescue nil
        return_error "No CSV data to validate" if !options[:schema] && source.nil?
      end
    else
      # If the source isn't a URL, it's a file
      unless remote_source?(source)
        begin
          source = File.new( source )
        rescue Errno::ENOENT
          return_error "#{source} not found"
        end
      end
    end

    source
  end

  # Loads the schema named by +schema+ (file name or URL).
  #
  # Exits through return_error when the metadata is invalid, the schema
  # cannot be found, or the loaded schema is flagged as malformed JSON.
  # Returns the loaded schema object otherwise.
  def get_schema(schema)
    begin
      schema = Csvlint::Schema.load_from_json(schema, false)
    rescue Csvlint::Csvw::MetadataError => e
      return_error "invalid metadata: #{e.message}#{" at " + e.path if e.path}"
    rescue OpenURI::HTTPError, Errno::ENOENT
      return_error "#{options[:schema]} not found"
    end

    if schema.class == Csvlint::Schema && schema.description == "malformed"
      return_error "invalid metadata: malformed JSON"
    end

    schema
  end

  # Validates every table referenced by a CSVW table group schema.
  #
  # schema  - expected to be a Csvlint::Csvw::TableGroup; anything else is
  #           reported as "No CSV data to validate." and the process exits.
  # options - the command options; :dump_errors is forwarded to validate_csv.
  #
  # Exits with status 1 when any table is invalid or a local file is missing.
  def fetch_schema_tables(schema, options)
    valid = true

    unless schema.instance_of? Csvlint::Csvw::TableGroup
      return_error "No CSV data to validate."
    end
    schema.tables.keys.each do |source|
      unless remote_source?(source)
        begin
          source = source.sub("file:","")
          source = File.new( source )
        rescue Errno::ENOENT
          return_error "#{source} not found"
        end
      end
      valid &= validate_csv(source, schema, options[:dump_errors])
    end

    exit 1 unless valid
  end

  # Prints one numbered error or warning line.
  #
  # index - zero-based position in the list (printed one-based).
  # error - object responding to row, column, type and content.
  # dump  - when truthy, the raw error object is pretty-printed as well.
  # color - colorize colour applied when stdout is a terminal.
  def print_error(index, error, dump, color)
    location = ""
    location += error.row.to_s if error.row
    location += "#{error.row ? "," : ""}#{error.column.to_s}" if error.column
    if error.row || error.column
      location = "#{error.row ? "Row" : "Column"}: #{location}"
    end
    output_string = "#{index+1}. "
    if error.column && @schema && @schema.class == Csvlint::Schema
      # The reported column may have no matching schema field (for example a
      # row with more cells than the schema declares); skip the name then
      # instead of calling #name on nil.
      field = @schema.fields[error.column - 1]
      output_string += "#{field.name}: " if field
    end
    output_string += "#{error.type}"
    output_string += ". #{location}" unless location.empty?
    output_string += ". #{error.content}" if error.content

    if $stdout.tty?
      puts output_string.colorize(color)
    else
      puts output_string
    end

    if dump
      pp error
    end
  end

  # Prints every entry of +errors+ in red, numbered from 1.
  def print_errors(errors, dump)
    if errors.size > 0
      errors.each_with_index { |error, i| print_error(i, error, dump, :red) }
    end
  end

  # Prints +message+ (red on a terminal) and terminates with exit status 1.
  def return_error(message)
    if $stdout.tty?
      puts message.colorize(:red)
    else
      puts message
    end
    exit 1
  end

  # Runs the validator over +source+ and prints the verdict and all messages.
  #
  # source - URL string, File, or other stream (labelled "CSV" in the output).
  # schema - schema object or nil.
  # dump   - when truthy, error and warning objects are pretty-printed.
  #
  # Returns the validator's valid? result.
  def validate_csv(source, schema, dump)
    @error_count = 0

    validator = Csvlint::Validator.new( source, {}, schema, { lambda: report_lines } )

    if source.class == String
      csv = source
    elsif source.class == File
      csv = source.path
    else
      csv = "CSV"
    end

    if $stdout.tty?
      puts "\r\n#{csv} is #{validator.valid? ? "VALID".green : "INVALID".red}"
    else
      puts "\r\n#{csv} is #{validator.valid? ? "VALID" : "INVALID"}"
    end

    print_errors(validator.errors, dump)
    print_errors(validator.warnings, dump)

    return validator.valid?
  end

  # Builds the progress callback handed to the validator. Each call prints
  # "!" when the error count of +row+ grew since the previous call and "."
  # otherwise, then remembers the new count in @error_count.
  def report_lines
    lambda do |row|
      new_errors = row.errors.count
      if new_errors > @error_count
        print "!".red
      else
        print ".".green
      end
      @error_count = new_errors
    end
  end

end
165
+ end
@@ -0,0 +1,359 @@
1
+ module Csvlint
2
+ module Csvw
3
+ class Column
4
+ include Csvlint::ErrorCollector
5
+
6
+ attr_reader :id, :about_url, :datatype, :default, :lang, :name, :null, :number, :ordered, :property_url, :required, :separator, :source_number, :suppress_output, :text_direction, :titles, :value_url, :virtual, :annotations
7
+
8
# Creates a column description.
#
# number - position of the column; also used as source_number when none
#          is given.
# name   - column name.
#
# The keyword arguments populate the reader attributes of the same name.
# +warnings+ is appended to @warnings after reset has run.
def initialize(number, name,
               id: nil,
               about_url: nil,
               datatype: { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
               default: "",
               lang: "und",
               null: [""],
               ordered: false,
               property_url: nil,
               required: false,
               separator: nil,
               source_number: nil,
               suppress_output: false,
               text_direction: :inherit,
               titles: {},
               value_url: nil,
               virtual: false,
               annotations: [],
               warnings: [])
  # Identity
  @number, @name, @id = number, name, id
  @source_number = source_number || number
  @titles = titles
  @virtual = virtual

  # Value interpretation
  @datatype, @default, @lang = datatype, default, lang
  @null, @separator = null, separator
  @ordered, @required = ordered, required

  # URL templates, output hints and extras
  @about_url, @property_url, @value_url = about_url, property_url, value_url
  @suppress_output = suppress_output
  @text_direction = text_direction
  @annotations = annotations

  # reset comes from the included error collector; the warnings collected
  # while reading the metadata are added on top of whatever it sets up.
  reset
  @warnings += warnings
end
31
+
32
# Builds a Column from its JSON metadata description.
#
# number               - position of the column.
# column_desc          - hash of property name => raw value.
# base_url, lang       - passed through to the property checker.
# inherited_properties - properties inherited from enclosing descriptions;
#                        cloned, so the caller's hash is not modified.
#
# Each property is run through Csvw::PropertyChecker.check_property and is
# filed by the type it reports (annotation, common/column, inherited).
# Anything else is recorded as an :invalid_property warning.
#
# Raises Csvlint::Csvw::MetadataError when "@type" is present and is not
# 'Column'.
def self.from_json(number, column_desc, base_url=nil, lang="und", inherited_properties={})
  annotations = {}
  warnings = []
  column_properties = {}
  inherited_properties = inherited_properties.clone

  column_desc.each do |property, value|
    if property == "@type"
      raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'" if value != 'Column'
      next
    end

    checked, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)

    unless warning.nil? || warning.empty?
      warnings.concat(
        Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) }
      )
    end

    case type
    when :annotation
      annotations[property] = checked
    when :common, :column
      column_properties[property] = checked
    when :inherited
      inherited_properties[property] = checked
    else
      warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "column: #{property}", nil)
    end
  end

  new(
    number,
    column_properties["name"],
    id: column_properties["@id"],
    datatype: inherited_properties["datatype"] || { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
    lang: inherited_properties["lang"] || "und",
    null: inherited_properties["null"] || [""],
    # Taken from the raw description rather than from the checked value.
    property_url: column_desc["propertyUrl"],
    required: inherited_properties["required"] || false,
    separator: inherited_properties["separator"],
    titles: column_properties["titles"],
    virtual: column_properties["virtual"] || false,
    annotations: annotations,
    warnings: warnings
  )
end
70
+
71
# Checks +header+ against the titles declared for this column.
#
# Only titles whose language matches the column language (see
# Column.languages_match) are candidates. An :invalid_header error is
# recorded when +header+ is not among them.
#
# Returns the result of valid? after the check.
def validate_header(header)
  reset
  # Titles in a non-matching language contribute nothing. Previously they
  # left nil entries in the list, so a nil header was accepted whenever any
  # title was declared in another language.
  valid_headers = (@titles || {}).map { |title_lang, values| Column.languages_match(title_lang, lang) ? values : [] }.flatten
  build_errors(:invalid_header, :schema, 1, @number, header, @titles) unless valid_headers.include? header
  return valid?
end
77
+
78
# Validates one cell against this column's description.
#
# string_value - raw cell text; nil is treated as "".
# row          - row number used when recording errors.
#
# The cell is parsed first. A null cell (parse returns nil) is only checked
# for the required constraint; otherwise every parsed value goes through
# the required, format, length and value checks in that order.
#
# Returns the result of valid? after the checks.
def validate(string_value, row=nil)
  reset
  values = parse(string_value || "", row)
  # STDERR.puts "#{name} - #{string_value.inspect} - #{values.inspect}"
  if values.nil?
    validate_required(values, row)
  else
    values.each do |value|
      validate_required(value, row)
      validate_format(value, row)
      validate_length(value, row)
      validate_value(value, row)
    end
  end
  valid?
end
91
+
92
# Converts raw cell text into a list of typed values.
#
# Returns nil when +string_value+ is one of the column's null markers.
# Otherwise the text is split on the separator (when one is set) and each
# piece is handed to the parser registered for the datatype's "base"
# (falling back to its "@id"). A piece the parser rejects is recorded as an
# error under the parser's warning type and kept as the original string.
def parse(string_value, row=nil)
  return nil if null.include? string_value

  pieces = @separator.nil? ? [string_value] : string_value.split(@separator)
  pieces.map do |piece|
    parsed, warning = DATATYPE_PARSER[@datatype["base"] || @datatype["@id"]].call(piece, @datatype["format"])
    if warning.nil?
      parsed
    else
      build_errors(warning, :schema, row, @number, piece, @datatype)
      piece
    end
  end
end
107
+
108
+ private
109
+ class << self
110
+
111
# Builds a parser lambda for a date/time datatype.
#
# type    - datatype URI used to build a default Csvlint::Csvw::DateFormat
#           when the parser is called without a format.
# warning - symbol returned when the value cannot be parsed.
#
# The lambda takes (value, format) and returns [parsed, nil] on success or
# [nil, warning] when the format's parse returns nil.
def create_date_parser(type, warning)
  lambda do |value, format|
    date_format = format.nil? ? Csvlint::Csvw::DateFormat.new(nil, type) : format
    parsed = date_format.parse(value)
    parsed.nil? ? [nil, warning] : [parsed, nil]
  end
end
119
+
120
# Builds a parser lambda that accepts a value only when the whole value
# matches +regexp+.
#
# regexp  - lexical pattern for the datatype.
# warning - symbol returned when the value does not match.
#
# The lambda takes (value, format) and returns [value, nil] on a match or
# [nil, warning] otherwise; +format+ is ignored.
def create_regexp_based_parser(regexp, warning)
  # XML Schema patterns describe the complete lexical form, so the pattern
  # is anchored to the start and end of the string. Matching it unanchored
  # accepted any value that merely contained a valid fragment (e.g.
  # "xxP1Yzz"). Built once here rather than on every call.
  whole_value = Regexp.new("\\A(?:#{regexp.source})\\z", regexp.options)
  return lambda { |value, format|
    return nil, warning unless value =~ whole_value
    return value, nil
  }
end
126
+
127
# Tells whether two language tags are compatible.
#
# Tags match when they are equal, when either is "und" (undetermined), or
# when one is a prefix of the other followed by "-" (e.g. "en" and "en-GB").
#
# Returns true or false.
def languages_match(l1, l2)
  return true if l1 == l2 || l1 == "und" || l2 == "und"
  # Plain prefix comparison instead of interpolating the tags into a Regexp:
  # a tag containing regexp metacharacters could act as a wildcard or raise
  # RegexpError, and "^" anchors at any line start rather than at the start
  # of the string.
  first, second = l1.to_s, l2.to_s
  first.start_with?("#{second}-") || second.start_with?("#{first}-")
end
132
+ end
133
+
134
# Records a :required error when the column is required and +value+ is nil.
def validate_required(value, row)
  return unless @required && value.nil?

  build_errors(:required, :schema, row, number, value, { "required" => @required })
end
137
+
138
# Checks +value+ against the datatype's length, minLength and maxLength
# constraints and records an error for each one that is violated.
#
# The measured length is the value's own length, except that:
# * base64Binary counts the text without trailing "=" padding, times 3 / 4;
# * hexBinary counts half the number of characters.
# The type is recognised through either "@id" or "base"; when both rules
# apply, the hexBinary one wins.
def validate_length(value, row)
  exact   = datatype["length"]
  minimum = datatype["minLength"]
  maximum = datatype["maxLength"]
  return unless exact || minimum || maximum

  declared_types = [datatype["@id"], datatype["base"]]
  length = value.length
  if declared_types.include?("http://www.w3.org/2001/XMLSchema#hexBinary")
    length = value.length / 2
  elsif declared_types.include?("http://www.w3.org/2001/XMLSchema#base64Binary")
    length = value.gsub(/==?$/,"").length * 3 / 4
  end

  build_errors(:min_length, :schema, row, number, value, { "minLength" => minimum }) if minimum && length < minimum
  build_errors(:max_length, :schema, row, number, value, { "maxLength" => maximum }) if maximum && length > maximum
  build_errors(:length, :schema, row, number, value, { "length" => exact }) if exact && length != exact
end
149
+
150
# Records a :format error when the datatype declares a "format" and the
# format validator registered for the datatype rejects +value+.
def validate_format(value, row)
  format = datatype["format"]
  return unless format

  # Resolve the datatype the same way parse does ("base", falling back to
  # "@id"), so a datatype described only by "@id" is looked up instead of
  # crashing with a call on nil.
  validator = DATATYPE_FORMAT_VALIDATION[datatype["base"] || datatype["@id"]]
  # No validator registered for this datatype: there is nothing to check
  # the format with.
  return if validator.nil?

  build_errors(:format, :schema, row, number, value, { "format" => format }) unless validator.call(value, format)
end
155
+
156
# Checks +value+ against the datatype's range constraints and records an
# error for each bound that is violated.
#
# Each entry below is: constraint key, error type, and the comparison that
# signals a violation (value <op> bound). Constraints that are not set are
# skipped.
def validate_value(value, row)
  [
    ["minInclusive", :min_inclusive, :<],
    ["maxInclusive", :max_inclusive, :>],
    ["minExclusive", :min_exclusive, :<=],
    ["maxExclusive", :max_exclusive, :>=]
  ].each do |constraint, error_type, violated_when|
    bound = datatype[constraint]
    next unless bound

    build_errors(error_type, :schema, row, number, value, { constraint => bound }) if value.public_send(violated_when, bound)
  end
end
162
+
163
# Format check for datatypes whose "format" is matched against the value
# with =~ ; returns the match position or nil.
REGEXP_VALIDATION = ->(value, format) { value =~ format }

# Format check for datatypes that need no further validation; always true.
NO_ADDITIONAL_VALIDATION = ->(value, format) { true }

# Maps a datatype URI to the lambda used to check a value against the
# datatype's "format". Each lambda is called with (value, format).
DATATYPE_FORMAT_VALIDATION = {
  # RDF / CSVW literals
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => REGEXP_VALIDATION,
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML"       => REGEXP_VALIDATION,
  "http://www.w3.org/ns/csvw#JSON"                        => REGEXP_VALIDATION,

  # Generic atomic types
  "http://www.w3.org/2001/XMLSchema#anyAtomicType"        => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#anyURI"               => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#base64Binary"         => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#boolean"              => NO_ADDITIONAL_VALIDATION,

  # Dates with time
  "http://www.w3.org/2001/XMLSchema#date"                 => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#dateTime"             => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp"        => NO_ADDITIONAL_VALIDATION,

  # Numbers
  "http://www.w3.org/2001/XMLSchema#decimal"              => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#integer"              => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#long"                 => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#int"                  => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#short"                => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#byte"                 => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger"   => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#positiveInteger"      => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#unsignedLong"         => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#unsignedInt"          => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#unsignedShort"        => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#unsignedByte"         => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"   => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#negativeInteger"      => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#double"               => NO_ADDITIONAL_VALIDATION,

  # Durations
  "http://www.w3.org/2001/XMLSchema#duration"             => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration"      => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration"    => REGEXP_VALIDATION,

  "http://www.w3.org/2001/XMLSchema#float"                => NO_ADDITIONAL_VALIDATION,

  # Partial dates
  "http://www.w3.org/2001/XMLSchema#gDay"                 => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gMonth"               => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gMonthDay"            => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gYear"                => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gYearMonth"           => NO_ADDITIONAL_VALIDATION,

  # Binary and string-like types
  "http://www.w3.org/2001/XMLSchema#hexBinary"            => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#QName"                => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#string"               => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#normalizedString"     => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#token"                => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#language"             => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#Name"                 => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#NMTOKEN"              => REGEXP_VALIDATION,

  "http://www.w3.org/2001/XMLSchema#time"                 => NO_ADDITIONAL_VALIDATION
}
212
+
213
# Parser that accepts every value unchanged and never reports a warning.
ALL_VALUES_VALID = ->(value, format) { [value, nil] }

# Parser shared by the numeric datatypes. Uses the supplied number format,
# or a default Csvlint::Csvw::NumberFormat when none is given, and returns
# [number, nil] on success or [nil, :invalid_number] when parsing yields nil.
NUMERIC_PARSER = lambda do |value, format|
  number_format = format.nil? ? Csvlint::Csvw::NumberFormat.new() : format
  parsed = number_format.parse(value)
  parsed.nil? ? [nil, :invalid_number] : [parsed, nil]
end

# Builds the parser for an integer type derived from +parent_type+.
# The value is parsed with the parent's parser (looked up in DATATYPE_PARSER
# at call time). Any parent warning is replaced by +failure+; a parsed
# number that +accepts+ rejects yields [nil, failure].
restricted_integer_parser = lambda do |parent_type, failure, accepts|
  lambda do |value, format|
    parsed, problem = DATATYPE_PARSER[parent_type].call(value, format)
    next [parsed, failure] unless problem.nil?
    next [nil, failure] unless accepts.call(parsed)
    [parsed, problem]
  end
end

# Maps a datatype URI to the lambda that parses a raw string for that type.
# Each lambda is called with (value, format) and returns [parsed, warning],
# where warning is nil on success.
DATATYPE_PARSER = {
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => ALL_VALUES_VALID,
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => ALL_VALUES_VALID,
  "http://www.w3.org/ns/csvw#JSON" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyURI" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#base64Binary" => ALL_VALUES_VALID,
  # Without a format, "true"/"1" and "false"/"0" are recognised; with one,
  # format[0] is the true literal and format[1] the false literal.
  "http://www.w3.org/2001/XMLSchema#boolean" => lambda { |value, format|
    if format.nil?
      next [true, nil] if ["true", "1"].include? value
      next [false, nil] if ["false", "0"].include? value
    else
      next [true, nil] if value == format[0]
      next [false, nil] if value == format[1]
    end
    [value, :invalid_boolean]
  },
  "http://www.w3.org/2001/XMLSchema#date" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
  "http://www.w3.org/2001/XMLSchema#dateTime" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
  # Values containing "E", or equal to NaN / INF / -INF, are rejected before
  # numeric parsing.
  "http://www.w3.org/2001/XMLSchema#decimal" => lambda { |value, format|
    next [nil, :invalid_decimal] if value =~ /(E|^(NaN|INF|-INF)$)/
    NUMERIC_PARSER.call(value, format)
  },
  # The numeric parse must succeed and produce an Integer.
  "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, format|
    parsed, problem = NUMERIC_PARSER.call(value, format)
    next [parsed, :invalid_integer] unless problem.nil?
    next [nil, :invalid_integer] unless parsed.kind_of? Integer
    [parsed, problem]
  },
  "http://www.w3.org/2001/XMLSchema#long" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_long,
                                   ->(n) { n <= 9223372036854775807 && n >= -9223372036854775808 }),
  "http://www.w3.org/2001/XMLSchema#int" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_int,
                                   ->(n) { n <= 2147483647 && n >= -2147483648 }),
  "http://www.w3.org/2001/XMLSchema#short" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_short,
                                   ->(n) { n <= 32767 && n >= -32768 }),
  "http://www.w3.org/2001/XMLSchema#byte" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_byte,
                                   ->(n) { n <= 127 && n >= -128 }),
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_nonNegativeInteger,
                                   ->(n) { n >= 0 }),
  "http://www.w3.org/2001/XMLSchema#positiveInteger" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_positiveInteger,
                                   ->(n) { n > 0 }),
  "http://www.w3.org/2001/XMLSchema#unsignedLong" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedLong,
                                   ->(n) { n <= 18446744073709551615 }),
  "http://www.w3.org/2001/XMLSchema#unsignedInt" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedInt,
                                   ->(n) { n <= 4294967295 }),
  "http://www.w3.org/2001/XMLSchema#unsignedShort" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedShort,
                                   ->(n) { n <= 65535 }),
  "http://www.w3.org/2001/XMLSchema#unsignedByte" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedByte,
                                   ->(n) { n <= 255 }),
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_nonPositiveInteger,
                                   ->(n) { n <= 0 }),
  "http://www.w3.org/2001/XMLSchema#negativeInteger" =>
    restricted_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_negativeInteger,
                                   ->(n) { n < 0 }),
  "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
  # regular expressions here taken from XML Schema datatypes spec
  "http://www.w3.org/2001/XMLSchema#duration" =>
    create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration" =>
    create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
  # NOTE(review): reports :invalid_duration rather than a type-specific
  # symbol like the entries around it - confirm whether that is intended.
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration" =>
    create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
  "http://www.w3.org/2001/XMLSchema#gDay" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
  "http://www.w3.org/2001/XMLSchema#gMonth" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
  "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
  "http://www.w3.org/2001/XMLSchema#gYear" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
  "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
  "http://www.w3.org/2001/XMLSchema#hexBinary" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#QName" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#normalizedString" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#token" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#language" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#Name" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#NMTOKEN" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#time" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
}
357
+ end
358
+ end
359
+ end