flat_kit 0.2.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +15 -0
  4. data/Manifest.txt +21 -26
  5. data/{bin → exe}/fk +2 -1
  6. data/flat_kit.gemspec +33 -0
  7. data/lib/flat_kit/cli.rb +48 -23
  8. data/lib/flat_kit/command/cat.rb +34 -32
  9. data/lib/flat_kit/command/merge.rb +37 -36
  10. data/lib/flat_kit/command/sort.rb +37 -37
  11. data/lib/flat_kit/command/stats.rb +96 -0
  12. data/lib/flat_kit/command.rb +10 -10
  13. data/lib/flat_kit/descendant_tracker.rb +17 -5
  14. data/lib/flat_kit/error.rb +4 -0
  15. data/lib/flat_kit/event_emitter.rb +7 -4
  16. data/lib/flat_kit/field_stats.rb +246 -0
  17. data/lib/flat_kit/field_type/boolean_type.rb +52 -0
  18. data/lib/flat_kit/field_type/date_type.rb +181 -0
  19. data/lib/flat_kit/field_type/float_type.rb +43 -0
  20. data/lib/flat_kit/field_type/guess_type.rb +23 -0
  21. data/lib/flat_kit/field_type/integer_type.rb +36 -0
  22. data/lib/flat_kit/field_type/null_type.rb +39 -0
  23. data/lib/flat_kit/field_type/string_type.rb +24 -0
  24. data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
  25. data/lib/flat_kit/field_type/unknown_type.rb +30 -0
  26. data/lib/flat_kit/field_type.rb +83 -0
  27. data/lib/flat_kit/format.rb +11 -5
  28. data/lib/flat_kit/input/file.rb +11 -9
  29. data/lib/flat_kit/input/io.rb +18 -21
  30. data/lib/flat_kit/input.rb +8 -7
  31. data/lib/flat_kit/internal_node.rb +22 -19
  32. data/lib/flat_kit/jsonl/format.rb +6 -2
  33. data/lib/flat_kit/jsonl/reader.rb +7 -4
  34. data/lib/flat_kit/jsonl/record.rb +16 -19
  35. data/lib/flat_kit/jsonl/writer.rb +25 -18
  36. data/lib/flat_kit/jsonl.rb +8 -4
  37. data/lib/flat_kit/leaf_node.rb +6 -5
  38. data/lib/flat_kit/log_formatter.rb +20 -0
  39. data/lib/flat_kit/logger.rb +12 -19
  40. data/lib/flat_kit/merge.rb +21 -16
  41. data/lib/flat_kit/merge_tree.rb +5 -6
  42. data/lib/flat_kit/output/file.rb +13 -9
  43. data/lib/flat_kit/output/io.rb +40 -35
  44. data/lib/flat_kit/output.rb +12 -7
  45. data/lib/flat_kit/position.rb +18 -0
  46. data/lib/flat_kit/reader.rb +8 -8
  47. data/lib/flat_kit/record.rb +12 -12
  48. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  49. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  50. data/lib/flat_kit/sort.rb +8 -9
  51. data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
  52. data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
  53. data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
  54. data/lib/flat_kit/stat_type.rb +70 -0
  55. data/lib/flat_kit/stats.rb +64 -0
  56. data/lib/flat_kit/writer.rb +17 -3
  57. data/lib/flat_kit/xsv/format.rb +6 -2
  58. data/lib/flat_kit/xsv/reader.rb +8 -6
  59. data/lib/flat_kit/xsv/record.rb +21 -15
  60. data/lib/flat_kit/xsv/writer.rb +36 -18
  61. data/lib/flat_kit/xsv.rb +7 -4
  62. data/lib/flat_kit.rb +33 -21
  63. metadata +38 -113
  64. data/Rakefile +0 -20
  65. data/tasks/default.rake +0 -242
  66. data/tasks/extension.rake +0 -38
  67. data/tasks/man.rake +0 -7
  68. data/tasks/this.rb +0 -208
  69. data/test/device_dataset.rb +0 -117
  70. data/test/input/test_file.rb +0 -73
  71. data/test/input/test_io.rb +0 -93
  72. data/test/jsonl/test_format.rb +0 -22
  73. data/test/jsonl/test_reader.rb +0 -49
  74. data/test/jsonl/test_record.rb +0 -61
  75. data/test/jsonl/test_writer.rb +0 -68
  76. data/test/output/test_file.rb +0 -60
  77. data/test/output/test_io.rb +0 -104
  78. data/test/test_conversions.rb +0 -45
  79. data/test/test_event_emitter.rb +0 -72
  80. data/test/test_format.rb +0 -24
  81. data/test/test_helper.rb +0 -26
  82. data/test/test_merge.rb +0 -40
  83. data/test/test_merge_tree.rb +0 -64
  84. data/test/test_version.rb +0 -11
  85. data/test/xsv/test_format.rb +0 -22
  86. data/test/xsv/test_reader.rb +0 -61
  87. data/test/xsv/test_record.rb +0 -69
  88. data/test/xsv/test_writer.rb +0 -68
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FlatKit
4
+ class Command
5
+ # Internal: The implementation of the stats command.
6
+ #
7
+ class Stats < ::FlatKit::Command
8
+ def self.name
9
+ "stats"
10
+ end
11
+
12
+ def self.description
13
+ "Collect and report stats on the inputfile"
14
+ end
15
+
16
+ def self.parser
17
+ ::Optimist::Parser.new do
18
+ banner Sort.description.to_s
19
+ banner ""
20
+
21
+ banner <<~BANNER
22
+ Given an input file collect basic statistics.
23
+
24
+ The statistics can vary based upon the datatype of the field.
25
+
26
+ Numeric fields will report the basic count, min, max, mean, standard deviation and sum.
27
+ Non-numeric fields that are comparable, like dates, will report count, min and max.
28
+ Other non-numeric fields will only report the count.
29
+
30
+ Adding --cardinality will report the count, and frequency of distinct values in the result.
31
+ This will allow for reporting the median value.
32
+
33
+ The fields upon which stats are collected may be selected with the --fields parameter.
34
+ By default statistics are collected on all fields.
35
+
36
+ The flatfile type(s) will be automatically determined by the file name.
37
+
38
+ The output can be dumped as a CSV, JSON or a a formated ascii table.
39
+
40
+ BANNER
41
+
42
+ banner <<~USAGE
43
+
44
+ Usage:
45
+ fk stats --everything file.json
46
+ fk stats --select surname,given_name file.csv
47
+ fk stats --select surname,given_name --output-format json file.csv > stats.json
48
+ fk stats --select field1,field2 --output-format json input.csv
49
+ fk stats --select field1 file.json.gz -o stats.csv
50
+ gunzip -c file.json.gz | fk stats --input-format json --output-format text
51
+
52
+ USAGE
53
+
54
+ banner <<~OPTIONS
55
+
56
+ Options:
57
+
58
+ OPTIONS
59
+
60
+ opt :output, "Send the output to the given path instead of standard out.", default: "<stdout>"
61
+ opt :input_format, "Input format, csv or json", default: "auto", short: :none
62
+ opt :output_format, "Output format, csv or json", default: "auto", short: :none
63
+ opt :select, "The comma separted list of field(s) to report stats on", required: false, type: :string
64
+ opt :everything, "Show all statistics that are possible", default: false
65
+ opt :cardinality, "Show the cardinality of the fields, this requires additional memory", default: false
66
+ end
67
+ end
68
+
69
+ def parse
70
+ parser = self.class.parser
71
+ ::Optimist.with_standard_exception_handling(parser) do
72
+ opts = parser.parse(argv)
73
+ fields = ::FlatKit::Stats::AllFields
74
+ fields = CSV.parse_line(opts[:select]) if opts[:select]
75
+
76
+ stats = [FieldStats::CORE_STATS]
77
+ stats << FieldStats::CARDINALITY_STATS if opts[:cardinality] || opts[:everything]
78
+
79
+ paths = parser.leftovers
80
+ raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed" if paths.size > 1
81
+
82
+ path = paths.first || "-" # default to stdin
83
+ @stats = ::FlatKit::Stats.new(input: path, input_fallback: opts[:input_format],
84
+ output: opts[:output], output_fallback: opts[:output_format],
85
+ fields_to_stat: fields, stats_to_collect: stats)
86
+ rescue ::FlatKit::Error => e
87
+ raise ::Optimist::CommandlineError, e.message
88
+ end
89
+ end
90
+
91
+ def call
92
+ @stats.call
93
+ end
94
+ end
95
+ end
96
+ end
@@ -1,13 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: The base class for all commands in the CLI
5
+ #
2
6
  class Command
3
7
  extend DescendantTracker
4
8
 
5
- attr_reader :argv
6
- attr_reader :env
7
- attr_reader :logger
8
- attr_reader :opts
9
- attr_reader :readers
10
- attr_reader :writer
9
+ attr_reader :argv, :env, :logger, :opts, :readers, :writer
11
10
 
12
11
  def self.name
13
12
  raise NotImplementedError, "#{self.class} must implement #{self.class}.name"
@@ -22,7 +21,7 @@ module FlatKit
22
21
  end
23
22
 
24
23
  def self.names
25
- children.map { |c| c.name }
24
+ children.map(&:name)
26
25
  end
27
26
 
28
27
  def self.for(name)
@@ -48,6 +47,7 @@ module FlatKit
48
47
  end
49
48
  end
50
49
 
51
- require 'flat_kit/command/cat'
52
- require 'flat_kit/command/merge'
53
- require 'flat_kit/command/sort'
50
+ require "flat_kit/command/cat"
51
+ require "flat_kit/command/merge"
52
+ require "flat_kit/command/sort"
53
+ require "flat_kit/command/stats"
@@ -1,17 +1,20 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
2
4
 
3
5
  module FlatKit
6
+ # Internal: A module to track descendants of a class
7
+ #
4
8
  module DescendantTracker
5
9
  def inherited(klass)
6
10
  super
7
11
  return unless klass.instance_of?(Class)
8
- self.children << klass
12
+
13
+ children << klass
9
14
  end
10
15
 
11
16
  def children
12
- unless defined? @_children
13
- @_children = Set.new
14
- end
17
+ @_children = Set.new unless defined? @_children
15
18
  @_children
16
19
  end
17
20
 
@@ -23,5 +26,14 @@ module FlatKit
23
26
  child_klass.send(method, *args)
24
27
  end
25
28
  end
29
+
30
+ #
31
+ # Find all the children that return truthy from the given method with args
32
+ #
33
+ def find_children(method, *args)
34
+ children.select do |child_klass|
35
+ child_klass.send(method, *args)
36
+ end
37
+ end
26
38
  end
27
39
  end
@@ -1,4 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: A Base error class for all FlatKit errors
5
+ #
2
6
  class Error < ::StandardError
3
7
  class UnknownFormat < ::FlatKit::Error; end
4
8
  end
@@ -1,9 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
- # A simplified Observable class for use internally
4
+ # Internal: A simplified Observable class for use internally
3
5
  #
4
6
  module EventEmitter
5
7
  def add_listener(listener)
6
8
  raise ::NoMethodError, "#{listener} does not resond to #on_event" unless listener.respond_to?(:on_event)
9
+
7
10
  self._listeners ||= []
8
11
  self._listeners << listener unless _listeners.include?(listener)
9
12
  end
@@ -20,14 +23,14 @@ module FlatKit
20
23
  _listeners.clear
21
24
  end
22
25
 
23
- def notify_listeners(name:, data:)
26
+ def notify_listeners(name:, data:, meta: nil)
24
27
  _listeners.each do |l|
25
- l.on_event(name: name, data: data)
28
+ l.on_event(name: name, data: data, meta: meta)
26
29
  end
27
30
  end
28
31
 
29
32
  def _listeners
30
- @_listeners ||= Array.new
33
+ @_listeners ||= []
31
34
  end
32
35
  end
33
36
  end
@@ -0,0 +1,246 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FlatKit
4
+ # Internal: Collect stats on a single field.
5
+ #
6
+ # We may not know what the field data type is to start with, so collect a
7
+ # bunch of values until we have the threshold, and then calculate stats based
8
+ # upon the data types determined by the guess
9
+ #
10
+ class FieldStats
11
+ DEFAULT_GUESS_THRESHOLD = 1000
12
+
13
+ CORE_STATS = :core
14
+ CARDINALITY_STATS = :cardinality
15
+
16
+ ALL_STATS = [CORE_STATS, CARDINALITY_STATS].freeze
17
+
18
+ EXPORT_FIELDS = %w[
19
+ name
20
+ type
21
+ count
22
+ max
23
+ mean
24
+ min
25
+ stddev
26
+ sum
27
+ mode
28
+ unique_count
29
+
30
+ max_length
31
+ mean_length
32
+ min_length
33
+ stddev_length
34
+ mode_length
35
+ unique_count_lengths
36
+
37
+ null_count
38
+ unknown_count
39
+ out_of_type_count
40
+ total_count
41
+ null_percent
42
+ unknown_percent
43
+ ].freeze
44
+
45
+ attr_reader :type_counts, :field_type, :name, :out_of_type_count
46
+
47
+ def initialize(name:, stats_to_collect: CORE_STATS,
48
+ type: ::FlatKit::FieldType::GuessType,
49
+ guess_threshold: DEFAULT_GUESS_THRESHOLD)
50
+ @name = name
51
+ @field_type = type
52
+ @guess_threshold = guess_threshold
53
+ @type_counts = Hash.new(0)
54
+ @out_of_type_count = 0
55
+ @values = []
56
+ @stats = nil
57
+ @length_stats = nil
58
+ @stats_to_collect = [stats_to_collect].flatten
59
+
60
+ @stats_to_collect.each do |collection_set|
61
+ next if ALL_STATS.include?(collection_set)
62
+
63
+ valid_sets = ALL_STATS.map(&:to_s).join(", ")
64
+
65
+ raise ArgumentError, "#{collection_set} is not a valid stats collection set, must be one of #{valid_sets}"
66
+ end
67
+ return if type.is_a?(Class) && (type.superclass == ::FlatKit::FieldType)
68
+
69
+ raise ArgumentError, "type: must be FieldType subclasses - not #{type}"
70
+ end
71
+
72
+ def field_type_determined?
73
+ @field_type != ::FlatKit::FieldType::GuessType
74
+ end
75
+
76
+ def update(value)
77
+ update_type_count(value)
78
+
79
+ if field_type_determined?
80
+ update_stats(value)
81
+ else
82
+ @values << value
83
+
84
+ resolve_guess if @values.size >= @guess_threshold
85
+ end
86
+ end
87
+
88
+ def collecting_frequencies?
89
+ @stats_to_collect.include?(CARDINALITY_STATS)
90
+ end
91
+
92
+ def type
93
+ @field_type.type_name
94
+ end
95
+
96
+ def count
97
+ stats.count
98
+ end
99
+
100
+ def max
101
+ stats.max if stats.respond_to?(:max)
102
+ end
103
+
104
+ def mean
105
+ stats.mean if stats.respond_to?(:mean)
106
+ end
107
+
108
+ def min
109
+ stats.min if stats.respond_to?(:min)
110
+ end
111
+
112
+ def stddev
113
+ stats.stddev if stats.respond_to?(:stddev)
114
+ end
115
+
116
+ def sum
117
+ stats.sum if stats.respond_to?(:sum)
118
+ end
119
+
120
+ def mode
121
+ stats.mode if collecting_frequencies?
122
+ end
123
+
124
+ def unique_count
125
+ stats.unique_count if collecting_frequencies?
126
+ end
127
+
128
+ def unique_values
129
+ stats.unique_values if collecting_frequencies?
130
+ end
131
+
132
+ def frequencies
133
+ stats.frequencies if collecting_frequencies?
134
+ end
135
+
136
+ def min_length
137
+ length_stats.min if @length_stats
138
+ end
139
+
140
+ def max_length
141
+ length_stats.max if @length_stats
142
+ end
143
+
144
+ def mean_length
145
+ length_stats.mean if @length_stats
146
+ end
147
+
148
+ def stddev_length
149
+ length_stats.stddev if @length_stats
150
+ end
151
+
152
+ def mode_length
153
+ length_stats.mode if @length_stats && collecting_frequencies?
154
+ end
155
+
156
+ def unique_count_lengths
157
+ length_stats.unique_count if @length_stats && collecting_frequencies?
158
+ end
159
+
160
+ def unique_values_lengths
161
+ length_stats.unique_values if @length_stats && collecting_frequencies?
162
+ end
163
+
164
+ def length_frequencies
165
+ length_stats.frequencies if @length_stats && collecting_frequencies?
166
+ end
167
+
168
+ def null_count
169
+ type_counts[FieldType::NullType]
170
+ end
171
+
172
+ def total_count
173
+ stats.count + @out_of_type_count
174
+ end
175
+
176
+ def null_percent
177
+ return 0 if total_count.zero?
178
+
179
+ ((null_count.to_f / total_count) * 100.0).truncate(2)
180
+ end
181
+
182
+ def unknown_count
183
+ type_counts[FieldType::UnknownType]
184
+ end
185
+
186
+ def unknown_percent
187
+ return 0 if total_count.zero?
188
+
189
+ ((unknown_count.to_f / total_count) * 100.0).truncate(2)
190
+ end
191
+
192
+ def to_hash
193
+ resolve_guess
194
+
195
+ {}.tap do |h|
196
+ EXPORT_FIELDS.each do |n|
197
+ h[n] = send(n)
198
+ end
199
+ end
200
+ end
201
+
202
+ private
203
+
204
+ def stats
205
+ resolve_guess
206
+ @stats
207
+ end
208
+
209
+ def length_stats
210
+ resolve_guess
211
+ @length_stats
212
+ end
213
+
214
+ def update_stats(value)
215
+ coerced_value = @field_type.coerce(value)
216
+ if coerced_value == FieldType::CoerceFailure
217
+ @out_of_type_count += 1
218
+ return
219
+ end
220
+
221
+ @stats.update(coerced_value)
222
+ @length_stats.update(coerced_value.to_s.length) if @length_stats
223
+ end
224
+
225
+ def update_type_count(value)
226
+ guess = FieldType.best_guess(value)
227
+ type_counts[guess] += 1
228
+ guess
229
+ end
230
+
231
+ def resolve_guess
232
+ return if field_type_determined?
233
+
234
+ best_guess_type, _best_guess_count = type_counts.max_by { |_k, v| v }
235
+ @field_type = best_guess_type
236
+ @stats = StatType.for(@field_type).new(collecting_frequencies: collecting_frequencies?)
237
+ if @field_type == ::FlatKit::FieldType::StringType
238
+ @length_stats = ::FlatKit::StatType::NumericalStats.new(collecting_frequencies: collecting_frequencies?)
239
+ end
240
+ @values.each do |v|
241
+ update_stats(v)
242
+ end
243
+ @values.clear
244
+ end
245
+ end
246
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FlatKit
4
+ class FieldType
5
+ # Internal: Implementation of the boolean type and coercion to the type
6
+ #
7
+ class BooleanType < FieldType
8
+ TRUTHY_REGEX = /\A(true|t|1|yes|y|on)\Z/i
9
+ FALSEY_REGEX = /\A(false|f|0|no|n|off)\Z/i
10
+ REGEX = Regexp.union(TRUTHY_REGEX, FALSEY_REGEX)
11
+
12
+ def self.type_name
13
+ "boolean"
14
+ end
15
+
16
+ def self.matches?(data)
17
+ case data
18
+ when TrueClass, FalseClass
19
+ true
20
+ when String
21
+ REGEX.match?(data)
22
+ when Integer
23
+ return true if data.zero?
24
+ return true if data == 1
25
+
26
+ false
27
+ else
28
+ false
29
+ end
30
+ end
31
+
32
+ def self.coerce(data)
33
+ case data
34
+ when TrueClass
35
+ true
36
+ when FalseClass
37
+ false
38
+ when Numeric
39
+ return false if data.zero?
40
+ return true if data == 1
41
+
42
+ CoerceFailure
43
+ when String
44
+ return true if TRUTHY_REGEX.match?(data)
45
+ return false if FALSEY_REGEX.match?(data)
46
+
47
+ CoerceFailure
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FlatKit
4
+ class FieldType
5
+ # Internal: Representing the type of data which only includes data up to
6
+ # the day resolution
7
+ #
8
+ class DateType < FieldType
9
+ # %Y 4 digit year
10
+ # %y 2 digit year (%Y mod 100) (00..99)
11
+ # %m month of year zero padded
12
+ # %-m month of year no-padding
13
+ # %B Full month name
14
+ # %b Abbreviated month name
15
+ # %^b uppercased month name
16
+ # %d day of month zero padded
17
+ # %-d day of month not padded
18
+ # %e day of month blank padded
19
+ # %j day of year zero padded
20
+
21
+ # parse formats are not the same as print formats as parsing does not deal
22
+ # with flags and widths
23
+ def self.parse_formats
24
+ @parse_formats ||= [
25
+ # YMD formats
26
+ "%Y-%m-%d",
27
+ "%Y%m%d",
28
+ "%Y/%m/%d",
29
+ "%Y %m %d.",
30
+
31
+ # DMY formats
32
+ "%d %B %Y",
33
+ "%d %b %Y",
34
+ "%d-%b-%Y",
35
+ "%d/%b/%Y",
36
+ "%d-%m-%Y",
37
+ "%d-%m-%y",
38
+ "%d %b, %Y",
39
+ "%d %b,%Y",
40
+ "%d %B, %Y",
41
+ "%d %B,%Y",
42
+
43
+ # MDY formats
44
+ "%m/%d/%Y",
45
+ "%m-%d-%Y",
46
+ "%m/%d/%y",
47
+ "%m-%d-%y",
48
+
49
+ "%B %d, %Y",
50
+ "%b %d, %Y",
51
+
52
+ # other formats
53
+ "%Y-%j",
54
+ "%a %b %d %Y",
55
+ ].freeze
56
+ end
57
+
58
+ # https://en.wikipedia.org/wiki/Date_format_by_country
59
+ # List of formats culled from the above - not using all as it is
60
+ # definitely a performance issue at the moment
61
+ # def self.known_formats
62
+ # @known_formats ||= [
63
+ # # YMD formats
64
+ # "%Y-%m-%d",
65
+ # "%Y%m%d",
66
+ # "%Y/%m/%d",
67
+ # "%Y.%m.%d",
68
+ # "%Y.%m.%d.",
69
+ # "%Y %m %d.",
70
+ # "%Y %b %d",
71
+ # "%Y %b %-d",
72
+ # "%Y %B %-d",
73
+ # "%Y %B %d",
74
+ # "%Y-%m%d",
75
+ # "%Y. %m. %-d.",
76
+ # "%Y. %m. %d.",
77
+ # "%Y.%-m.%-d.",
78
+ # "%Y.%-m.%-d",
79
+ # "%Y, %d %B",
80
+ # "%Y, %d %b",
81
+ #
82
+ # "%y.%-m.%-d",
83
+ # "%y.%-m.%-d.",
84
+ # "%y.%m.%d.",
85
+ # "%y.%m.%d",
86
+ # "%y/%m/%d",
87
+ #
88
+ # # DMY formats
89
+ # "%-d %b %Y",
90
+ # "%-d %B %Y",
91
+ # "%-d-%-m-%Y",
92
+ # "%-d. %-m. %Y",
93
+ # "%-d. %-m. %Y.",
94
+ # "%-d. %B %Y",
95
+ # "%-d. %B %Y.",
96
+ # "%-d.%-m.%Y",
97
+ # "%-d.%-m.%Y.",
98
+ # "%-d.%m.%Y.",
99
+ # "%-d.%m.%Y",
100
+ # "%-d.%b.%Y",
101
+ # "%-d.%B.%Y",
102
+ # "%-d/%-m %Y",
103
+ # "%-d/%-m/%Y",
104
+ # "%d %B %Y",
105
+ # "%d %b %Y",
106
+ # "%d-%m-%Y",
107
+ # "%d-%b-%Y",
108
+ # "%d-%B-%Y",
109
+ # "%d.%m.%Y",
110
+ # "%d/%m %Y",
111
+ # "%d/%m/%Y",
112
+ #
113
+ # "%-d.%b.%y",
114
+ # "%-d.%B.%y",
115
+ # "%-d.%-m.%y",
116
+ # "%-d/%-m-%y",
117
+ # "%-d/%-m/%y",
118
+ # "%d/%m/%y",
119
+ # "%d-%m-%y",
120
+ # "%d.%m.%y",
121
+ # "%d%m%y",
122
+ #
123
+ # # MDY formats
124
+ # "%-m/%-d/%Y",
125
+ # "%m/%d/%Y",
126
+ # "%m-%d-%Y",
127
+ # "%b-%d-%Y",
128
+ # "%B %-d, %Y",
129
+ # "%B %-d. %Y",
130
+ # "%B %d, %Y",
131
+ # "%B-%d-%Y",
132
+ # "%B/%d/%Y",
133
+ #
134
+ # "%-m/%-d/%y",
135
+ #
136
+ # # other formats
137
+ # "%Y-%j",
138
+ # "%Y%m",
139
+ # "%Y-%m",
140
+ # "%Y %m",
141
+ # ]
142
+ # end
143
+
144
+ def self.type_name
145
+ "date"
146
+ end
147
+
148
+ def self.matches?(data)
149
+ coerced = coerce(data)
150
+ coerced.is_a?(Date)
151
+ end
152
+
153
+ def self.coerce(data)
154
+ case data
155
+ when DateTime
156
+ CoerceFailure
157
+ when Date
158
+ data
159
+ when String
160
+ try_parse(data)
161
+ else
162
+ CoerceFailure
163
+ end
164
+ end
165
+
166
+ def self.try_parse(data)
167
+ parse_formats.each do |format|
168
+ coerced_data = Date.strptime(data, format)
169
+ return coerced_data
170
+ rescue StandardError => _e
171
+ false
172
+ end
173
+ CoerceFailure
174
+ end
175
+ end
176
+ end
177
+ end
178
+
179
+
180
+ __END__
181
+