flat_kit 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +15 -0
  4. data/Manifest.txt +21 -26
  5. data/{bin → exe}/fk +2 -1
  6. data/flat_kit.gemspec +33 -0
  7. data/lib/flat_kit/cli.rb +48 -23
  8. data/lib/flat_kit/command/cat.rb +34 -32
  9. data/lib/flat_kit/command/merge.rb +37 -36
  10. data/lib/flat_kit/command/sort.rb +37 -37
  11. data/lib/flat_kit/command/stats.rb +96 -0
  12. data/lib/flat_kit/command.rb +10 -10
  13. data/lib/flat_kit/descendant_tracker.rb +17 -5
  14. data/lib/flat_kit/error.rb +4 -0
  15. data/lib/flat_kit/event_emitter.rb +7 -4
  16. data/lib/flat_kit/field_stats.rb +246 -0
  17. data/lib/flat_kit/field_type/boolean_type.rb +52 -0
  18. data/lib/flat_kit/field_type/date_type.rb +181 -0
  19. data/lib/flat_kit/field_type/float_type.rb +43 -0
  20. data/lib/flat_kit/field_type/guess_type.rb +23 -0
  21. data/lib/flat_kit/field_type/integer_type.rb +36 -0
  22. data/lib/flat_kit/field_type/null_type.rb +39 -0
  23. data/lib/flat_kit/field_type/string_type.rb +24 -0
  24. data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
  25. data/lib/flat_kit/field_type/unknown_type.rb +30 -0
  26. data/lib/flat_kit/field_type.rb +83 -0
  27. data/lib/flat_kit/format.rb +11 -5
  28. data/lib/flat_kit/input/file.rb +11 -9
  29. data/lib/flat_kit/input/io.rb +18 -21
  30. data/lib/flat_kit/input.rb +8 -7
  31. data/lib/flat_kit/internal_node.rb +22 -19
  32. data/lib/flat_kit/jsonl/format.rb +6 -2
  33. data/lib/flat_kit/jsonl/reader.rb +7 -4
  34. data/lib/flat_kit/jsonl/record.rb +16 -19
  35. data/lib/flat_kit/jsonl/writer.rb +25 -18
  36. data/lib/flat_kit/jsonl.rb +8 -4
  37. data/lib/flat_kit/leaf_node.rb +6 -5
  38. data/lib/flat_kit/log_formatter.rb +20 -0
  39. data/lib/flat_kit/logger.rb +12 -19
  40. data/lib/flat_kit/merge.rb +21 -16
  41. data/lib/flat_kit/merge_tree.rb +5 -6
  42. data/lib/flat_kit/output/file.rb +13 -9
  43. data/lib/flat_kit/output/io.rb +40 -35
  44. data/lib/flat_kit/output.rb +12 -7
  45. data/lib/flat_kit/position.rb +18 -0
  46. data/lib/flat_kit/reader.rb +8 -8
  47. data/lib/flat_kit/record.rb +12 -12
  48. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  49. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  50. data/lib/flat_kit/sort.rb +8 -9
  51. data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
  52. data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
  53. data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
  54. data/lib/flat_kit/stat_type.rb +70 -0
  55. data/lib/flat_kit/stats.rb +64 -0
  56. data/lib/flat_kit/writer.rb +17 -3
  57. data/lib/flat_kit/xsv/format.rb +6 -2
  58. data/lib/flat_kit/xsv/reader.rb +8 -6
  59. data/lib/flat_kit/xsv/record.rb +21 -15
  60. data/lib/flat_kit/xsv/writer.rb +36 -18
  61. data/lib/flat_kit/xsv.rb +7 -4
  62. data/lib/flat_kit.rb +33 -21
  63. metadata +38 -113
  64. data/Rakefile +0 -20
  65. data/tasks/default.rake +0 -242
  66. data/tasks/extension.rake +0 -38
  67. data/tasks/man.rake +0 -7
  68. data/tasks/this.rb +0 -208
  69. data/test/device_dataset.rb +0 -117
  70. data/test/input/test_file.rb +0 -73
  71. data/test/input/test_io.rb +0 -93
  72. data/test/jsonl/test_format.rb +0 -22
  73. data/test/jsonl/test_reader.rb +0 -49
  74. data/test/jsonl/test_record.rb +0 -61
  75. data/test/jsonl/test_writer.rb +0 -68
  76. data/test/output/test_file.rb +0 -60
  77. data/test/output/test_io.rb +0 -104
  78. data/test/test_conversions.rb +0 -45
  79. data/test/test_event_emitter.rb +0 -72
  80. data/test/test_format.rb +0 -24
  81. data/test/test_helper.rb +0 -26
  82. data/test/test_merge.rb +0 -40
  83. data/test/test_merge_tree.rb +0 -64
  84. data/test/test_version.rb +0 -11
  85. data/test/xsv/test_format.rb +0 -22
  86. data/test/xsv/test_reader.rb +0 -61
  87. data/test/xsv/test_record.rb +0 -69
  88. data/test/xsv/test_writer.rb +0 -68
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
module FlatKit
  class Command
    # Internal: The implementation of the stats command.
    #
    # Parses the command line for `fk stats`, builds a ::FlatKit::Stats
    # instance from the parsed options and runs it when the command is called.
    #
    class Stats < ::FlatKit::Command
      # Internal: The name under which this command is registered in the CLI.
      #
      # Returns a String.
      def self.name
        "stats"
      end

      # Internal: The one line description of this command.
      #
      # Returns a String.
      def self.description
        "Collect and report stats on the inputfile"
      end

      # Internal: Build the commandline parser for the stats command.
      #
      # Returns an ::Optimist::Parser holding the banner text and options.
      def self.parser
        ::Optimist::Parser.new do
          # The help text leads with the description of *this* command.
          banner Stats.description.to_s
          banner ""

          banner <<~BANNER
            Given an input file collect basic statistics.

            The statistics can vary based upon the datatype of the field.

            Numeric fields will report the basic count, min, max, mean, standard deviation and sum.
            Non-numeric fields that are comparable, like dates, will report count, min and max.
            Other non-numeric fields will only report the count.

            Adding --cardinality will report the count, and frequency of distinct values in the result.
            This will allow for reporting the median value.

            The fields upon which stats are collected may be selected with the --fields parameter.
            By default statistics are collected on all fields.

            The flatfile type(s) will be automatically determined by the file name.

            The output can be dumped as a CSV, JSON or a a formated ascii table.

          BANNER

          banner <<~USAGE

            Usage:
            fk stats --everything file.json
            fk stats --select surname,given_name file.csv
            fk stats --select surname,given_name --output-format json file.csv > stats.json
            fk stats --select field1,field2 --output-format json input.csv
            fk stats --select field1 file.json.gz -o stats.csv
            gunzip -c file.json.gz | fk stats --input-format json --output-format text

          USAGE

          banner <<~OPTIONS

            Options:

          OPTIONS

          opt :output, "Send the output to the given path instead of standard out.", default: "<stdout>"
          opt :input_format, "Input format, csv or json", default: "auto", short: :none
          opt :output_format, "Output format, csv or json", default: "auto", short: :none
          opt :select, "The comma separted list of field(s) to report stats on", required: false, type: :string
          opt :everything, "Show all statistics that are possible", default: false
          opt :cardinality, "Show the cardinality of the fields, this requires additional memory", default: false
        end
      end

      # Internal: Parse argv and build the ::FlatKit::Stats instance to run.
      #
      # At most one input path is accepted; with no path given "-" is used.
      # Any ::FlatKit::Error raised while building the stats object is
      # re-raised as an ::Optimist::CommandlineError carrying the same message.
      #
      # Raises ::Optimist::CommandlineError if more than one input path is given.
      def parse
        parser = self.class.parser
        ::Optimist.with_standard_exception_handling(parser) do
          opts = parser.parse(argv)

          # --select is a single comma separated string; without it all fields
          # are reported on.
          fields = ::FlatKit::Stats::AllFields
          fields = CSV.parse_line(opts[:select]) if opts[:select]

          # Core stats are always collected, cardinality only when asked for.
          stats = [FieldStats::CORE_STATS]
          stats << FieldStats::CARDINALITY_STATS if opts[:cardinality] || opts[:everything]

          paths = parser.leftovers
          raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed" if paths.size > 1

          path = paths.first || "-" # default to stdin
          @stats = ::FlatKit::Stats.new(input: path, input_fallback: opts[:input_format],
                                        output: opts[:output], output_fallback: opts[:output_format],
                                        fields_to_stat: fields, stats_to_collect: stats)
        rescue ::FlatKit::Error => e
          raise ::Optimist::CommandlineError, e.message
        end
      end

      # Internal: Run the stats collection that was configured by #parse.
      def call
        @stats.call
      end
    end
  end
end
@@ -1,13 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: The base class for all commands in the CLI
5
+ #
2
6
  class Command
3
7
  extend DescendantTracker
4
8
 
5
- attr_reader :argv
6
- attr_reader :env
7
- attr_reader :logger
8
- attr_reader :opts
9
- attr_reader :readers
10
- attr_reader :writer
9
+ attr_reader :argv, :env, :logger, :opts, :readers, :writer
11
10
 
12
11
  def self.name
13
12
  raise NotImplementedError, "#{self.class} must implement #{self.class}.name"
@@ -22,7 +21,7 @@ module FlatKit
22
21
  end
23
22
 
24
23
  def self.names
25
- children.map { |c| c.name }
24
+ children.map(&:name)
26
25
  end
27
26
 
28
27
  def self.for(name)
@@ -48,6 +47,7 @@ module FlatKit
48
47
  end
49
48
  end
50
49
 
51
- require 'flat_kit/command/cat'
52
- require 'flat_kit/command/merge'
53
- require 'flat_kit/command/sort'
50
+ require "flat_kit/command/cat"
51
+ require "flat_kit/command/merge"
52
+ require "flat_kit/command/sort"
53
+ require "flat_kit/command/stats"
@@ -1,17 +1,20 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
2
4
 
3
5
  module FlatKit
6
+ # Internal: A module to track descendants of a class
7
+ #
4
8
  module DescendantTracker
5
9
  def inherited(klass)
6
10
  super
7
11
  return unless klass.instance_of?(Class)
8
- self.children << klass
12
+
13
+ children << klass
9
14
  end
10
15
 
11
16
  def children
12
- unless defined? @_children
13
- @_children = Set.new
14
- end
17
+ @_children = Set.new unless defined? @_children
15
18
  @_children
16
19
  end
17
20
 
@@ -23,5 +26,14 @@ module FlatKit
23
26
  child_klass.send(method, *args)
24
27
  end
25
28
  end
29
+
30
#
# Collect every tracked child class for which calling `method` with `args`
# returns a truthy value.
#
# Returns the matching children, as produced by `children.select`.
#
def find_children(method, *args)
  children.select { |candidate| candidate.send(method, *args) }
end
26
38
  end
27
39
  end
@@ -1,4 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: A Base error class for all FlatKit errors
5
+ #
2
6
  class Error < ::StandardError
3
7
  class UnknownFormat < ::FlatKit::Error; end
4
8
  end
@@ -1,9 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
- # A simplified Observable class for use internally
4
+ # Internal: A simplified Observable class for use internally
3
5
  #
4
6
  module EventEmitter
5
7
  def add_listener(listener)
6
8
  raise ::NoMethodError, "#{listener} does not resond to #on_event" unless listener.respond_to?(:on_event)
9
+
7
10
  self._listeners ||= []
8
11
  self._listeners << listener unless _listeners.include?(listener)
9
12
  end
@@ -20,14 +23,14 @@ module FlatKit
20
23
  _listeners.clear
21
24
  end
22
25
 
23
- def notify_listeners(name:, data:)
26
+ def notify_listeners(name:, data:, meta: nil)
24
27
  _listeners.each do |l|
25
- l.on_event(name: name, data: data)
28
+ l.on_event(name: name, data: data, meta: meta)
26
29
  end
27
30
  end
28
31
 
29
32
  def _listeners
30
- @_listeners ||= Array.new
33
+ @_listeners ||= []
31
34
  end
32
35
  end
33
36
  end
@@ -0,0 +1,246 @@
1
+ # frozen_string_literal: true
2
+
3
module FlatKit
  # Internal: Collect stats on a single field.
  #
  # We may not know what the field data type is to start with. In that case
  # values are buffered until the guess threshold is reached (or a statistic
  # is requested), the most frequently guessed type becomes the field type,
  # and the buffered values are replayed into the stats collectors.
  #
  # When the type is given explicitly the collectors are created right away.
  #
  class FieldStats
    DEFAULT_GUESS_THRESHOLD = 1000

    CORE_STATS = :core
    CARDINALITY_STATS = :cardinality

    ALL_STATS = [CORE_STATS, CARDINALITY_STATS].freeze

    # The names of the public methods exported, in order, by #to_hash.
    EXPORT_FIELDS = %w[
      name
      type
      count
      max
      mean
      min
      stddev
      sum
      mode
      unique_count

      max_length
      mean_length
      min_length
      stddev_length
      mode_length
      unique_count_lengths

      null_count
      unknown_count
      out_of_type_count
      total_count
      null_percent
      unknown_percent
    ].freeze

    attr_reader :type_counts, :field_type, :name, :out_of_type_count

    # Internal: Create a new collector for one field.
    #
    # name:             - the name of the field
    # stats_to_collect: - one or a list of CORE_STATS / CARDINALITY_STATS
    # type:             - a direct subclass of ::FlatKit::FieldType; the
    #                     default GuessType defers the decision to the data
    # guess_threshold:  - how many values to buffer before settling the type
    #
    # Raises ArgumentError on an unknown stats set or an invalid type.
    def initialize(name:, stats_to_collect: CORE_STATS,
                   type: ::FlatKit::FieldType::GuessType,
                   guess_threshold: DEFAULT_GUESS_THRESHOLD)
      @name = name
      @field_type = type
      @guess_threshold = guess_threshold
      @type_counts = Hash.new(0)
      @out_of_type_count = 0
      @values = []
      @stats = nil
      @length_stats = nil
      @stats_to_collect = [stats_to_collect].flatten

      validate_stats_to_collect
      validate_field_type

      # With an explicit type there is nothing to guess, so the collectors
      # must exist before the first #update; otherwise @stats would stay nil.
      build_collectors if field_type_determined?
    end

    # Internal: Has the field type been settled (given or guessed)?
    def field_type_determined?
      @field_type != ::FlatKit::FieldType::GuessType
    end

    # Internal: Record one value of the field.
    #
    # The guessed type of every value is tallied. Until the field type is
    # settled the raw value is buffered, afterwards it goes straight into the
    # stats.
    def update(value)
      update_type_count(value)

      if field_type_determined?
        update_stats(value)
      else
        @values << value

        resolve_guess if @values.size >= @guess_threshold
      end
    end

    # Internal: Are cardinality (frequency) stats being collected?
    def collecting_frequencies?
      @stats_to_collect.include?(CARDINALITY_STATS)
    end

    # Internal: The type name reported by the field type.
    def type
      @field_type.type_name
    end

    # Internal: The number of values that were coerced into the field type.
    def count
      stats.count
    end

    # The following core statistics return nil when the underlying stats
    # collector does not provide them.

    def max
      stats.max if stats.respond_to?(:max)
    end

    def mean
      stats.mean if stats.respond_to?(:mean)
    end

    def min
      stats.min if stats.respond_to?(:min)
    end

    def stddev
      stats.stddev if stats.respond_to?(:stddev)
    end

    def sum
      stats.sum if stats.respond_to?(:sum)
    end

    # The following statistics return nil unless cardinality stats are being
    # collected.

    def mode
      stats.mode if collecting_frequencies?
    end

    def unique_count
      stats.unique_count if collecting_frequencies?
    end

    def unique_values
      stats.unique_values if collecting_frequencies?
    end

    def frequencies
      stats.frequencies if collecting_frequencies?
    end

    # The *_length statistics are only available when length stats are being
    # kept, which happens for string fields; otherwise they return nil.

    def min_length
      length_stats.min if @length_stats
    end

    def max_length
      length_stats.max if @length_stats
    end

    def mean_length
      length_stats.mean if @length_stats
    end

    def stddev_length
      length_stats.stddev if @length_stats
    end

    def mode_length
      length_stats.mode if @length_stats && collecting_frequencies?
    end

    def unique_count_lengths
      length_stats.unique_count if @length_stats && collecting_frequencies?
    end

    def unique_values_lengths
      length_stats.unique_values if @length_stats && collecting_frequencies?
    end

    def length_frequencies
      length_stats.frequencies if @length_stats && collecting_frequencies?
    end

    # Internal: How many values were guessed to be of the null type.
    def null_count
      type_counts[FieldType::NullType]
    end

    # Internal: Values counted by the stats plus those that failed coercion.
    def total_count
      stats.count + @out_of_type_count
    end

    # Internal: null_count as a percentage of total_count, truncated to 2
    # decimal places; 0 when nothing has been counted.
    def null_percent
      return 0 if total_count.zero?

      ((null_count.to_f / total_count) * 100.0).truncate(2)
    end

    # Internal: How many values were guessed to be of the unknown type.
    def unknown_count
      type_counts[FieldType::UnknownType]
    end

    # Internal: unknown_count as a percentage of total_count, truncated to 2
    # decimal places; 0 when nothing has been counted.
    def unknown_percent
      return 0 if total_count.zero?

      ((unknown_count.to_f / total_count) * 100.0).truncate(2)
    end

    # Internal: Export every EXPORT_FIELDS statistic keyed by its name.
    #
    # Returns a Hash of String => value.
    def to_hash
      resolve_guess

      EXPORT_FIELDS.each_with_object({}) do |field_name, exported|
        exported[field_name] = send(field_name)
      end
    end

    private

    # Raise ArgumentError unless every requested stats set is a known one.
    def validate_stats_to_collect
      @stats_to_collect.each do |collection_set|
        next if ALL_STATS.include?(collection_set)

        valid_sets = ALL_STATS.map(&:to_s).join(", ")

        raise ArgumentError, "#{collection_set} is not a valid stats collection set, must be one of #{valid_sets}"
      end
    end

    # Raise ArgumentError unless the type is a direct FieldType subclass.
    def validate_field_type
      return if @field_type.is_a?(Class) && (@field_type.superclass == ::FlatKit::FieldType)

      raise ArgumentError, "type: must be FieldType subclasses - not #{@field_type}"
    end

    # The stats collector, settling the field type first if necessary.
    def stats
      resolve_guess
      @stats
    end

    # The length stats collector, settling the field type first if necessary.
    def length_stats
      resolve_guess
      @length_stats
    end

    # Create the collectors appropriate for the (now settled) field type.
    # String fields additionally get numerical stats on the value lengths.
    def build_collectors
      @stats = StatType.for(@field_type).new(collecting_frequencies: collecting_frequencies?)
      return unless @field_type == ::FlatKit::FieldType::StringType

      @length_stats = ::FlatKit::StatType::NumericalStats.new(collecting_frequencies: collecting_frequencies?)
    end

    # Coerce the value to the field type and feed it to the collectors; values
    # that cannot be coerced are only counted as out-of-type.
    def update_stats(value)
      coerced_value = @field_type.coerce(value)
      if coerced_value == FieldType::CoerceFailure
        @out_of_type_count += 1
        return
      end

      @stats.update(coerced_value)
      @length_stats.update(coerced_value.to_s.length) if @length_stats
    end

    # Tally the best guess at the type of this value and return the guess.
    def update_type_count(value)
      guess = FieldType.best_guess(value)
      type_counts[guess] += 1
      guess
    end

    # Settle the field type on the most frequently guessed type and replay the
    # buffered values. Does nothing once the type is determined.
    #
    # NOTE(review): if no value was ever recorded, type_counts is empty and
    # the guessed type is nil - confirm how StatType.for handles that.
    def resolve_guess
      return if field_type_determined?

      best_guess_type, _best_guess_count = type_counts.max_by { |_k, v| v }
      @field_type = best_guess_type
      build_collectors

      @values.each { |buffered| update_stats(buffered) }
      @values.clear
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module FlatKit
  class FieldType
    # Internal: Implementation of the boolean type and coercion to the type
    #
    # Strings are compared case-insensitively against a fixed list of truthy
    # and falsey words; numbers are boolean only when they are 0 or 1.
    #
    class BooleanType < FieldType
      TRUTHY_REGEX = /\A(true|t|1|yes|y|on)\Z/i
      FALSEY_REGEX = /\A(false|f|0|no|n|off)\Z/i
      REGEX = Regexp.union(TRUTHY_REGEX, FALSEY_REGEX)

      # Internal: The name reported for this type.
      def self.type_name
        "boolean"
      end

      # Internal: Does the data look like a boolean?
      #
      # true/false always match, Strings match when they are one of the
      # truthy or falsey words, and Integers match only when 0 or 1.
      #
      # Returns true or false.
      def self.matches?(data)
        case data
        when TrueClass, FalseClass
          true
        when String
          REGEX.match?(data)
        when Integer
          return true if data.zero?
          return true if data == 1

          false
        else
          false
        end
      end

      # Internal: Convert the data to true or false.
      #
      # Returns true or false, or CoerceFailure when the data cannot be
      # interpreted as a boolean - including data of an unsupported class
      # (nil, Array, ...), which must not be mistaken for a coerced value.
      def self.coerce(data)
        case data
        when TrueClass
          true
        when FalseClass
          false
        when Numeric
          return false if data.zero?
          return true if data == 1

          CoerceFailure
        when String
          return true if TRUTHY_REGEX.match?(data)
          return false if FALSEY_REGEX.match?(data)

          CoerceFailure
        else
          CoerceFailure
        end
      end
    end
  end
end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
module FlatKit
  class FieldType
    # Internal: Representing the type of data which only includes data up to
    # the day resolution
    #
    class DateType < FieldType
      # Format directive reference:
      #
      # %Y  4 digit year
      # %y  2 digit year (%Y mod 100) (00..99)
      # %m  month of year zero padded
      # %-m month of year no-padding
      # %B  Full month name
      # %b  Abbreviated month name
      # %^b uppercased month name
      # %d  day of month zero padded
      # %-d day of month not padded
      # %e  day of month blank padded
      # %j  day of year zero padded

      # Internal: The strptime formats tried, in order, when parsing a String.
      #
      # Parse formats are not the same as print formats, as parsing does not
      # deal with flags and widths.
      #
      # NOTE(review): the first format that parses wins, so the order matters;
      # confirm that inputs with 2 digit years are not captured by an earlier
      # %Y based format.
      #
      # Returns a frozen Array of format Strings.
      def self.parse_formats
        @parse_formats ||= [
          # YMD formats
          "%Y-%m-%d",
          "%Y%m%d",
          "%Y/%m/%d",
          "%Y %m %d.",

          # DMY formats
          "%d %B %Y",
          "%d %b %Y",
          "%d-%b-%Y",
          "%d/%b/%Y",
          "%d-%m-%Y",
          "%d-%m-%y",
          "%d %b, %Y",
          "%d %b,%Y",
          "%d %B, %Y",
          "%d %B,%Y",

          # MDY formats
          "%m/%d/%Y",
          "%m-%d-%Y",
          "%m/%d/%y",
          "%m-%d-%y",

          "%B %d, %Y",
          "%b %d, %Y",

          # other formats
          "%Y-%j",
          "%a %b %d %Y",
        ].freeze
      end

      # https://en.wikipedia.org/wiki/Date_format_by_country
      # List of formats culled from the above - not using all as it is
      # definitely a performance issue at the moment
      # def self.known_formats
      #   @known_formats ||= [
      #     # YMD formats
      #     "%Y-%m-%d",
      #     "%Y%m%d",
      #     "%Y/%m/%d",
      #     "%Y.%m.%d",
      #     "%Y.%m.%d.",
      #     "%Y %m %d.",
      #     "%Y %b %d",
      #     "%Y %b %-d",
      #     "%Y %B %-d",
      #     "%Y %B %d",
      #     "%Y-%m%d",
      #     "%Y. %m. %-d.",
      #     "%Y. %m. %d.",
      #     "%Y.%-m.%-d.",
      #     "%Y.%-m.%-d",
      #     "%Y, %d %B",
      #     "%Y, %d %b",
      #
      #     "%y.%-m.%-d",
      #     "%y.%-m.%-d.",
      #     "%y.%m.%d.",
      #     "%y.%m.%d",
      #     "%y/%m/%d",
      #
      #     # DMY formats
      #     "%-d %b %Y",
      #     "%-d %B %Y",
      #     "%-d-%-m-%Y",
      #     "%-d. %-m. %Y",
      #     "%-d. %-m. %Y.",
      #     "%-d. %B %Y",
      #     "%-d. %B %Y.",
      #     "%-d.%-m.%Y",
      #     "%-d.%-m.%Y.",
      #     "%-d.%m.%Y.",
      #     "%-d.%m.%Y",
      #     "%-d.%b.%Y",
      #     "%-d.%B.%Y",
      #     "%-d/%-m %Y",
      #     "%-d/%-m/%Y",
      #     "%d %B %Y",
      #     "%d %b %Y",
      #     "%d-%m-%Y",
      #     "%d-%b-%Y",
      #     "%d-%B-%Y",
      #     "%d.%m.%Y",
      #     "%d/%m %Y",
      #     "%d/%m/%Y",
      #
      #     "%-d.%b.%y",
      #     "%-d.%B.%y",
      #     "%-d.%-m.%y",
      #     "%-d/%-m-%y",
      #     "%-d/%-m/%y",
      #     "%d/%m/%y",
      #     "%d-%m-%y",
      #     "%d.%m.%y",
      #     "%d%m%y",
      #
      #     # MDY formats
      #     "%-m/%-d/%Y",
      #     "%m/%d/%Y",
      #     "%m-%d-%Y",
      #     "%b-%d-%Y",
      #     "%B %-d, %Y",
      #     "%B %-d. %Y",
      #     "%B %d, %Y",
      #     "%B-%d-%Y",
      #     "%B/%d/%Y",
      #
      #     "%-m/%-d/%y",
      #
      #     # other formats
      #     "%Y-%j",
      #     "%Y%m",
      #     "%Y-%m",
      #     "%Y %m",
      #   ]
      # end

      # Internal: The name reported for this type.
      def self.type_name
        "date"
      end

      # Internal: Does the data coerce to a Date?
      #
      # Returns true or false.
      def self.matches?(data)
        coerce(data).is_a?(Date)
      end

      # Internal: Convert the data to a Date.
      #
      # A DateTime is rejected even though it is a kind of Date, a plain Date
      # is returned untouched, and a String is parsed with the known formats.
      #
      # Returns a Date, or CoerceFailure when the data is not a date.
      def self.coerce(data)
        return CoerceFailure if data.is_a?(DateTime)
        return data if data.is_a?(Date)
        return try_parse(data) if data.is_a?(String)

        CoerceFailure
      end

      # Internal: Parse the data with each of the parse formats in turn.
      #
      # Returns the Date from the first format that parses the data, or
      # CoerceFailure when none of them do.
      def self.try_parse(data)
        parse_formats.each do |candidate_format|
          begin
            return Date.strptime(data, candidate_format)
          rescue StandardError
            # this format does not fit, move on to the next one
            next
          end
        end

        CoerceFailure
      end
    end
  end
end
178
+
179
+
180
+ __END__
181
+