flat_kit 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
@@ -0,0 +1,96 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module FlatKit
  class Command
    # Internal: The implementation of the stats command.
    #
    # #parse turns the command line into a ::FlatKit::Stats instance and #call
    # runs it.
    #
    class Stats < ::FlatKit::Command
      # Internal: The name of this command.
      #
      # Returns a String.
      def self.name
        "stats"
      end

      # Internal: The one line description of this command, also used as the
      # first line of its help banner.
      #
      # Returns a String.
      def self.description
        "Collect and report stats on the inputfile"
      end

      # Internal: Build the command line parser for the stats command.
      #
      # Returns an ::Optimist::Parser.
      def self.parser
        ::Optimist::Parser.new do
          # The banner leads with this command's own description; it used to
          # print the description of the sort command.
          banner Stats.description.to_s
          banner ""

          banner <<~BANNER
            Given an input file collect basic statistics.

            The statistics can vary based upon the datatype of the field.

            Numeric fields will report the basic count, min, max, mean, standard deviation and sum.
            Non-numeric fields that are comparable, like dates, will report count, min and max.
            Other non-numeric fields will only report the count.

            Adding --cardinality will report the count, and frequency of distinct values in the result.
            This will allow for reporting the median value.

            The fields upon which stats are collected may be selected with the --select parameter.
            By default statistics are collected on all fields.

            The flatfile type(s) will be automatically determined by the file name.

            The output can be dumped as a CSV, JSON or a a formated ascii table.

          BANNER

          banner <<~USAGE

            Usage:
              fk stats --everything file.json
              fk stats --select surname,given_name file.csv
              fk stats --select surname,given_name --output-format json file.csv > stats.json
              fk stats --select field1,field2 --output-format json input.csv
              fk stats --select field1 file.json.gz -o stats.csv
              gunzip -c file.json.gz | fk stats --input-format json --output-format text

          USAGE

          banner <<~OPTIONS

            Options:

          OPTIONS

          opt :output, "Send the output to the given path instead of standard out.", default: "<stdout>"
          opt :input_format, "Input format, csv or json", default: "auto", short: :none
          opt :output_format, "Output format, csv or json", default: "auto", short: :none
          opt :select, "The comma separted list of field(s) to report stats on", required: false, type: :string
          opt :everything, "Show all statistics that are possible", default: false
          opt :cardinality, "Show the cardinality of the fields, this requires additional memory", default: false
        end
      end

      # Internal: Parse argv and build the ::FlatKit::Stats instance that
      # #call will run.
      #
      # At most one input path may be given; with none, "-" is used as the
      # input. Any ::FlatKit::Error raised while building the stats object is
      # re-raised as an ::Optimist::CommandlineError so it is handled by
      # Optimist's standard exception handling.
      #
      # Returns nothing of interest; the result is stored in @stats.
      def parse
        parser = self.class.parser
        ::Optimist.with_standard_exception_handling(parser) do
          opts = parser.parse(argv)

          # --select is a single comma separated value, split it like a csv row
          fields = ::FlatKit::Stats::AllFields
          fields = CSV.parse_line(opts[:select]) if opts[:select]

          # core stats are always collected, cardinality only on request
          stats = [FieldStats::CORE_STATS]
          stats << FieldStats::CARDINALITY_STATS if opts[:cardinality] || opts[:everything]

          paths = parser.leftovers
          raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed" if paths.size > 1

          path = paths.first || "-" # default to stdin
          @stats = ::FlatKit::Stats.new(input: path, input_fallback: opts[:input_format],
                                        output: opts[:output], output_fallback: opts[:output_format],
                                        fields_to_stat: fields, stats_to_collect: stats)
        rescue ::FlatKit::Error => e
          raise ::Optimist::CommandlineError, e.message
        end
      end

      # Internal: Run the stats object built by #parse.
      def call
        @stats.call
      end
    end
  end
end
|
data/lib/flat_kit/command.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: The base class for all commands in the CLI
|
5
|
+
#
|
2
6
|
class Command
|
3
7
|
extend DescendantTracker
|
4
8
|
|
5
|
-
attr_reader :argv
|
6
|
-
attr_reader :env
|
7
|
-
attr_reader :logger
|
8
|
-
attr_reader :opts
|
9
|
-
attr_reader :readers
|
10
|
-
attr_reader :writer
|
9
|
+
attr_reader :argv, :env, :logger, :opts, :readers, :writer
|
11
10
|
|
12
11
|
def self.name
|
13
12
|
raise NotImplementedError, "#{self.class} must implement #{self.class}.name"
|
@@ -22,7 +21,7 @@ module FlatKit
|
|
22
21
|
end
|
23
22
|
|
24
23
|
def self.names
|
25
|
-
children.map
|
24
|
+
children.map(&:name)
|
26
25
|
end
|
27
26
|
|
28
27
|
def self.for(name)
|
@@ -48,6 +47,7 @@ module FlatKit
|
|
48
47
|
end
|
49
48
|
end
|
50
49
|
|
51
|
-
require
|
52
|
-
require
|
53
|
-
require
|
50
|
+
require "flat_kit/command/cat"
|
51
|
+
require "flat_kit/command/merge"
|
52
|
+
require "flat_kit/command/sort"
|
53
|
+
require "flat_kit/command/stats"
|
@@ -1,17 +1,20 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
2
4
|
|
3
5
|
module FlatKit
|
6
|
+
# Internal: A module to track descendants of a class
|
7
|
+
#
|
4
8
|
module DescendantTracker
|
5
9
|
def inherited(klass)
|
6
10
|
super
|
7
11
|
return unless klass.instance_of?(Class)
|
8
|
-
|
12
|
+
|
13
|
+
children << klass
|
9
14
|
end
|
10
15
|
|
11
16
|
def children
|
12
|
-
unless defined? @_children
|
13
|
-
@_children = Set.new
|
14
|
-
end
|
17
|
+
@_children = Set.new unless defined? @_children
|
15
18
|
@_children
|
16
19
|
end
|
17
20
|
|
@@ -23,5 +26,14 @@ module FlatKit
|
|
23
26
|
child_klass.send(method, *args)
|
24
27
|
end
|
25
28
|
end
|
29
|
+
|
30
|
+
#
|
31
|
+
# Find every child for which sending the given method, with the given args,
# returns a truthy value.
#
# method - the name of the method to send to each child
# args   - the arguments passed along with the method
#
# Returns the selected children.
def find_children(method, *args)
  children.select { |candidate| candidate.send(method, *args) }
end
|
26
38
|
end
|
27
39
|
end
|
data/lib/flat_kit/error.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
|
-
# A simplified Observable class for use internally
|
4
|
+
# Internal: A simplified Observable class for use internally
|
3
5
|
#
|
4
6
|
module EventEmitter
|
5
7
|
# Register a listener, ignoring listeners that are already registered.
#
# listener - an object that responds to #on_event
#
# Raises ::NoMethodError if the listener does not respond to #on_event.
# Returns the listeners after the addition, or nil when the listener was
# already registered.
def add_listener(listener)
  unless listener.respond_to?(:on_event)
    raise ::NoMethodError, "#{listener} does not resond to #on_event"
  end

  self._listeners ||= []
  return if _listeners.include?(listener)

  _listeners << listener
end
|
@@ -20,14 +23,14 @@ module FlatKit
|
|
20
23
|
_listeners.clear
|
21
24
|
end
|
22
25
|
|
23
|
-
def notify_listeners(name:, data:)
|
26
|
+
def notify_listeners(name:, data:, meta: nil)
|
24
27
|
_listeners.each do |l|
|
25
|
-
l.on_event(name: name, data: data)
|
28
|
+
l.on_event(name: name, data: data, meta: meta)
|
26
29
|
end
|
27
30
|
end
|
28
31
|
|
29
32
|
def _listeners
|
30
|
-
@_listeners ||=
|
33
|
+
@_listeners ||= []
|
31
34
|
end
|
32
35
|
end
|
33
36
|
end
|
@@ -0,0 +1,246 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module FlatKit
  # Internal: Collect stats on a single field.
  #
  # We may not know what the field data type is to start with, so collect a
  # bunch of values until we have the threshold, and then calculate stats based
  # upon the data type determined by the guess.
  #
  # When an explicit type is given to the constructor no guessing is done and
  # the stats are collected from the very first value.
  #
  class FieldStats
    DEFAULT_GUESS_THRESHOLD = 1000

    CORE_STATS        = :core
    CARDINALITY_STATS = :cardinality

    ALL_STATS = [CORE_STATS, CARDINALITY_STATS].freeze

    # The names of the methods whose values are reported by #to_hash, in order.
    EXPORT_FIELDS = %w[
      name
      type
      count
      max
      mean
      min
      stddev
      sum
      mode
      unique_count

      max_length
      mean_length
      min_length
      stddev_length
      mode_length
      unique_count_lengths

      null_count
      unknown_count
      out_of_type_count
      total_count
      null_percent
      unknown_percent
    ].freeze

    attr_reader :type_counts, :field_type, :name, :out_of_type_count

    # Internal: Create a new FieldStats.
    #
    # name             - the name of the field
    # stats_to_collect - one of, or an Array of, the items in ALL_STATS
    # type             - a direct subclass of ::FlatKit::FieldType; the
    #                    default GuessType defers the decision until enough
    #                    values have been seen
    # guess_threshold  - the number of values to buffer before resolving the
    #                    guess
    #
    # Raises ArgumentError if stats_to_collect contains an unknown collection
    # set or if type is not a direct FieldType subclass.
    def initialize(name:, stats_to_collect: CORE_STATS,
                   type: ::FlatKit::FieldType::GuessType,
                   guess_threshold: DEFAULT_GUESS_THRESHOLD)
      @name              = name
      @field_type        = type
      @guess_threshold   = guess_threshold
      @type_counts       = Hash.new(0)
      @out_of_type_count = 0
      @values            = []
      @stats             = nil
      @length_stats      = nil
      @stats_to_collect  = [stats_to_collect].flatten

      @stats_to_collect.each do |collection_set|
        next if ALL_STATS.include?(collection_set)

        valid_sets = ALL_STATS.map(&:to_s).join(", ")

        raise ArgumentError, "#{collection_set} is not a valid stats collection set, must be one of #{valid_sets}"
      end

      unless type.is_a?(Class) && (type.superclass == ::FlatKit::FieldType)
        raise ArgumentError, "type: must be FieldType subclasses - not #{type}"
      end

      # With an explicit type resolve_guess never runs, so the collectors
      # have to exist before the first update arrives.
      initialize_stats if field_type_determined?
    end

    # Internal: Has the field type been decided, either explicitly or by guess?
    def field_type_determined?
      @field_type != ::FlatKit::FieldType::GuessType
    end

    # Internal: Record a single value of the field.
    #
    # While the type is still a guess the value is buffered; once the buffer
    # reaches the guess threshold the type is resolved and the buffered values
    # are replayed into the stats.
    def update(value)
      update_type_count(value)

      if field_type_determined?
        update_stats(value)
      else
        @values << value

        resolve_guess if @values.size >= @guess_threshold
      end
    end

    # Internal: Were the cardinality stats requested?
    def collecting_frequencies?
      @stats_to_collect.include?(CARDINALITY_STATS)
    end

    # Internal: The type name reported by the current field type.
    def type
      @field_type.type_name
    end

    def count
      stats.count
    end

    # The following return nil when the underlying stats object does not
    # support the statistic.

    def max
      stats.max if stats.respond_to?(:max)
    end

    def mean
      stats.mean if stats.respond_to?(:mean)
    end

    def min
      stats.min if stats.respond_to?(:min)
    end

    def stddev
      stats.stddev if stats.respond_to?(:stddev)
    end

    def sum
      stats.sum if stats.respond_to?(:sum)
    end

    # The following return nil unless cardinality stats are being collected.

    def mode
      stats.mode if collecting_frequencies?
    end

    def unique_count
      stats.unique_count if collecting_frequencies?
    end

    def unique_values
      stats.unique_values if collecting_frequencies?
    end

    def frequencies
      stats.frequencies if collecting_frequencies?
    end

    # The length statistics return nil unless length stats exist; those are
    # only created for string fields.

    def min_length
      length_stats.min if @length_stats
    end

    def max_length
      length_stats.max if @length_stats
    end

    def mean_length
      length_stats.mean if @length_stats
    end

    def stddev_length
      length_stats.stddev if @length_stats
    end

    def mode_length
      length_stats.mode if @length_stats && collecting_frequencies?
    end

    def unique_count_lengths
      length_stats.unique_count if @length_stats && collecting_frequencies?
    end

    def unique_values_lengths
      length_stats.unique_values if @length_stats && collecting_frequencies?
    end

    def length_frequencies
      length_stats.frequencies if @length_stats && collecting_frequencies?
    end

    # Internal: The number of values whose best guess was the null type.
    def null_count
      type_counts[FieldType::NullType]
    end

    # Internal: The values counted by the stats plus those that failed to
    # coerce to the field type.
    def total_count
      stats.count + @out_of_type_count
    end

    # Internal: null_count as a percentage of total_count, truncated to 2
    # decimal places; 0 when there are no values.
    def null_percent
      return 0 if total_count.zero?

      ((null_count.to_f / total_count) * 100.0).truncate(2)
    end

    # Internal: The number of values whose best guess was the unknown type.
    def unknown_count
      type_counts[FieldType::UnknownType]
    end

    # Internal: unknown_count as a percentage of total_count, truncated to 2
    # decimal places; 0 when there are no values.
    def unknown_percent
      return 0 if total_count.zero?

      ((unknown_count.to_f / total_count) * 100.0).truncate(2)
    end

    # Internal: Resolve any pending guess and export every field named in
    # EXPORT_FIELDS.
    #
    # Returns a Hash of field name to value.
    def to_hash
      resolve_guess

      EXPORT_FIELDS.each_with_object({}) do |field, exported|
        exported[field] = send(field)
      end
    end

    private

    # The stats collector, resolving the type guess first if needed.
    def stats
      resolve_guess
      @stats
    end

    # The length stats collector, resolving the type guess first if needed.
    def length_stats
      resolve_guess
      @length_stats
    end

    # Build the collectors for the current field type; string fields also get
    # a numerical collector for the lengths of their values.
    def initialize_stats
      @stats = StatType.for(@field_type).new(collecting_frequencies: collecting_frequencies?)
      return unless @field_type == ::FlatKit::FieldType::StringType

      @length_stats = ::FlatKit::StatType::NumericalStats.new(collecting_frequencies: collecting_frequencies?)
    end

    # Coerce the value to the field type and feed it to the collectors; values
    # that fail to coerce are only counted as out of type.
    def update_stats(value)
      coerced_value = @field_type.coerce(value)
      if coerced_value == FieldType::CoerceFailure
        @out_of_type_count += 1
        return
      end

      @stats.update(coerced_value)
      @length_stats&.update(coerced_value.to_s.length)
    end

    # Count the best guess type of the value, this happens for every value
    # regardless of whether the field type has been determined.
    def update_type_count(value)
      guess = FieldType.best_guess(value)
      type_counts[guess] += 1
      guess
    end

    # Settle on the most frequently guessed type, build the collectors for it
    # and replay the buffered values. A no-op once the type is determined.
    def resolve_guess
      return if field_type_determined?

      best_guess_type, _best_guess_count = type_counts.max_by { |_type, seen| seen }
      @field_type = best_guess_type
      initialize_stats

      @values.each { |buffered| update_stats(buffered) }
      @values.clear
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module FlatKit
  class FieldType
    # Internal: Implementation of the boolean type and coercion to the type
    #
    class BooleanType < FieldType
      TRUTHY_REGEX = /\A(true|t|1|yes|y|on)\Z/i
      FALSEY_REGEX = /\A(false|f|0|no|n|off)\Z/i
      REGEX        = Regexp.union(TRUTHY_REGEX, FALSEY_REGEX)

      # Internal: The name of this type.
      #
      # Returns a String.
      def self.type_name
        "boolean"
      end

      # Internal: Does the data look like a boolean?
      #
      # true and false match, as do Strings matching REGEX and the Integers
      # 0 and 1.
      #
      # Returns true or false.
      def self.matches?(data)
        case data
        when TrueClass, FalseClass
          true
        when String
          REGEX.match?(data)
        when Integer
          data.zero? || data == 1
        else
          false
        end
      end

      # Internal: Convert the data to true or false.
      #
      # Numerics coerce only when they are 0 or 1, Strings only when they
      # match one of the truthy or falsey patterns.
      #
      # Returns true, false, or CoerceFailure when the data cannot be
      # converted; this includes data of any unsupported class, such as nil.
      def self.coerce(data)
        case data
        when TrueClass
          true
        when FalseClass
          false
        when Numeric
          return false if data.zero?
          return true if data == 1

          CoerceFailure
        when String
          return true if TRUTHY_REGEX.match?(data)
          return false if FALSEY_REGEX.match?(data)

          CoerceFailure
        else
          # unsupported classes used to fall through and return nil
          CoerceFailure
        end
      end
    end
  end
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module FlatKit
  class FieldType
    # Internal: The field type for data that only has a resolution up to the
    # day.
    #
    class DateType < FieldType
      # Reference of the strftime style directives used below:
      #
      # %Y    4 digit year
      # %y    2 digit year (%Y mod 100) (00..99)
      # %m    month of year zero padded
      # %-m   month of year no-padding
      # %B    Full month name
      # %b    Abbreviated month name
      # %^b   uppercased month name
      # %d    day of month zero padded
      # %-d   day of month not padded
      # %e    day of month blank padded
      # %j    day of year zero padded

      # Internal: The formats that are tried, in order, when parsing a String.
      #
      # Parse formats are not the same as print formats as parsing does not
      # deal with flags and widths.
      #
      # Returns a frozen Array of format Strings.
      def self.parse_formats
        return @parse_formats if @parse_formats

        year_month_day = [
          "%Y-%m-%d",
          "%Y%m%d",
          "%Y/%m/%d",
          "%Y %m %d.",
        ]

        day_month_year = [
          "%d %B %Y",
          "%d %b %Y",
          "%d-%b-%Y",
          "%d/%b/%Y",
          "%d-%m-%Y",
          "%d-%m-%y",
          "%d %b, %Y",
          "%d %b,%Y",
          "%d %B, %Y",
          "%d %B,%Y",
        ]

        month_day_year = [
          "%m/%d/%Y",
          "%m-%d-%Y",
          "%m/%d/%y",
          "%m-%d-%y",
          "%B %d, %Y",
          "%b %d, %Y",
        ]

        other = [
          "%Y-%j",
          "%a %b %d %Y",
        ]

        @parse_formats = (year_month_day + day_month_year + month_day_year + other).freeze
      end

      # https://en.wikipedia.org/wiki/Date_format_by_country
      # List of formats culled from the above - not using all as it is
      # definitely a performance issue at the moment
      # def self.known_formats
      #   @known_formats ||= [
      #     # YMD formats
      #     "%Y-%m-%d",
      #     "%Y%m%d",
      #     "%Y/%m/%d",
      #     "%Y.%m.%d",
      #     "%Y.%m.%d.",
      #     "%Y %m %d.",
      #     "%Y %b %d",
      #     "%Y %b %-d",
      #     "%Y %B %-d",
      #     "%Y %B %d",
      #     "%Y-%m%d",
      #     "%Y. %m. %-d.",
      #     "%Y. %m. %d.",
      #     "%Y.%-m.%-d.",
      #     "%Y.%-m.%-d",
      #     "%Y, %d %B",
      #     "%Y, %d %b",
      #
      #     "%y.%-m.%-d",
      #     "%y.%-m.%-d.",
      #     "%y.%m.%d.",
      #     "%y.%m.%d",
      #     "%y/%m/%d",
      #
      #     # DMY formats
      #     "%-d %b %Y",
      #     "%-d %B %Y",
      #     "%-d-%-m-%Y",
      #     "%-d. %-m. %Y",
      #     "%-d. %-m. %Y.",
      #     "%-d. %B %Y",
      #     "%-d. %B %Y.",
      #     "%-d.%-m.%Y",
      #     "%-d.%-m.%Y.",
      #     "%-d.%m.%Y.",
      #     "%-d.%m.%Y",
      #     "%-d.%b.%Y",
      #     "%-d.%B.%Y",
      #     "%-d/%-m %Y",
      #     "%-d/%-m/%Y",
      #     "%d %B %Y",
      #     "%d %b %Y",
      #     "%d-%m-%Y",
      #     "%d-%b-%Y",
      #     "%d-%B-%Y",
      #     "%d.%m.%Y",
      #     "%d/%m %Y",
      #     "%d/%m/%Y",
      #
      #     "%-d.%b.%y",
      #     "%-d.%B.%y",
      #     "%-d.%-m.%y",
      #     "%-d/%-m-%y",
      #     "%-d/%-m/%y",
      #     "%d/%m/%y",
      #     "%d-%m-%y",
      #     "%d.%m.%y",
      #     "%d%m%y",
      #
      #     # MDY formats
      #     "%-m/%-d/%Y",
      #     "%m/%d/%Y",
      #     "%m-%d-%Y",
      #     "%b-%d-%Y",
      #     "%B %-d, %Y",
      #     "%B %-d. %Y",
      #     "%B %d, %Y",
      #     "%B-%d-%Y",
      #     "%B/%d/%Y",
      #
      #     "%-m/%-d/%y",
      #
      #     # other formats
      #     "%Y-%j",
      #     "%Y%m",
      #     "%Y-%m",
      #     "%Y %m",
      #   ]
      # end

      # Internal: The name of this type.
      #
      # Returns a String.
      def self.type_name
        "date"
      end

      # Internal: Does the data coerce to a Date?
      #
      # Returns true or false.
      def self.matches?(data)
        coerce(data).is_a?(Date)
      end

      # Internal: Convert the data to a Date.
      #
      # A DateType only goes to day resolution, so a DateTime is rejected even
      # though it is a kind of Date; that check therefore has to come first.
      #
      # Returns a Date, or CoerceFailure when the data cannot be converted.
      def self.coerce(data)
        return CoerceFailure if data.is_a?(DateTime)
        return data if data.is_a?(Date)
        return try_parse(data) if data.is_a?(String)

        CoerceFailure
      end

      # Internal: Parse the String with each of the parse formats in turn.
      #
      # Returns the Date from the first format that parses the data, or
      # CoerceFailure when none of them do.
      def self.try_parse(data)
        parse_formats.each do |candidate_format|
          begin
            return Date.strptime(data, candidate_format)
          rescue StandardError
            # this format does not fit the data, move on to the next one
            next
          end
        end

        CoerceFailure
      end
    end
  end
end
|
178
|
+
|
179
|
+
|
180
|
+
__END__
|
181
|
+
|