flat_kit 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +9 -0
- data/Manifest.txt +3 -42
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +46 -32
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +41 -39
- data/lib/flat_kit/command.rb +10 -11
- data/lib/flat_kit/descendant_tracker.rb +9 -6
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +5 -2
- data/lib/flat_kit/field_stats.rb +31 -26
- data/lib/flat_kit/field_type/boolean_type.rb +9 -5
- data/lib/flat_kit/field_type/date_type.rb +19 -17
- data/lib/flat_kit/field_type/float_type.rb +15 -9
- data/lib/flat_kit/field_type/guess_type.rb +9 -6
- data/lib/flat_kit/field_type/integer_type.rb +6 -4
- data/lib/flat_kit/field_type/null_type.rb +5 -1
- data/lib/flat_kit/field_type/string_type.rb +8 -6
- data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
- data/lib/flat_kit/field_type/unknown_type.rb +12 -8
- data/lib/flat_kit/field_type.rb +52 -44
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +15 -18
- data/lib/flat_kit/jsonl/writer.rb +8 -10
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -18
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +8 -7
- data/lib/flat_kit/position.rb +3 -4
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
- data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
- data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
- data/lib/flat_kit/stat_type.rb +18 -13
- data/lib/flat_kit/stats.rb +12 -14
- data/lib/flat_kit/writer.rb +5 -6
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +13 -10
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +31 -26
- metadata +20 -158
- data/Rakefile +0 -21
- data/examples/stream-active-record-to-csv.rb +0 -42
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/field_type/test_boolean_type.rb +0 -65
- data/test/field_type/test_date_type.rb +0 -71
- data/test/field_type/test_float_type.rb +0 -56
- data/test/field_type/test_guess_type.rb +0 -14
- data/test/field_type/test_integer_type.rb +0 -52
- data/test/field_type/test_null_type.rb +0 -41
- data/test/field_type/test_string_type.rb +0 -18
- data/test/field_type/test_timestamp_type.rb +0 -108
- data/test/field_type/test_unknown_type.rb +0 -35
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -86
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/run +0 -23
- data/test/stat_type/test_nominal_stats.rb +0 -69
- data/test/stat_type/test_numerical_stats.rb +0 -118
- data/test/stat_type/test_ordinal_stats.rb +0 -92
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -89
- data/test/test_field_stats.rb +0 -134
- data/test/test_field_type.rb +0 -34
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -89
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class Command
|
5
|
+
# Internal: The implementation of the stats command.
|
6
|
+
#
|
3
7
|
class Stats < ::FlatKit::Command
|
4
|
-
|
5
8
|
def self.name
|
6
9
|
"stats"
|
7
10
|
end
|
@@ -12,45 +15,45 @@ module FlatKit
|
|
12
15
|
|
13
16
|
def self.parser
|
14
17
|
::Optimist::Parser.new do
|
15
|
-
banner
|
18
|
+
banner Sort.description.to_s
|
16
19
|
banner ""
|
17
20
|
|
18
21
|
banner <<~BANNER
|
19
|
-
|
22
|
+
Given an input file collect basic statistics.
|
20
23
|
|
21
|
-
|
24
|
+
The statistics can vary based upon the datatype of the field.
|
22
25
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
+
Numeric fields will report the basic count, min, max, mean, standard deviation and sum.
|
27
|
+
Non-numeric fields that are comparable, like dates, will report count, min and max.
|
28
|
+
Other non-numeric fields will only report the count.
|
26
29
|
|
27
|
-
|
28
|
-
|
30
|
+
Adding --cardinality will report the count, and frequency of distinct values in the result.
|
31
|
+
This will allow for reporting the median value.
|
29
32
|
|
30
|
-
|
31
|
-
|
33
|
+
The fields upon which stats are collected may be selected with the --fields parameter.
|
34
|
+
By default statistics are collected on all fields.
|
32
35
|
|
33
|
-
|
36
|
+
The flatfile type(s) will be automatically determined by the file name.
|
34
37
|
|
35
|
-
|
38
|
+
The output can be dumped as a CSV, JSON or a a formated ascii table.
|
36
39
|
|
37
40
|
BANNER
|
38
41
|
|
39
42
|
banner <<~USAGE
|
40
43
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
Usage:
|
45
|
+
fk stats --everything file.json
|
46
|
+
fk stats --select surname,given_name file.csv
|
47
|
+
fk stats --select surname,given_name --output-format json file.csv > stats.json
|
48
|
+
fk stats --select field1,field2 --output-format json input.csv
|
49
|
+
fk stats --select field1 file.json.gz -o stats.csv
|
50
|
+
gunzip -c file.json.gz | fk stats --input-format json --output-format text
|
48
51
|
|
49
52
|
USAGE
|
50
53
|
|
51
54
|
banner <<~OPTIONS
|
52
55
|
|
53
|
-
|
56
|
+
Options:
|
54
57
|
|
55
58
|
OPTIONS
|
56
59
|
|
@@ -65,24 +68,23 @@ module FlatKit
|
|
65
68
|
|
66
69
|
def parse
|
67
70
|
parser = self.class.parser
|
68
|
-
::Optimist
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
71
|
+
::Optimist.with_standard_exception_handling(parser) do
|
72
|
+
opts = parser.parse(argv)
|
73
|
+
fields = ::FlatKit::Stats::AllFields
|
74
|
+
fields = CSV.parse_line(opts[:select]) if opts[:select]
|
75
|
+
|
76
|
+
stats = [FieldStats::CORE_STATS]
|
77
|
+
stats << FieldStats::CARDINALITY_STATS if opts[:cardinality] || opts[:everything]
|
78
|
+
|
79
|
+
paths = parser.leftovers
|
80
|
+
raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed" if paths.size > 1
|
81
|
+
|
82
|
+
path = paths.first || "-" # default to stdin
|
83
|
+
@stats = ::FlatKit::Stats.new(input: path, input_fallback: opts[:input_format],
|
84
|
+
output: opts[:output], output_fallback: opts[:output_format],
|
85
|
+
fields_to_stat: fields, stats_to_collect: stats)
|
86
|
+
rescue ::FlatKit::Error => e
|
87
|
+
raise ::Optimist::CommandlineError, e.message
|
86
88
|
end
|
87
89
|
end
|
88
90
|
|
data/lib/flat_kit/command.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: The base class for all commands in the CLI
|
5
|
+
#
|
2
6
|
class Command
|
3
7
|
extend DescendantTracker
|
4
8
|
|
5
|
-
attr_reader :argv
|
6
|
-
attr_reader :env
|
7
|
-
attr_reader :logger
|
8
|
-
attr_reader :opts
|
9
|
-
attr_reader :readers
|
10
|
-
attr_reader :writer
|
9
|
+
attr_reader :argv, :env, :logger, :opts, :readers, :writer
|
11
10
|
|
12
11
|
def self.name
|
13
12
|
raise NotImplementedError, "#{self.class} must implement #{self.class}.name"
|
@@ -22,7 +21,7 @@ module FlatKit
|
|
22
21
|
end
|
23
22
|
|
24
23
|
def self.names
|
25
|
-
children.map
|
24
|
+
children.map(&:name)
|
26
25
|
end
|
27
26
|
|
28
27
|
def self.for(name)
|
@@ -48,7 +47,7 @@ module FlatKit
|
|
48
47
|
end
|
49
48
|
end
|
50
49
|
|
51
|
-
require
|
52
|
-
require
|
53
|
-
require
|
54
|
-
require
|
50
|
+
require "flat_kit/command/cat"
|
51
|
+
require "flat_kit/command/merge"
|
52
|
+
require "flat_kit/command/sort"
|
53
|
+
require "flat_kit/command/stats"
|
@@ -1,17 +1,20 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
2
4
|
|
3
5
|
module FlatKit
|
6
|
+
# Internal: A module to track descendants of a class
|
7
|
+
#
|
4
8
|
module DescendantTracker
|
5
9
|
def inherited(klass)
|
6
10
|
super
|
7
11
|
return unless klass.instance_of?(Class)
|
8
|
-
|
12
|
+
|
13
|
+
children << klass
|
9
14
|
end
|
10
15
|
|
11
16
|
def children
|
12
|
-
unless defined? @_children
|
13
|
-
@_children = Set.new
|
14
|
-
end
|
17
|
+
@_children = Set.new unless defined? @_children
|
15
18
|
@_children
|
16
19
|
end
|
17
20
|
|
@@ -24,7 +27,7 @@ module FlatKit
|
|
24
27
|
end
|
25
28
|
end
|
26
29
|
|
27
|
-
#
|
30
|
+
#
|
28
31
|
# Find all the children that return truthy from the given method with args
|
29
32
|
#
|
30
33
|
def find_children(method, *args)
|
data/lib/flat_kit/error.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
|
-
# A simplified Observable class for use internally
|
4
|
+
# Internal: A simplified Observable class for use internally
|
3
5
|
#
|
4
6
|
module EventEmitter
|
5
7
|
def add_listener(listener)
|
6
8
|
raise ::NoMethodError, "#{listener} does not resond to #on_event" unless listener.respond_to?(:on_event)
|
9
|
+
|
7
10
|
self._listeners ||= []
|
8
11
|
self._listeners << listener unless _listeners.include?(listener)
|
9
12
|
end
|
@@ -27,7 +30,7 @@ module FlatKit
|
|
27
30
|
end
|
28
31
|
|
29
32
|
def _listeners
|
30
|
-
@_listeners ||=
|
33
|
+
@_listeners ||= []
|
31
34
|
end
|
32
35
|
end
|
33
36
|
end
|
data/lib/flat_kit/field_stats.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
|
-
# Collect stats on a single field.
|
3
|
-
#
|
4
|
-
#
|
4
|
+
# Internal: Collect stats on a single field.
|
5
|
+
#
|
6
|
+
# We may not know what the field data type is to start with, so collect a
|
7
|
+
# bunch of values until we have the threshold, and then calculate stats based
|
8
|
+
# upon the data types determined by the guess
|
5
9
|
#
|
6
10
|
class FieldStats
|
7
11
|
DEFAULT_GUESS_THRESHOLD = 1000
|
@@ -9,7 +13,7 @@ module FlatKit
|
|
9
13
|
CORE_STATS = :core
|
10
14
|
CARDINALITY_STATS = :cardinality
|
11
15
|
|
12
|
-
ALL_STATS = [
|
16
|
+
ALL_STATS = [CORE_STATS, CARDINALITY_STATS].freeze
|
13
17
|
|
14
18
|
EXPORT_FIELDS = %w[
|
15
19
|
name
|
@@ -36,14 +40,13 @@ module FlatKit
|
|
36
40
|
total_count
|
37
41
|
null_percent
|
38
42
|
unknown_percent
|
39
|
-
]
|
40
|
-
|
43
|
+
].freeze
|
41
44
|
|
42
|
-
attr_reader :type_counts
|
43
|
-
attr_reader :field_type
|
44
|
-
attr_reader :name
|
45
|
+
attr_reader :type_counts, :field_type, :name, :out_of_type_count
|
45
46
|
|
46
|
-
def initialize(name:, stats_to_collect: CORE_STATS,
|
47
|
+
def initialize(name:, stats_to_collect: CORE_STATS,
|
48
|
+
type: ::FlatKit::FieldType::GuessType,
|
49
|
+
guess_threshold: DEFAULT_GUESS_THRESHOLD)
|
47
50
|
@name = name
|
48
51
|
@field_type = type
|
49
52
|
@guess_threshold = guess_threshold
|
@@ -56,9 +59,14 @@ module FlatKit
|
|
56
59
|
|
57
60
|
@stats_to_collect.each do |collection_set|
|
58
61
|
next if ALL_STATS.include?(collection_set)
|
59
|
-
|
62
|
+
|
63
|
+
valid_sets = ALL_STATS.map(&:to_s).join(", ")
|
64
|
+
|
65
|
+
raise ArgumentError, "#{collection_set} is not a valid stats collection set, must be one of #{valid_sets}"
|
60
66
|
end
|
61
|
-
|
67
|
+
return if type.is_a?(Class) && (type.superclass == ::FlatKit::FieldType)
|
68
|
+
|
69
|
+
raise ArgumentError, "type: must be FieldType subclasses - not #{type}"
|
62
70
|
end
|
63
71
|
|
64
72
|
def field_type_determined?
|
@@ -68,14 +76,12 @@ module FlatKit
|
|
68
76
|
def update(value)
|
69
77
|
update_type_count(value)
|
70
78
|
|
71
|
-
if field_type_determined?
|
79
|
+
if field_type_determined?
|
72
80
|
update_stats(value)
|
73
81
|
else
|
74
82
|
@values << value
|
75
83
|
|
76
|
-
if @values.size >= @guess_threshold
|
77
|
-
resolve_guess
|
78
|
-
end
|
84
|
+
resolve_guess if @values.size >= @guess_threshold
|
79
85
|
end
|
80
86
|
end
|
81
87
|
|
@@ -167,12 +173,9 @@ module FlatKit
|
|
167
173
|
stats.count + @out_of_type_count
|
168
174
|
end
|
169
175
|
|
170
|
-
def out_of_type_count
|
171
|
-
@out_of_type_count
|
172
|
-
end
|
173
|
-
|
174
176
|
def null_percent
|
175
177
|
return 0 if total_count.zero?
|
178
|
+
|
176
179
|
((null_count.to_f / total_count) * 100.0).truncate(2)
|
177
180
|
end
|
178
181
|
|
@@ -182,15 +185,16 @@ module FlatKit
|
|
182
185
|
|
183
186
|
def unknown_percent
|
184
187
|
return 0 if total_count.zero?
|
188
|
+
|
185
189
|
((unknown_count.to_f / total_count) * 100.0).truncate(2)
|
186
190
|
end
|
187
191
|
|
188
192
|
def to_hash
|
189
193
|
resolve_guess
|
190
194
|
|
191
|
-
|
195
|
+
{}.tap do |h|
|
192
196
|
EXPORT_FIELDS.each do |n|
|
193
|
-
h[n] =
|
197
|
+
h[n] = send(n)
|
194
198
|
end
|
195
199
|
end
|
196
200
|
end
|
@@ -209,7 +213,7 @@ module FlatKit
|
|
209
213
|
|
210
214
|
def update_stats(value)
|
211
215
|
coerced_value = @field_type.coerce(value)
|
212
|
-
if coerced_value == FieldType::CoerceFailure
|
216
|
+
if coerced_value == FieldType::CoerceFailure
|
213
217
|
@out_of_type_count += 1
|
214
218
|
return
|
215
219
|
end
|
@@ -221,15 +225,16 @@ module FlatKit
|
|
221
225
|
def update_type_count(value)
|
222
226
|
guess = FieldType.best_guess(value)
|
223
227
|
type_counts[guess] += 1
|
224
|
-
|
228
|
+
guess
|
225
229
|
end
|
226
230
|
|
227
231
|
def resolve_guess
|
228
232
|
return if field_type_determined?
|
229
|
-
|
233
|
+
|
234
|
+
best_guess_type, _best_guess_count = type_counts.max_by { |_k, v| v }
|
230
235
|
@field_type = best_guess_type
|
231
236
|
@stats = StatType.for(@field_type).new(collecting_frequencies: collecting_frequencies?)
|
232
|
-
if @field_type == ::FlatKit::FieldType::StringType
|
237
|
+
if @field_type == ::FlatKit::FieldType::StringType
|
233
238
|
@length_stats = ::FlatKit::StatType::NumericalStats.new(collecting_frequencies: collecting_frequencies?)
|
234
239
|
end
|
235
240
|
@values.each do |v|
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Implementation of the boolean type and coercion to the type
|
6
|
+
#
|
3
7
|
class BooleanType < FieldType
|
4
|
-
|
5
8
|
TRUTHY_REGEX = /\A(true|t|1|yes|y|on)\Z/i
|
6
9
|
FALSEY_REGEX = /\A(false|f|0|no|n|off)\Z/i
|
7
10
|
REGEX = Regexp.union(TRUTHY_REGEX, FALSEY_REGEX)
|
@@ -12,16 +15,15 @@ module FlatKit
|
|
12
15
|
|
13
16
|
def self.matches?(data)
|
14
17
|
case data
|
15
|
-
when TrueClass
|
16
|
-
true
|
17
|
-
when FalseClass
|
18
|
+
when TrueClass, FalseClass
|
18
19
|
true
|
19
20
|
when String
|
20
21
|
REGEX.match?(data)
|
21
22
|
when Integer
|
22
23
|
return true if data.zero?
|
23
24
|
return true if data == 1
|
24
|
-
|
25
|
+
|
26
|
+
false
|
25
27
|
else
|
26
28
|
false
|
27
29
|
end
|
@@ -36,10 +38,12 @@ module FlatKit
|
|
36
38
|
when Numeric
|
37
39
|
return false if data.zero?
|
38
40
|
return true if data == 1
|
41
|
+
|
39
42
|
CoerceFailure
|
40
43
|
when String
|
41
44
|
return true if TRUTHY_REGEX.match?(data)
|
42
45
|
return false if FALSEY_REGEX.match?(data)
|
46
|
+
|
43
47
|
CoerceFailure
|
44
48
|
end
|
45
49
|
end
|
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
3
|
-
# Representing the type of data which only includes data up to
|
4
|
-
# resolution
|
5
|
+
# Internal: Representing the type of data which only includes data up to
|
6
|
+
# the day resolution
|
7
|
+
#
|
5
8
|
class DateType < FieldType
|
6
|
-
|
7
9
|
# %Y 4 digit year
|
8
10
|
# %y 2 digit year (%Y mod 100) (00..99)
|
9
11
|
# %m month of year zero padded
|
@@ -49,9 +51,8 @@ module FlatKit
|
|
49
51
|
|
50
52
|
# other formats
|
51
53
|
"%Y-%j",
|
52
|
-
"%a %b %d %Y"
|
53
|
-
|
54
|
-
|
54
|
+
"%a %b %d %Y",
|
55
|
+
].freeze
|
55
56
|
end
|
56
57
|
|
57
58
|
# https://en.wikipedia.org/wiki/Date_format_by_country
|
@@ -146,7 +147,7 @@ module FlatKit
|
|
146
147
|
|
147
148
|
def self.matches?(data)
|
148
149
|
coerced = coerce(data)
|
149
|
-
|
150
|
+
coerced.is_a?(Date)
|
150
151
|
end
|
151
152
|
|
152
153
|
def self.coerce(data)
|
@@ -156,20 +157,21 @@ module FlatKit
|
|
156
157
|
when Date
|
157
158
|
data
|
158
159
|
when String
|
159
|
-
|
160
|
-
parse_formats.each do |format|
|
161
|
-
begin
|
162
|
-
coerced_data = Date.strptime(data, format)
|
163
|
-
break
|
164
|
-
rescue => _
|
165
|
-
false
|
166
|
-
end
|
167
|
-
end
|
168
|
-
coerced_data
|
160
|
+
try_parse(data)
|
169
161
|
else
|
170
162
|
CoerceFailure
|
171
163
|
end
|
172
164
|
end
|
165
|
+
|
166
|
+
def self.try_parse(data)
|
167
|
+
parse_formats.each do |format|
|
168
|
+
coerced_data = Date.strptime(data, format)
|
169
|
+
return coerced_data
|
170
|
+
rescue StandardError => _e
|
171
|
+
false
|
172
|
+
end
|
173
|
+
CoerceFailure
|
174
|
+
end
|
173
175
|
end
|
174
176
|
end
|
175
177
|
end
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Representing floating point data and conversion to it
|
6
|
+
#
|
3
7
|
class FloatType < FieldType
|
4
|
-
|
5
8
|
def self.type_name
|
6
9
|
"float"
|
7
10
|
end
|
@@ -14,12 +17,8 @@ module FlatKit
|
|
14
17
|
false
|
15
18
|
when String
|
16
19
|
return false if IntegerType.matches?(data)
|
17
|
-
|
18
|
-
|
19
|
-
true
|
20
|
-
rescue ArgumentError => _
|
21
|
-
false
|
22
|
-
end
|
20
|
+
|
21
|
+
maybe_float?(data)
|
23
22
|
else
|
24
23
|
false
|
25
24
|
end
|
@@ -27,11 +26,18 @@ module FlatKit
|
|
27
26
|
|
28
27
|
def self.coerce(data)
|
29
28
|
Float(data)
|
30
|
-
rescue TypeError =>
|
29
|
+
rescue TypeError => _e
|
31
30
|
CoerceFailure
|
32
|
-
rescue ArgumentError =>
|
31
|
+
rescue ArgumentError => _e
|
33
32
|
CoerceFailure
|
34
33
|
end
|
34
|
+
|
35
|
+
def self.maybe_float?(data)
|
36
|
+
Float(data)
|
37
|
+
true
|
38
|
+
rescue ArgumentError => _e
|
39
|
+
false
|
40
|
+
end
|
35
41
|
end
|
36
42
|
end
|
37
43
|
end
|
@@ -1,18 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
3
|
-
# GuessType is a field type where we don't know what type the
|
4
|
-
# it needs to be guessed. This is a sentinel type that doesn't
|
5
|
-
# data.
|
5
|
+
# Internal: GuessType is a field type where we don't know what type the
|
6
|
+
# field is, and it needs to be guessed. This is a sentinel type that doesn't
|
7
|
+
# match any data.
|
8
|
+
#
|
6
9
|
class GuessType < FieldType
|
7
10
|
def self.type_name
|
8
|
-
|
11
|
+
name
|
9
12
|
end
|
10
13
|
|
11
|
-
def self.matches?(
|
14
|
+
def self.matches?(*)
|
12
15
|
false
|
13
16
|
end
|
14
17
|
|
15
|
-
def self.coerce(
|
18
|
+
def self.coerce(*)
|
16
19
|
CoerceFailure
|
17
20
|
end
|
18
21
|
end
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Class representing the Integer type and coercion to it.
|
6
|
+
#
|
3
7
|
class IntegerType < FieldType
|
4
|
-
|
5
8
|
REGEX = /\A[-+]?\d+\Z/
|
6
9
|
|
7
10
|
def self.type_name
|
@@ -23,12 +26,11 @@ module FlatKit
|
|
23
26
|
|
24
27
|
def self.coerce(data)
|
25
28
|
Integer(data)
|
26
|
-
rescue TypeError =>
|
29
|
+
rescue TypeError => _e
|
27
30
|
CoerceFailure
|
28
|
-
rescue ArgumentError =>
|
31
|
+
rescue ArgumentError => _e
|
29
32
|
CoerceFailure
|
30
33
|
end
|
31
|
-
|
32
34
|
end
|
33
35
|
end
|
34
36
|
end
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Class representing the null type and coercion to it.
|
6
|
+
#
|
3
7
|
class NullType < FieldType
|
4
|
-
|
5
8
|
REGEX = Regexp.union(/\A(null|nil)\Z/i, /\A\\N\Z/)
|
6
9
|
|
7
10
|
def self.type_name
|
@@ -25,6 +28,7 @@ module FlatKit
|
|
25
28
|
data
|
26
29
|
when String
|
27
30
|
return nil if REGEX.match?(data)
|
31
|
+
|
28
32
|
CoerceFailure
|
29
33
|
else
|
30
34
|
CoerceFailure
|
@@ -1,20 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
3
|
-
# StringType is essentially a fallback - hence its lower weight
|
4
|
-
# types that might have string representations.
|
5
|
-
|
6
|
-
|
5
|
+
# Internal: StringType is essentially a fallback - hence its lower weight
|
6
|
+
# than other types that might have string representations.
|
7
|
+
#
|
8
|
+
class StringType < FieldType
|
7
9
|
def self.type_name
|
8
10
|
"string"
|
9
11
|
end
|
10
12
|
|
11
13
|
def self.matches?(data)
|
12
|
-
data.
|
14
|
+
data.is_a?(String)
|
13
15
|
end
|
14
16
|
|
15
17
|
def self.coerce(data)
|
16
18
|
data.to_s
|
17
|
-
rescue =>
|
19
|
+
rescue StandardError => _e
|
18
20
|
CoerceFailure
|
19
21
|
end
|
20
22
|
end
|
@@ -1,15 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Type for all timestamp types more granular than Date.
|
6
|
+
#
|
3
7
|
class TimestampType < FieldType
|
4
|
-
|
5
8
|
def self.parse_formats
|
6
|
-
@
|
9
|
+
@parse_formats ||= [
|
7
10
|
"%Y-%m-%d %H:%M:%S.%NZ",
|
8
11
|
"%Y-%m-%d %H:%M:%S.%N",
|
9
12
|
"%Y-%m-%dT%H:%M:%S.%N%z", # w3cdtf
|
10
13
|
"%Y-%m-%d %H:%M:%S",
|
11
14
|
"%Y-%m-%dT%H:%M:%S%z",
|
12
|
-
"%Y-%m-%dT%H:%M:%SZ",
|
15
|
+
"%Y-%m-%dT%H:%M:%SZ",
|
13
16
|
"%Y%m%dT%H%M%S",
|
14
17
|
"%a, %d %b %Y %H:%M:%S %z", # rfc2822, httpdate
|
15
18
|
].freeze
|
@@ -21,7 +24,7 @@ module FlatKit
|
|
21
24
|
|
22
25
|
def self.matches?(data)
|
23
26
|
coerced = coerce(data)
|
24
|
-
|
27
|
+
coerced.is_a?(Time)
|
25
28
|
end
|
26
29
|
|
27
30
|
def self.coerce(data)
|
@@ -30,12 +33,10 @@ module FlatKit
|
|
30
33
|
data
|
31
34
|
when String
|
32
35
|
parse_formats.each do |format|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
# do nothing
|
38
|
-
end
|
36
|
+
coerced_data = Time.strptime(data, format).utc
|
37
|
+
return coerced_data
|
38
|
+
rescue StandardError => _e
|
39
|
+
# do nothing
|
39
40
|
end
|
40
41
|
CoerceFailure
|
41
42
|
else
|