flat_kit 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +9 -0
- data/Manifest.txt +3 -42
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +46 -32
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +41 -39
- data/lib/flat_kit/command.rb +10 -11
- data/lib/flat_kit/descendant_tracker.rb +9 -6
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +5 -2
- data/lib/flat_kit/field_stats.rb +31 -26
- data/lib/flat_kit/field_type/boolean_type.rb +9 -5
- data/lib/flat_kit/field_type/date_type.rb +19 -17
- data/lib/flat_kit/field_type/float_type.rb +15 -9
- data/lib/flat_kit/field_type/guess_type.rb +9 -6
- data/lib/flat_kit/field_type/integer_type.rb +6 -4
- data/lib/flat_kit/field_type/null_type.rb +5 -1
- data/lib/flat_kit/field_type/string_type.rb +8 -6
- data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
- data/lib/flat_kit/field_type/unknown_type.rb +12 -8
- data/lib/flat_kit/field_type.rb +52 -44
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +15 -18
- data/lib/flat_kit/jsonl/writer.rb +8 -10
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -18
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +8 -7
- data/lib/flat_kit/position.rb +3 -4
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
- data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
- data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
- data/lib/flat_kit/stat_type.rb +18 -13
- data/lib/flat_kit/stats.rb +12 -14
- data/lib/flat_kit/writer.rb +5 -6
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +13 -10
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +31 -26
- metadata +20 -158
- data/Rakefile +0 -21
- data/examples/stream-active-record-to-csv.rb +0 -42
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/field_type/test_boolean_type.rb +0 -65
- data/test/field_type/test_date_type.rb +0 -71
- data/test/field_type/test_float_type.rb +0 -56
- data/test/field_type/test_guess_type.rb +0 -14
- data/test/field_type/test_integer_type.rb +0 -52
- data/test/field_type/test_null_type.rb +0 -41
- data/test/field_type/test_string_type.rb +0 -18
- data/test/field_type/test_timestamp_type.rb +0 -108
- data/test/field_type/test_unknown_type.rb +0 -35
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -86
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/run +0 -23
- data/test/stat_type/test_nominal_stats.rb +0 -69
- data/test/stat_type/test_numerical_stats.rb +0 -118
- data/test/stat_type/test_ordinal_stats.rb +0 -92
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -89
- data/test/test_field_stats.rb +0 -134
- data/test/test_field_type.rb +0 -34
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -89
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class Command
|
5
|
+
# Internal: The implementation of the stats command.
|
6
|
+
#
|
3
7
|
class Stats < ::FlatKit::Command
|
4
|
-
|
5
8
|
def self.name
|
6
9
|
"stats"
|
7
10
|
end
|
@@ -12,45 +15,45 @@ module FlatKit
|
|
12
15
|
|
13
16
|
def self.parser
|
14
17
|
::Optimist::Parser.new do
|
15
|
-
banner
|
18
|
+
banner Sort.description.to_s
|
16
19
|
banner ""
|
17
20
|
|
18
21
|
banner <<~BANNER
|
19
|
-
|
22
|
+
Given an input file collect basic statistics.
|
20
23
|
|
21
|
-
|
24
|
+
The statistics can vary based upon the datatype of the field.
|
22
25
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
+
Numeric fields will report the basic count, min, max, mean, standard deviation and sum.
|
27
|
+
Non-numeric fields that are comparable, like dates, will report count, min and max.
|
28
|
+
Other non-numeric fields will only report the count.
|
26
29
|
|
27
|
-
|
28
|
-
|
30
|
+
Adding --cardinality will report the count, and frequency of distinct values in the result.
|
31
|
+
This will allow for reporting the median value.
|
29
32
|
|
30
|
-
|
31
|
-
|
33
|
+
The fields upon which stats are collected may be selected with the --fields parameter.
|
34
|
+
By default statistics are collected on all fields.
|
32
35
|
|
33
|
-
|
36
|
+
The flatfile type(s) will be automatically determined by the file name.
|
34
37
|
|
35
|
-
|
38
|
+
The output can be dumped as a CSV, JSON or a a formated ascii table.
|
36
39
|
|
37
40
|
BANNER
|
38
41
|
|
39
42
|
banner <<~USAGE
|
40
43
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
Usage:
|
45
|
+
fk stats --everything file.json
|
46
|
+
fk stats --select surname,given_name file.csv
|
47
|
+
fk stats --select surname,given_name --output-format json file.csv > stats.json
|
48
|
+
fk stats --select field1,field2 --output-format json input.csv
|
49
|
+
fk stats --select field1 file.json.gz -o stats.csv
|
50
|
+
gunzip -c file.json.gz | fk stats --input-format json --output-format text
|
48
51
|
|
49
52
|
USAGE
|
50
53
|
|
51
54
|
banner <<~OPTIONS
|
52
55
|
|
53
|
-
|
56
|
+
Options:
|
54
57
|
|
55
58
|
OPTIONS
|
56
59
|
|
@@ -65,24 +68,23 @@ module FlatKit
|
|
65
68
|
|
66
69
|
def parse
|
67
70
|
parser = self.class.parser
|
68
|
-
::Optimist
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
71
|
+
::Optimist.with_standard_exception_handling(parser) do
|
72
|
+
opts = parser.parse(argv)
|
73
|
+
fields = ::FlatKit::Stats::AllFields
|
74
|
+
fields = CSV.parse_line(opts[:select]) if opts[:select]
|
75
|
+
|
76
|
+
stats = [FieldStats::CORE_STATS]
|
77
|
+
stats << FieldStats::CARDINALITY_STATS if opts[:cardinality] || opts[:everything]
|
78
|
+
|
79
|
+
paths = parser.leftovers
|
80
|
+
raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed" if paths.size > 1
|
81
|
+
|
82
|
+
path = paths.first || "-" # default to stdin
|
83
|
+
@stats = ::FlatKit::Stats.new(input: path, input_fallback: opts[:input_format],
|
84
|
+
output: opts[:output], output_fallback: opts[:output_format],
|
85
|
+
fields_to_stat: fields, stats_to_collect: stats)
|
86
|
+
rescue ::FlatKit::Error => e
|
87
|
+
raise ::Optimist::CommandlineError, e.message
|
86
88
|
end
|
87
89
|
end
|
88
90
|
|
data/lib/flat_kit/command.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: The base class for all commands in the CLI
|
5
|
+
#
|
2
6
|
class Command
|
3
7
|
extend DescendantTracker
|
4
8
|
|
5
|
-
attr_reader :argv
|
6
|
-
attr_reader :env
|
7
|
-
attr_reader :logger
|
8
|
-
attr_reader :opts
|
9
|
-
attr_reader :readers
|
10
|
-
attr_reader :writer
|
9
|
+
attr_reader :argv, :env, :logger, :opts, :readers, :writer
|
11
10
|
|
12
11
|
def self.name
|
13
12
|
raise NotImplementedError, "#{self.class} must implement #{self.class}.name"
|
@@ -22,7 +21,7 @@ module FlatKit
|
|
22
21
|
end
|
23
22
|
|
24
23
|
def self.names
|
25
|
-
children.map
|
24
|
+
children.map(&:name)
|
26
25
|
end
|
27
26
|
|
28
27
|
def self.for(name)
|
@@ -48,7 +47,7 @@ module FlatKit
|
|
48
47
|
end
|
49
48
|
end
|
50
49
|
|
51
|
-
require
|
52
|
-
require
|
53
|
-
require
|
54
|
-
require
|
50
|
+
require "flat_kit/command/cat"
|
51
|
+
require "flat_kit/command/merge"
|
52
|
+
require "flat_kit/command/sort"
|
53
|
+
require "flat_kit/command/stats"
|
@@ -1,17 +1,20 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
2
4
|
|
3
5
|
module FlatKit
|
6
|
+
# Internal: A module to track descendants of a class
|
7
|
+
#
|
4
8
|
module DescendantTracker
|
5
9
|
def inherited(klass)
|
6
10
|
super
|
7
11
|
return unless klass.instance_of?(Class)
|
8
|
-
|
12
|
+
|
13
|
+
children << klass
|
9
14
|
end
|
10
15
|
|
11
16
|
def children
|
12
|
-
unless defined? @_children
|
13
|
-
@_children = Set.new
|
14
|
-
end
|
17
|
+
@_children = Set.new unless defined? @_children
|
15
18
|
@_children
|
16
19
|
end
|
17
20
|
|
@@ -24,7 +27,7 @@ module FlatKit
|
|
24
27
|
end
|
25
28
|
end
|
26
29
|
|
27
|
-
#
|
30
|
+
#
|
28
31
|
# Find all the children that return truthy from the given method with args
|
29
32
|
#
|
30
33
|
def find_children(method, *args)
|
data/lib/flat_kit/error.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
|
-
# A simplified Observable class for use internally
|
4
|
+
# Internal: A simplified Observable class for use internally
|
3
5
|
#
|
4
6
|
module EventEmitter
|
5
7
|
def add_listener(listener)
|
6
8
|
raise ::NoMethodError, "#{listener} does not resond to #on_event" unless listener.respond_to?(:on_event)
|
9
|
+
|
7
10
|
self._listeners ||= []
|
8
11
|
self._listeners << listener unless _listeners.include?(listener)
|
9
12
|
end
|
@@ -27,7 +30,7 @@ module FlatKit
|
|
27
30
|
end
|
28
31
|
|
29
32
|
def _listeners
|
30
|
-
@_listeners ||=
|
33
|
+
@_listeners ||= []
|
31
34
|
end
|
32
35
|
end
|
33
36
|
end
|
data/lib/flat_kit/field_stats.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
|
-
# Collect stats on a single field.
|
3
|
-
#
|
4
|
-
#
|
4
|
+
# Internal: Collect stats on a single field.
|
5
|
+
#
|
6
|
+
# We may not know what the field data type is to start with, so collect a
|
7
|
+
# bunch of values until we have the threshold, and then calculate stats based
|
8
|
+
# upon the data types determined by the guess
|
5
9
|
#
|
6
10
|
class FieldStats
|
7
11
|
DEFAULT_GUESS_THRESHOLD = 1000
|
@@ -9,7 +13,7 @@ module FlatKit
|
|
9
13
|
CORE_STATS = :core
|
10
14
|
CARDINALITY_STATS = :cardinality
|
11
15
|
|
12
|
-
ALL_STATS = [
|
16
|
+
ALL_STATS = [CORE_STATS, CARDINALITY_STATS].freeze
|
13
17
|
|
14
18
|
EXPORT_FIELDS = %w[
|
15
19
|
name
|
@@ -36,14 +40,13 @@ module FlatKit
|
|
36
40
|
total_count
|
37
41
|
null_percent
|
38
42
|
unknown_percent
|
39
|
-
]
|
40
|
-
|
43
|
+
].freeze
|
41
44
|
|
42
|
-
attr_reader :type_counts
|
43
|
-
attr_reader :field_type
|
44
|
-
attr_reader :name
|
45
|
+
attr_reader :type_counts, :field_type, :name, :out_of_type_count
|
45
46
|
|
46
|
-
def initialize(name:, stats_to_collect: CORE_STATS,
|
47
|
+
def initialize(name:, stats_to_collect: CORE_STATS,
|
48
|
+
type: ::FlatKit::FieldType::GuessType,
|
49
|
+
guess_threshold: DEFAULT_GUESS_THRESHOLD)
|
47
50
|
@name = name
|
48
51
|
@field_type = type
|
49
52
|
@guess_threshold = guess_threshold
|
@@ -56,9 +59,14 @@ module FlatKit
|
|
56
59
|
|
57
60
|
@stats_to_collect.each do |collection_set|
|
58
61
|
next if ALL_STATS.include?(collection_set)
|
59
|
-
|
62
|
+
|
63
|
+
valid_sets = ALL_STATS.map(&:to_s).join(", ")
|
64
|
+
|
65
|
+
raise ArgumentError, "#{collection_set} is not a valid stats collection set, must be one of #{valid_sets}"
|
60
66
|
end
|
61
|
-
|
67
|
+
return if type.is_a?(Class) && (type.superclass == ::FlatKit::FieldType)
|
68
|
+
|
69
|
+
raise ArgumentError, "type: must be FieldType subclasses - not #{type}"
|
62
70
|
end
|
63
71
|
|
64
72
|
def field_type_determined?
|
@@ -68,14 +76,12 @@ module FlatKit
|
|
68
76
|
def update(value)
|
69
77
|
update_type_count(value)
|
70
78
|
|
71
|
-
if field_type_determined?
|
79
|
+
if field_type_determined?
|
72
80
|
update_stats(value)
|
73
81
|
else
|
74
82
|
@values << value
|
75
83
|
|
76
|
-
if @values.size >= @guess_threshold
|
77
|
-
resolve_guess
|
78
|
-
end
|
84
|
+
resolve_guess if @values.size >= @guess_threshold
|
79
85
|
end
|
80
86
|
end
|
81
87
|
|
@@ -167,12 +173,9 @@ module FlatKit
|
|
167
173
|
stats.count + @out_of_type_count
|
168
174
|
end
|
169
175
|
|
170
|
-
def out_of_type_count
|
171
|
-
@out_of_type_count
|
172
|
-
end
|
173
|
-
|
174
176
|
def null_percent
|
175
177
|
return 0 if total_count.zero?
|
178
|
+
|
176
179
|
((null_count.to_f / total_count) * 100.0).truncate(2)
|
177
180
|
end
|
178
181
|
|
@@ -182,15 +185,16 @@ module FlatKit
|
|
182
185
|
|
183
186
|
def unknown_percent
|
184
187
|
return 0 if total_count.zero?
|
188
|
+
|
185
189
|
((unknown_count.to_f / total_count) * 100.0).truncate(2)
|
186
190
|
end
|
187
191
|
|
188
192
|
def to_hash
|
189
193
|
resolve_guess
|
190
194
|
|
191
|
-
|
195
|
+
{}.tap do |h|
|
192
196
|
EXPORT_FIELDS.each do |n|
|
193
|
-
h[n] =
|
197
|
+
h[n] = send(n)
|
194
198
|
end
|
195
199
|
end
|
196
200
|
end
|
@@ -209,7 +213,7 @@ module FlatKit
|
|
209
213
|
|
210
214
|
def update_stats(value)
|
211
215
|
coerced_value = @field_type.coerce(value)
|
212
|
-
if coerced_value == FieldType::CoerceFailure
|
216
|
+
if coerced_value == FieldType::CoerceFailure
|
213
217
|
@out_of_type_count += 1
|
214
218
|
return
|
215
219
|
end
|
@@ -221,15 +225,16 @@ module FlatKit
|
|
221
225
|
def update_type_count(value)
|
222
226
|
guess = FieldType.best_guess(value)
|
223
227
|
type_counts[guess] += 1
|
224
|
-
|
228
|
+
guess
|
225
229
|
end
|
226
230
|
|
227
231
|
def resolve_guess
|
228
232
|
return if field_type_determined?
|
229
|
-
|
233
|
+
|
234
|
+
best_guess_type, _best_guess_count = type_counts.max_by { |_k, v| v }
|
230
235
|
@field_type = best_guess_type
|
231
236
|
@stats = StatType.for(@field_type).new(collecting_frequencies: collecting_frequencies?)
|
232
|
-
if @field_type == ::FlatKit::FieldType::StringType
|
237
|
+
if @field_type == ::FlatKit::FieldType::StringType
|
233
238
|
@length_stats = ::FlatKit::StatType::NumericalStats.new(collecting_frequencies: collecting_frequencies?)
|
234
239
|
end
|
235
240
|
@values.each do |v|
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Implementation of the boolean type and coercion to the type
|
6
|
+
#
|
3
7
|
class BooleanType < FieldType
|
4
|
-
|
5
8
|
TRUTHY_REGEX = /\A(true|t|1|yes|y|on)\Z/i
|
6
9
|
FALSEY_REGEX = /\A(false|f|0|no|n|off)\Z/i
|
7
10
|
REGEX = Regexp.union(TRUTHY_REGEX, FALSEY_REGEX)
|
@@ -12,16 +15,15 @@ module FlatKit
|
|
12
15
|
|
13
16
|
def self.matches?(data)
|
14
17
|
case data
|
15
|
-
when TrueClass
|
16
|
-
true
|
17
|
-
when FalseClass
|
18
|
+
when TrueClass, FalseClass
|
18
19
|
true
|
19
20
|
when String
|
20
21
|
REGEX.match?(data)
|
21
22
|
when Integer
|
22
23
|
return true if data.zero?
|
23
24
|
return true if data == 1
|
24
|
-
|
25
|
+
|
26
|
+
false
|
25
27
|
else
|
26
28
|
false
|
27
29
|
end
|
@@ -36,10 +38,12 @@ module FlatKit
|
|
36
38
|
when Numeric
|
37
39
|
return false if data.zero?
|
38
40
|
return true if data == 1
|
41
|
+
|
39
42
|
CoerceFailure
|
40
43
|
when String
|
41
44
|
return true if TRUTHY_REGEX.match?(data)
|
42
45
|
return false if FALSEY_REGEX.match?(data)
|
46
|
+
|
43
47
|
CoerceFailure
|
44
48
|
end
|
45
49
|
end
|
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
3
|
-
# Representing the type of data which only includes data up to
|
4
|
-
# resolution
|
5
|
+
# Internal: Representing the type of data which only includes data up to
|
6
|
+
# the day resolution
|
7
|
+
#
|
5
8
|
class DateType < FieldType
|
6
|
-
|
7
9
|
# %Y 4 digit year
|
8
10
|
# %y 2 digit year (%Y mod 100) (00..99)
|
9
11
|
# %m month of year zero padded
|
@@ -49,9 +51,8 @@ module FlatKit
|
|
49
51
|
|
50
52
|
# other formats
|
51
53
|
"%Y-%j",
|
52
|
-
"%a %b %d %Y"
|
53
|
-
|
54
|
-
|
54
|
+
"%a %b %d %Y",
|
55
|
+
].freeze
|
55
56
|
end
|
56
57
|
|
57
58
|
# https://en.wikipedia.org/wiki/Date_format_by_country
|
@@ -146,7 +147,7 @@ module FlatKit
|
|
146
147
|
|
147
148
|
def self.matches?(data)
|
148
149
|
coerced = coerce(data)
|
149
|
-
|
150
|
+
coerced.is_a?(Date)
|
150
151
|
end
|
151
152
|
|
152
153
|
def self.coerce(data)
|
@@ -156,20 +157,21 @@ module FlatKit
|
|
156
157
|
when Date
|
157
158
|
data
|
158
159
|
when String
|
159
|
-
|
160
|
-
parse_formats.each do |format|
|
161
|
-
begin
|
162
|
-
coerced_data = Date.strptime(data, format)
|
163
|
-
break
|
164
|
-
rescue => _
|
165
|
-
false
|
166
|
-
end
|
167
|
-
end
|
168
|
-
coerced_data
|
160
|
+
try_parse(data)
|
169
161
|
else
|
170
162
|
CoerceFailure
|
171
163
|
end
|
172
164
|
end
|
165
|
+
|
166
|
+
def self.try_parse(data)
|
167
|
+
parse_formats.each do |format|
|
168
|
+
coerced_data = Date.strptime(data, format)
|
169
|
+
return coerced_data
|
170
|
+
rescue StandardError => _e
|
171
|
+
false
|
172
|
+
end
|
173
|
+
CoerceFailure
|
174
|
+
end
|
173
175
|
end
|
174
176
|
end
|
175
177
|
end
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Representing floating point data and conversion to it
|
6
|
+
#
|
3
7
|
class FloatType < FieldType
|
4
|
-
|
5
8
|
def self.type_name
|
6
9
|
"float"
|
7
10
|
end
|
@@ -14,12 +17,8 @@ module FlatKit
|
|
14
17
|
false
|
15
18
|
when String
|
16
19
|
return false if IntegerType.matches?(data)
|
17
|
-
|
18
|
-
|
19
|
-
true
|
20
|
-
rescue ArgumentError => _
|
21
|
-
false
|
22
|
-
end
|
20
|
+
|
21
|
+
maybe_float?(data)
|
23
22
|
else
|
24
23
|
false
|
25
24
|
end
|
@@ -27,11 +26,18 @@ module FlatKit
|
|
27
26
|
|
28
27
|
def self.coerce(data)
|
29
28
|
Float(data)
|
30
|
-
rescue TypeError =>
|
29
|
+
rescue TypeError => _e
|
31
30
|
CoerceFailure
|
32
|
-
rescue ArgumentError =>
|
31
|
+
rescue ArgumentError => _e
|
33
32
|
CoerceFailure
|
34
33
|
end
|
34
|
+
|
35
|
+
def self.maybe_float?(data)
|
36
|
+
Float(data)
|
37
|
+
true
|
38
|
+
rescue ArgumentError => _e
|
39
|
+
false
|
40
|
+
end
|
35
41
|
end
|
36
42
|
end
|
37
43
|
end
|
@@ -1,18 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
3
|
-
# GuessType is a field type where we don't know what type the
|
4
|
-
# it needs to be guessed. This is a sentinel type that doesn't
|
5
|
-
# data.
|
5
|
+
# Internal: GuessType is a field type where we don't know what type the
|
6
|
+
# field is, and it needs to be guessed. This is a sentinel type that doesn't
|
7
|
+
# match any data.
|
8
|
+
#
|
6
9
|
class GuessType < FieldType
|
7
10
|
def self.type_name
|
8
|
-
|
11
|
+
name
|
9
12
|
end
|
10
13
|
|
11
|
-
def self.matches?(
|
14
|
+
def self.matches?(*)
|
12
15
|
false
|
13
16
|
end
|
14
17
|
|
15
|
-
def self.coerce(
|
18
|
+
def self.coerce(*)
|
16
19
|
CoerceFailure
|
17
20
|
end
|
18
21
|
end
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Class representing the Integer type and coercion to it.
|
6
|
+
#
|
3
7
|
class IntegerType < FieldType
|
4
|
-
|
5
8
|
REGEX = /\A[-+]?\d+\Z/
|
6
9
|
|
7
10
|
def self.type_name
|
@@ -23,12 +26,11 @@ module FlatKit
|
|
23
26
|
|
24
27
|
def self.coerce(data)
|
25
28
|
Integer(data)
|
26
|
-
rescue TypeError =>
|
29
|
+
rescue TypeError => _e
|
27
30
|
CoerceFailure
|
28
|
-
rescue ArgumentError =>
|
31
|
+
rescue ArgumentError => _e
|
29
32
|
CoerceFailure
|
30
33
|
end
|
31
|
-
|
32
34
|
end
|
33
35
|
end
|
34
36
|
end
|
@@ -1,7 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Class representing the null type and coercion to it.
|
6
|
+
#
|
3
7
|
class NullType < FieldType
|
4
|
-
|
5
8
|
REGEX = Regexp.union(/\A(null|nil)\Z/i, /\A\\N\Z/)
|
6
9
|
|
7
10
|
def self.type_name
|
@@ -25,6 +28,7 @@ module FlatKit
|
|
25
28
|
data
|
26
29
|
when String
|
27
30
|
return nil if REGEX.match?(data)
|
31
|
+
|
28
32
|
CoerceFailure
|
29
33
|
else
|
30
34
|
CoerceFailure
|
@@ -1,20 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
3
|
-
# StringType is essentially a fallback - hence its lower weight
|
4
|
-
# types that might have string representations.
|
5
|
-
|
6
|
-
|
5
|
+
# Internal: StringType is essentially a fallback - hence its lower weight
|
6
|
+
# than other types that might have string representations.
|
7
|
+
#
|
8
|
+
class StringType < FieldType
|
7
9
|
def self.type_name
|
8
10
|
"string"
|
9
11
|
end
|
10
12
|
|
11
13
|
def self.matches?(data)
|
12
|
-
data.
|
14
|
+
data.is_a?(String)
|
13
15
|
end
|
14
16
|
|
15
17
|
def self.coerce(data)
|
16
18
|
data.to_s
|
17
|
-
rescue =>
|
19
|
+
rescue StandardError => _e
|
18
20
|
CoerceFailure
|
19
21
|
end
|
20
22
|
end
|
@@ -1,15 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class FieldType
|
5
|
+
# Internal: Type for all timestamp types more granular than Date.
|
6
|
+
#
|
3
7
|
class TimestampType < FieldType
|
4
|
-
|
5
8
|
def self.parse_formats
|
6
|
-
@
|
9
|
+
@parse_formats ||= [
|
7
10
|
"%Y-%m-%d %H:%M:%S.%NZ",
|
8
11
|
"%Y-%m-%d %H:%M:%S.%N",
|
9
12
|
"%Y-%m-%dT%H:%M:%S.%N%z", # w3cdtf
|
10
13
|
"%Y-%m-%d %H:%M:%S",
|
11
14
|
"%Y-%m-%dT%H:%M:%S%z",
|
12
|
-
"%Y-%m-%dT%H:%M:%SZ",
|
15
|
+
"%Y-%m-%dT%H:%M:%SZ",
|
13
16
|
"%Y%m%dT%H%M%S",
|
14
17
|
"%a, %d %b %Y %H:%M:%S %z", # rfc2822, httpdate
|
15
18
|
].freeze
|
@@ -21,7 +24,7 @@ module FlatKit
|
|
21
24
|
|
22
25
|
def self.matches?(data)
|
23
26
|
coerced = coerce(data)
|
24
|
-
|
27
|
+
coerced.is_a?(Time)
|
25
28
|
end
|
26
29
|
|
27
30
|
def self.coerce(data)
|
@@ -30,12 +33,10 @@ module FlatKit
|
|
30
33
|
data
|
31
34
|
when String
|
32
35
|
parse_formats.each do |format|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
# do nothing
|
38
|
-
end
|
36
|
+
coerced_data = Time.strptime(data, format).utc
|
37
|
+
return coerced_data
|
38
|
+
rescue StandardError => _e
|
39
|
+
# do nothing
|
39
40
|
end
|
40
41
|
CoerceFailure
|
41
42
|
else
|