flat_kit 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +9 -0
- data/Manifest.txt +3 -42
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +46 -32
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +41 -39
- data/lib/flat_kit/command.rb +10 -11
- data/lib/flat_kit/descendant_tracker.rb +9 -6
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +5 -2
- data/lib/flat_kit/field_stats.rb +31 -26
- data/lib/flat_kit/field_type/boolean_type.rb +9 -5
- data/lib/flat_kit/field_type/date_type.rb +19 -17
- data/lib/flat_kit/field_type/float_type.rb +15 -9
- data/lib/flat_kit/field_type/guess_type.rb +9 -6
- data/lib/flat_kit/field_type/integer_type.rb +6 -4
- data/lib/flat_kit/field_type/null_type.rb +5 -1
- data/lib/flat_kit/field_type/string_type.rb +8 -6
- data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
- data/lib/flat_kit/field_type/unknown_type.rb +12 -8
- data/lib/flat_kit/field_type.rb +52 -44
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +15 -18
- data/lib/flat_kit/jsonl/writer.rb +8 -10
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -18
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +8 -7
- data/lib/flat_kit/position.rb +3 -4
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
- data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
- data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
- data/lib/flat_kit/stat_type.rb +18 -13
- data/lib/flat_kit/stats.rb +12 -14
- data/lib/flat_kit/writer.rb +5 -6
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +13 -10
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +31 -26
- metadata +20 -158
- data/Rakefile +0 -21
- data/examples/stream-active-record-to-csv.rb +0 -42
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/field_type/test_boolean_type.rb +0 -65
- data/test/field_type/test_date_type.rb +0 -71
- data/test/field_type/test_float_type.rb +0 -56
- data/test/field_type/test_guess_type.rb +0 -14
- data/test/field_type/test_integer_type.rb +0 -52
- data/test/field_type/test_null_type.rb +0 -41
- data/test/field_type/test_string_type.rb +0 -18
- data/test/field_type/test_timestamp_type.rb +0 -108
- data/test/field_type/test_unknown_type.rb +0 -35
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -86
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/run +0 -23
- data/test/stat_type/test_nominal_stats.rb +0 -69
- data/test/stat_type/test_numerical_stats.rb +0 -118
- data/test/stat_type/test_ordinal_stats.rb +0 -92
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -89
- data/test/test_field_stats.rb +0 -134
- data/test/test_field_type.rb +0 -34
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -89
data/test/run
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
|
4
|
-
test_dir = __dir__
|
5
|
-
|
6
|
-
$: << File.join(File.dirname(test_dir), "lib")
|
7
|
-
$: << test_dir
|
8
|
-
|
9
|
-
require_relative './test_helper'
|
10
|
-
|
11
|
-
require 'find'
|
12
|
-
|
13
|
-
if ARGV.empty? then
|
14
|
-
Find.find(test_dir) do |path|
|
15
|
-
next unless File.file?(path) && File.basename(path) =~ /\Atest_.*\.rb\Z/
|
16
|
-
require path
|
17
|
-
end
|
18
|
-
else
|
19
|
-
ARGV.each do |f|
|
20
|
-
require File.expand_path(f)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
data/test/stat_type/test_nominal_stats.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestStatType
|
4
|
-
class TestNominalStats < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@unique_values = ('a'..'f').to_a
|
7
|
-
@values = Array.new.tap do |a|
|
8
|
-
@unique_values.each do |letter|
|
9
|
-
(Random.rand(42) + 1).times { a << letter }
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
@frequencies = @values.tally
|
14
|
-
|
15
|
-
@stats = ::FlatKit::StatType::NominalStats.new
|
16
|
-
@all_stats = ::FlatKit::StatType::NominalStats.new(collecting_frequencies: true)
|
17
|
-
|
18
|
-
@values.each do |v|
|
19
|
-
@stats.update(v)
|
20
|
-
@all_stats.update(v)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_count
|
25
|
-
assert_equal(@values.size, @stats.count)
|
26
|
-
assert_equal(@values.size, @all_stats.count)
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_does_not_collect_unique_count_by_default
|
30
|
-
assert_nil(@stats.unique_count)
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_does_not_collect_unique_values_by_default
|
34
|
-
assert_nil(@stats.unique_values)
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_does_not_collect_frequencies_by_default
|
38
|
-
assert_nil(@stats.frequencies)
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_unique_count
|
42
|
-
assert_equal(@unique_values.size, @all_stats.unique_count)
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_unique_values
|
46
|
-
assert_equal(@unique_values.sort, @all_stats.unique_values.sort)
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_frequencies
|
50
|
-
assert_equal(@frequencies, @all_stats.frequencies)
|
51
|
-
end
|
52
|
-
|
53
|
-
def test_default_to_hash
|
54
|
-
expecting = { "count" => @values.size }
|
55
|
-
assert_equal(expecting, @stats.to_hash)
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_all_stats_hash
|
59
|
-
expecting = {
|
60
|
-
"count" => @values.size,
|
61
|
-
"unique_count" => @unique_values.size,
|
62
|
-
"unique_values" => @unique_values.sort,
|
63
|
-
"mode" => @frequencies.max_by { |k,v| v }.first
|
64
|
-
}
|
65
|
-
assert_equal(expecting, @all_stats.to_hash)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
data/test/stat_type/test_numerical_stats.rb
DELETED
@@ -1,118 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
|
4
|
-
module TestStatType
|
5
|
-
class TestNumericalStats < ::Minitest::Test
|
6
|
-
def setup
|
7
|
-
@stats = FlatKit::StatType::NumericalStats.new
|
8
|
-
@full_stats = FlatKit::StatType::NumericalStats.new
|
9
|
-
@all_stats = FlatKit::StatType::NumericalStats.new(collecting_frequencies: true)
|
10
|
-
[ 1, 2, 3].each { |i| @full_stats.update( i ) }
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_intialized_with_usable_values
|
14
|
-
assert_equal(0, @stats.count)
|
15
|
-
assert_equal(Float::INFINITY, @stats.min)
|
16
|
-
assert_equal(-Float::INFINITY, @stats.max)
|
17
|
-
assert_equal(0.0, @stats.sum)
|
18
|
-
assert_equal(0.0, @stats.rate)
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_calculates_mean
|
22
|
-
assert_equal(2.0, @full_stats.mean)
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_calculates_rate
|
26
|
-
assert_equal(0.5, @full_stats.rate)
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_tracks_the_maximum_value
|
30
|
-
assert_equal(3.0, @full_stats.max)
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_tracks_the_minimum_value
|
34
|
-
assert_equal(1.0, @full_stats.min)
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_tracks_the_count
|
38
|
-
assert_equal(3,@full_stats.count)
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_tracks_the_sum
|
42
|
-
assert_equal(6.0, @full_stats.sum)
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_calculates_the_standard_deviation
|
46
|
-
assert_equal(1.0, @full_stats.stddev)
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_calculates_the_sum_of_squares
|
50
|
-
assert_equal(14, @full_stats.sumsq)
|
51
|
-
end
|
52
|
-
|
53
|
-
def test_converts_to_a_hash
|
54
|
-
h = @full_stats.to_hash
|
55
|
-
assert_equal(::FlatKit::StatType::NumericalStats.default_stats.size, h.size)
|
56
|
-
assert_equal(::FlatKit::StatType::NumericalStats.default_stats, h.keys.sort)
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_converts_to_a_limited_hash_if_given_arguments
|
60
|
-
h = @full_stats.to_hash( "min", "max", "mean" )
|
61
|
-
assert_equal(3, h.size)
|
62
|
-
assert_equal(%w[ max mean min], h.keys.sort)
|
63
|
-
|
64
|
-
h = @full_stats.to_hash( %w[ count rate ] )
|
65
|
-
assert_equal(2, h.size)
|
66
|
-
assert_equal(%w[ count rate ], h.keys.sort)
|
67
|
-
end
|
68
|
-
|
69
|
-
def test_raises_nomethoderror_if_an_invalid_stat_is_used
|
70
|
-
assert_raises(NoMethodError) { @full_stats.to_hash( "wibble" ) }
|
71
|
-
end
|
72
|
-
|
73
|
-
def test_converts_to_a_json_string
|
74
|
-
j = @full_stats.to_json
|
75
|
-
h = JSON.parse( j )
|
76
|
-
assert_equal(::FlatKit::StatType::NumericalStats.default_stats.size, h.size)
|
77
|
-
assert_equal(::FlatKit::StatType::NumericalStats.default_stats, h.keys.sort)
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_converts_to_a_limited_json_hash_if_given_arguments
|
81
|
-
j = @full_stats.to_json( "min", "max", "mean" )
|
82
|
-
h = JSON.parse( j )
|
83
|
-
assert_equal(3, h.size)
|
84
|
-
assert_equal(%w[ max mean min], h.keys.sort)
|
85
|
-
|
86
|
-
j = @full_stats.to_json( %w[ count rate ] )
|
87
|
-
h = JSON.parse( j )
|
88
|
-
assert_equal(2, h.size)
|
89
|
-
assert_equal(%w[ count rate ], h.keys.sort)
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_raises_nomethoderror_if_an_invalid_json_stat_is_used
|
93
|
-
assert_raises(NoMethodError) { @full_stats.to_json( "wibble" ) }
|
94
|
-
end
|
95
|
-
|
96
|
-
def test_collects_mode
|
97
|
-
values = Array.new.tap do |a|
|
98
|
-
100.times {
|
99
|
-
n = Random.rand(10)
|
100
|
-
a << n
|
101
|
-
@all_stats.update(n)
|
102
|
-
}
|
103
|
-
end
|
104
|
-
|
105
|
-
tally = values.tally
|
106
|
-
mode_value = tally.max_by { |v, count| count }.first
|
107
|
-
|
108
|
-
assert_equal(mode_value, @all_stats.mode)
|
109
|
-
end
|
110
|
-
|
111
|
-
def test_collecting_frequences_reports_extra_stat_names
|
112
|
-
stat_names = @all_stats.collected_stats
|
113
|
-
assert_includes(stat_names, "mode")
|
114
|
-
assert_includes(stat_names, "unique_count")
|
115
|
-
assert_includes(stat_names, "unique_values")
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
data/test/stat_type/test_ordinal_stats.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestStatType
|
4
|
-
class TestOrdinalStats < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
today = Date.today
|
7
|
-
next_month = today >> 1
|
8
|
-
last_day_of_month = (next_month - 1).mday
|
9
|
-
|
10
|
-
@start_date = Date.new(today.year, today.month, 1)
|
11
|
-
@end_date = Date.new(today.year, today.month, last_day_of_month)
|
12
|
-
|
13
|
-
@unique_values = (@start_date..@end_date).to_a
|
14
|
-
@values = Array.new.tap do |a|
|
15
|
-
@unique_values.each do |date|
|
16
|
-
(Random.rand(42) + 1).times { a << date}
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
@frequencies = @values.tally
|
21
|
-
|
22
|
-
@stats = ::FlatKit::StatType::OrdinalStats.new
|
23
|
-
@all_stats = ::FlatKit::StatType::OrdinalStats.new(collecting_frequencies: true)
|
24
|
-
|
25
|
-
@values.each do |v|
|
26
|
-
@stats.update(v)
|
27
|
-
@all_stats.update(v)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_count
|
32
|
-
assert_equal(@values.size, @stats.count)
|
33
|
-
assert_equal(@values.size, @all_stats.count)
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_min
|
37
|
-
assert_equal(@values.min, @stats.min)
|
38
|
-
assert_equal(@values.min, @all_stats.min)
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_max
|
42
|
-
assert_equal(@values.max, @stats.max)
|
43
|
-
assert_equal(@values.max, @all_stats.max)
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_does_not_collect_unique_count_by_default
|
47
|
-
assert_nil(@stats.unique_count)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_does_not_collect_unique_values_by_default
|
51
|
-
assert_nil(@stats.unique_values)
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_does_not_collect_frequencies_by_default
|
55
|
-
assert_nil(@stats.frequencies)
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_unique_count
|
59
|
-
assert_equal(@unique_values.size, @all_stats.unique_count)
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_unique_values
|
63
|
-
assert_equal(@unique_values.sort, @all_stats.unique_values.sort)
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_frequencies
|
67
|
-
assert_equal(@frequencies, @all_stats.frequencies)
|
68
|
-
end
|
69
|
-
|
70
|
-
def test_default_to_hash
|
71
|
-
expecting = {
|
72
|
-
"count" => @values.size,
|
73
|
-
"max" => @values.max,
|
74
|
-
"min" => @values.min,
|
75
|
-
}
|
76
|
-
assert_equal(expecting, @stats.to_hash)
|
77
|
-
end
|
78
|
-
|
79
|
-
def test_all_stats_hash
|
80
|
-
expecting = {
|
81
|
-
"count" => @values.size,
|
82
|
-
"unique_count" => @unique_values.size,
|
83
|
-
"unique_values" => @unique_values.sort,
|
84
|
-
"mode" => @frequencies.max_by { |k,v| v }.first,
|
85
|
-
"max" => @values.max,
|
86
|
-
"min" => @values.min,
|
87
|
-
}
|
88
|
-
assert_equal(expecting, @all_stats.to_hash)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
data/test/test_conversions.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestConversions < ::Minitest::Test
|
4
|
-
def setup
|
5
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
6
|
-
@src_record = @one_row_dataset.records.first
|
7
|
-
@csv_row = @one_row_dataset.records_as_csv_rows.first
|
8
|
-
@compare_fields = @one_row_dataset.compare_fields
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_from_csv_to_json
|
12
|
-
xsv_record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
13
|
-
json_record = FlatKit::Jsonl::Record.from_record(xsv_record)
|
14
|
-
|
15
|
-
assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
|
16
|
-
assert_equal(@one_row_dataset.records.first, json_record.to_hash)
|
17
|
-
assert_equal(xsv_record, json_record)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_from_json_to_csv
|
21
|
-
src_json = JSON.generate(@src_record)
|
22
|
-
json_record = FlatKit::Jsonl::Record.new(data: src_json, compare_fields: @compare_fields)
|
23
|
-
xsv_record = FlatKit::Xsv::Record.from_record(json_record)
|
24
|
-
|
25
|
-
assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
|
26
|
-
assert_equal(@one_row_dataset.records.first, json_record.to_hash)
|
27
|
-
assert_equal(xsv_record, json_record)
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_roundtrip_csv_json_csv
|
31
|
-
xsv_record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
32
|
-
json_record = FlatKit::Jsonl::Record.from_record(xsv_record)
|
33
|
-
xsv2 = FlatKit::Xsv::Record.from_record(json_record)
|
34
|
-
|
35
|
-
assert_equal(xsv_record.to_s, xsv2.to_s)
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_roundtrip_json_csv_json
|
39
|
-
src_json = JSON.generate(@src_record)
|
40
|
-
json_record = FlatKit::Jsonl::Record.new(data: src_json, compare_fields: @compare_fields)
|
41
|
-
xsv_record = FlatKit::Xsv::Record.from_record(json_record)
|
42
|
-
json2 = FlatKit::Jsonl::Record.from_record(xsv_record)
|
43
|
-
assert_equal(src_json, json2.to_s)
|
44
|
-
end
|
45
|
-
end
|
data/test/test_event_emitter.rb
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestEventEmitter < ::Minitest::Test
|
4
|
-
class Pub
|
5
|
-
include ::FlatKit::EventEmitter
|
6
|
-
end
|
7
|
-
|
8
|
-
class Sub
|
9
|
-
attr_reader :name
|
10
|
-
attr_reader :data
|
11
|
-
attr_reader :meta
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@name = nil
|
15
|
-
@data = nil
|
16
|
-
@meta = nil
|
17
|
-
end
|
18
|
-
|
19
|
-
def [](key)
|
20
|
-
@meta[key]
|
21
|
-
end
|
22
|
-
|
23
|
-
def on_event(name:, data:, meta:)
|
24
|
-
@name = name
|
25
|
-
@data = data
|
26
|
-
@meta = meta
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
class BadSub; end
|
31
|
-
|
32
|
-
def setup
|
33
|
-
@emitter = Pub.new
|
34
|
-
@receiver = Sub.new
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_counts_no_listeners_before_adding_one
|
38
|
-
assert_equal(0, @emitter.count_listeners)
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_adds_listener
|
42
|
-
@emitter.add_listener(@receiver)
|
43
|
-
assert_equal(1, @emitter.count_listeners)
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_removes_listener
|
47
|
-
@emitter.add_listener(@receiver)
|
48
|
-
assert_equal(1, @emitter.count_listeners)
|
49
|
-
|
50
|
-
@emitter.remove_listener(@receiver)
|
51
|
-
assert_equal(0, @emitter.count_listeners)
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_only_adds_an_listener_once
|
55
|
-
@emitter.add_listener(@receiver)
|
56
|
-
assert_equal(1, @emitter.count_listeners)
|
57
|
-
|
58
|
-
@emitter.add_listener(@receiver)
|
59
|
-
assert_equal(1, @emitter.count_listeners)
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_verifies_reciever_responds_t_observed
|
63
|
-
assert_raises(::NoMethodError) { @emitter.add_listener(BadSub.new) }
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_listeners_get_notified
|
67
|
-
@receiver_2 = Sub.new
|
68
|
-
@emitter.add_listener(@receiver)
|
69
|
-
@emitter.add_listener(@receiver_2)
|
70
|
-
|
71
|
-
meta = {
|
72
|
-
foo: "foo",
|
73
|
-
bar: 42,
|
74
|
-
}
|
75
|
-
@emitter.notify_listeners(name: :notification, data: "DATA!", meta: meta)
|
76
|
-
|
77
|
-
assert_equal(:notification, @receiver.name)
|
78
|
-
assert_equal(:notification, @receiver_2.name)
|
79
|
-
|
80
|
-
assert_equal("DATA!", @receiver.data)
|
81
|
-
assert_equal("DATA!", @receiver_2.data)
|
82
|
-
|
83
|
-
assert_equal("foo", @receiver[:foo])
|
84
|
-
assert_equal("foo", @receiver_2[:foo])
|
85
|
-
|
86
|
-
assert_equal(42, @receiver[:bar])
|
87
|
-
assert_equal(42, @receiver_2[:bar])
|
88
|
-
end
|
89
|
-
end
|
data/test/test_field_stats.rb
DELETED
@@ -1,134 +0,0 @@
|
|
1
|
-
require_relative './test_helper'
|
2
|
-
|
3
|
-
require 'faker'
|
4
|
-
|
5
|
-
class TestFieldStats < ::Minitest::Test
|
6
|
-
# returns [FieldStats, Array] where the array is the original data
|
7
|
-
def generate_data_with(count: 100, stats: ::FlatKit::FieldStats.new(name: "data"), &block)
|
8
|
-
list = Array.new.tap do |a|
|
9
|
-
count.times do
|
10
|
-
n = block.call
|
11
|
-
stats.update(n)
|
12
|
-
a << n
|
13
|
-
end
|
14
|
-
end
|
15
|
-
[stats, list]
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_raises_error_on_invalid_stats
|
19
|
-
assert_raises(ArgumentError) { ::FlatKit::FieldStats.new(name: "test", stats_to_collect: :whatever) }
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_collects_numeric_default_stats
|
23
|
-
field_stats, number_data = generate_data_with { Faker::Number.within(range: 1.0..100.0) }
|
24
|
-
|
25
|
-
null_count = 5
|
26
|
-
null_count.times {
|
27
|
-
field_stats.update(nil)
|
28
|
-
}
|
29
|
-
|
30
|
-
avg = number_data.sum / number_data.size
|
31
|
-
min = number_data.min
|
32
|
-
max = number_data.max
|
33
|
-
sum = number_data.sum
|
34
|
-
|
35
|
-
refute(field_stats.field_type_determined?)
|
36
|
-
|
37
|
-
assert_equal(null_count, field_stats.null_count)
|
38
|
-
assert_equal(number_data.size, field_stats.count)
|
39
|
-
|
40
|
-
assert(field_stats.field_type_determined?)
|
41
|
-
|
42
|
-
assert_in_epsilon(avg, field_stats.mean)
|
43
|
-
assert_equal(min, field_stats.min)
|
44
|
-
assert_equal(max, field_stats.max)
|
45
|
-
assert_in_epsilon(sum, field_stats.sum)
|
46
|
-
expected_percent = (null_count.to_f / (null_count + number_data.size)) * 100.0
|
47
|
-
assert_in_epsilon(expected_percent, field_stats.null_percent)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_collect_numeric_cardinality_stats
|
51
|
-
field_stats = ::FlatKit::FieldStats.new(name: "number-cardinality",
|
52
|
-
stats_to_collect: ::FlatKit::FieldStats::ALL_STATS)
|
53
|
-
field_stats, number_data = generate_data_with(stats: field_stats) {
|
54
|
-
Faker::Number.within(range:1..25)
|
55
|
-
}
|
56
|
-
|
57
|
-
avg = number_data.sum.to_f / number_data.size
|
58
|
-
min = number_data.min
|
59
|
-
max = number_data.max
|
60
|
-
|
61
|
-
assert(field_stats.collecting_frequencies?)
|
62
|
-
refute(field_stats.field_type_determined?)
|
63
|
-
|
64
|
-
assert_equal(number_data.size, field_stats.count)
|
65
|
-
|
66
|
-
assert(field_stats.field_type_determined?)
|
67
|
-
|
68
|
-
assert_in_epsilon(avg, field_stats.mean)
|
69
|
-
assert_equal(min, field_stats.min)
|
70
|
-
assert_equal(max, field_stats.max)
|
71
|
-
|
72
|
-
assert_equal(number_data.tally.keys.size, field_stats.unique_count)
|
73
|
-
assert_equal(number_data.tally.keys.sort, field_stats.unique_values.sort)
|
74
|
-
assert_equal(number_data.tally, field_stats.frequencies)
|
75
|
-
|
76
|
-
mode = number_data.tally.max_by{ |k,v| v }.first
|
77
|
-
assert_equal(mode, field_stats.mode)
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_unknown_type_stats
|
81
|
-
field_stats = ::FlatKit::FieldStats.new(name: "numeric-with-unknown")
|
82
|
-
field_stats, number_data = generate_data_with { Faker::Number.within(range: 1.0..100.0) }
|
83
|
-
|
84
|
-
unknown_count = 20
|
85
|
-
unknown_count.times {
|
86
|
-
field_stats.update("unknown")
|
87
|
-
}
|
88
|
-
|
89
|
-
refute(field_stats.field_type_determined?)
|
90
|
-
|
91
|
-
assert_equal(unknown_count, field_stats.unknown_count)
|
92
|
-
assert_equal(unknown_count + number_data.size, field_stats.total_count)
|
93
|
-
|
94
|
-
expected_percent = (unknown_count.to_f / (unknown_count + number_data.size)) * 100.0
|
95
|
-
|
96
|
-
assert_in_epsilon(expected_percent, field_stats.unknown_percent)
|
97
|
-
end
|
98
|
-
|
99
|
-
def test_resolves_type_automatically
|
100
|
-
field_stats = ::FlatKit::FieldStats.new(name: "numeric-autoresolve",guess_threshold: 101)
|
101
|
-
field_stats, _ = generate_data_with(stats: field_stats) { Faker::Number.within(range: 1.0..100.0) }
|
102
|
-
|
103
|
-
refute(field_stats.field_type_determined?)
|
104
|
-
field_stats, _ = generate_data_with(stats: field_stats) { Faker::Number.within(range: 200.0..300.0) }
|
105
|
-
assert(field_stats.field_type_determined?)
|
106
|
-
end
|
107
|
-
|
108
|
-
def test_resolves_integer_appropriately_with_mixed_data
|
109
|
-
field_stats = ::FlatKit::FieldStats.new(name: "numeric-integer",guess_threshold: 100)
|
110
|
-
field_stats, _ = generate_data_with(count: 40, stats: field_stats) { Faker::Number.within(range: 0..1).to_s }
|
111
|
-
field_stats, _ = generate_data_with(count: 70, stats: field_stats) { Faker::Number.within(range: 0..200).to_s }
|
112
|
-
|
113
|
-
assert_equal(::FlatKit::FieldType::IntegerType, field_stats.field_type)
|
114
|
-
|
115
|
-
end
|
116
|
-
|
117
|
-
def test_resolves_boolean_appropriately_with_mixed_data
|
118
|
-
field_stats = ::FlatKit::FieldStats.new(name: "numeric-integer",guess_threshold: 100)
|
119
|
-
field_stats, _ = generate_data_with(count: 70, stats: field_stats) { Faker::Boolean.boolean.to_s }
|
120
|
-
field_stats, _ = generate_data_with(count: 40, stats: field_stats) { Faker::Number.within(range: 0..200).to_s }
|
121
|
-
assert_equal(::FlatKit::FieldType::BooleanType, field_stats.field_type)
|
122
|
-
end
|
123
|
-
|
124
|
-
def test_resolves_string_appropriately_with_mixed_data
|
125
|
-
field_stats = ::FlatKit::FieldStats.new(name: "string",guess_threshold: 100)
|
126
|
-
field_stats, _ = generate_data_with(count: 61, stats: field_stats) { Faker::Color.name.to_s }
|
127
|
-
field_stats, _ = generate_data_with(count: 59, stats: field_stats) { Faker::Number.within(range: 0..200).to_s }
|
128
|
-
assert_equal(::FlatKit::FieldType::StringType, field_stats.field_type)
|
129
|
-
|
130
|
-
assert_equal(120, field_stats.count)
|
131
|
-
assert_equal(0, field_stats.unknown_count)
|
132
|
-
assert_equal(0, field_stats.null_count)
|
133
|
-
end
|
134
|
-
end
|
data/test/test_field_type.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
require_relative './test_helper'
|
2
|
-
|
3
|
-
module TestFieldType
|
4
|
-
class TestFieldType < ::Minitest::Test
|
5
|
-
|
6
|
-
def test_weight_raises_exception
|
7
|
-
assert_raises(NotImplementedError) { ::FlatKit::FieldType.weight }
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_best_guesses
|
11
|
-
guesses = {
|
12
|
-
"t" => ::FlatKit::FieldType::BooleanType,
|
13
|
-
"1" => ::FlatKit::FieldType::BooleanType,
|
14
|
-
"0" => ::FlatKit::FieldType::BooleanType,
|
15
|
-
"n" => ::FlatKit::FieldType::BooleanType,
|
16
|
-
"42" => ::FlatKit::FieldType::IntegerType,
|
17
|
-
"nil" => ::FlatKit::FieldType::NullType,
|
18
|
-
"n/a" => ::FlatKit::FieldType::UnknownType,
|
19
|
-
"foo" => ::FlatKit::FieldType::StringType,
|
20
|
-
"12.3" => ::FlatKit::FieldType::FloatType,
|
21
|
-
"2021-02-26" => ::FlatKit::FieldType::DateType,
|
22
|
-
"2020-03-03T12:34:56Z" => ::FlatKit::FieldType::TimestampType,
|
23
|
-
}
|
24
|
-
|
25
|
-
guesses.each do |test, expected|
|
26
|
-
assert_equal(expected, ::FlatKit::FieldType.best_guess(test), "Expected '#{test}' to be #{expected}")
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_children_exist
|
31
|
-
assert_equal(9,::FlatKit::FieldType.children.size)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
data/test/test_format.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require_relative 'test_helper'
|
2
|
-
|
3
|
-
class TestFormat < ::Minitest::Test
|
4
|
-
def test_finds_jsonl_format
|
5
|
-
klass = ::FlatKit::Format.for("data.json.gz")
|
6
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_finds_xsv_format
|
10
|
-
klass = ::FlatKit::Format.for("data.csv.gz")
|
11
|
-
assert_equal(::FlatKit::Xsv::Format, klass)
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_finds_jsonl_format_for_full_path
|
15
|
-
klass = ::FlatKit::Format.for("tmp/sorted/foo.jsonl")
|
16
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_finds_jsonl_format_with_fallback
|
20
|
-
path = "tmp/sorted/foo.json"
|
21
|
-
klass = ::FlatKit::Format.for_with_fallback!(path: path, fallback: "auto")
|
22
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
23
|
-
end
|
24
|
-
end
|
data/test/test_helper.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
SimpleCov.start if ENV['COVERAGE']
|
3
|
-
|
4
|
-
require 'byebug'
|
5
|
-
|
6
|
-
require 'minitest/autorun'
|
7
|
-
require 'minitest/focus'
|
8
|
-
require 'minitest/pride'
|
9
|
-
|
10
|
-
module TestHelper
|
11
|
-
def scratch_dir
|
12
|
-
p = Pathname.new(__FILE__).parent.parent.join('tmp/testing_scratch')
|
13
|
-
p.mkpath
|
14
|
-
p
|
15
|
-
end
|
16
|
-
|
17
|
-
def generate_slug(length: 10)
|
18
|
-
SecureRandom.alphanumeric(10)
|
19
|
-
end
|
20
|
-
|
21
|
-
def scratch_file(prefix: "test_", slug: generate_slug, extension: ".jsonl")
|
22
|
-
scratch_dir.join("#{prefix}#{slug}#{extension}")
|
23
|
-
end
|
24
|
-
end
|
25
|
-
require_relative '../lib/flat_kit'
|
26
|
-
require_relative './device_dataset'
|
data/test/test_merge.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestMerge < ::Minitest::Test
|
4
|
-
|
5
|
-
def test_can_use_use_dash_as_output
|
6
|
-
merge = ::FlatKit::Merge.new(inputs: [], input_fallback: "json",
|
7
|
-
output: "-", output_fallback: "json", compare_fields: [])
|
8
|
-
assert_match(/STDOUT/, merge.writer.output.name)
|
9
|
-
assert_instance_of(::FlatKit::Output::IO, merge.writer.output)
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_can_use_a_text_path_as_output
|
13
|
-
test_path = "tmp/test_can_use_a_text_path_as_output.json"
|
14
|
-
begin
|
15
|
-
merge = ::FlatKit::Merge.new(output: test_path, inputs: [], input_fallback: "json", compare_fields: [])
|
16
|
-
assert_equal(test_path, merge.writer.output.name)
|
17
|
-
assert_instance_of(::FlatKit::Output::File, merge.writer.output)
|
18
|
-
merge.writer.close
|
19
|
-
ensure
|
20
|
-
File.unlink(test_path) if File.exist?(test_path)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_can_use_a_pathname_as_output
|
25
|
-
test_path = Pathname.new("tmp/test_can_use_a_pathname_as_output.json")
|
26
|
-
begin
|
27
|
-
merge = ::FlatKit::Merge.new(output: test_path, inputs: [], input_fallback: "json", compare_fields: [])
|
28
|
-
assert_equal(test_path.to_s, merge.writer.output.name)
|
29
|
-
assert_instance_of(::FlatKit::Output::File, merge.writer.output)
|
30
|
-
merge.writer.close
|
31
|
-
ensure
|
32
|
-
test_path.unlink if test_path.exist?
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_raises_error_if_unable_to_parse_output
|
37
|
-
test_path = Object.new
|
38
|
-
assert_raises(FlatKit::Error) { ::FlatKit::Merge.new(output: test_path, inputs: [], compare_fields: []) }
|
39
|
-
end
|
40
|
-
end
|