flat_kit 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +9 -0
- data/Manifest.txt +3 -42
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +46 -32
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +41 -39
- data/lib/flat_kit/command.rb +10 -11
- data/lib/flat_kit/descendant_tracker.rb +9 -6
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +5 -2
- data/lib/flat_kit/field_stats.rb +31 -26
- data/lib/flat_kit/field_type/boolean_type.rb +9 -5
- data/lib/flat_kit/field_type/date_type.rb +19 -17
- data/lib/flat_kit/field_type/float_type.rb +15 -9
- data/lib/flat_kit/field_type/guess_type.rb +9 -6
- data/lib/flat_kit/field_type/integer_type.rb +6 -4
- data/lib/flat_kit/field_type/null_type.rb +5 -1
- data/lib/flat_kit/field_type/string_type.rb +8 -6
- data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
- data/lib/flat_kit/field_type/unknown_type.rb +12 -8
- data/lib/flat_kit/field_type.rb +52 -44
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +15 -18
- data/lib/flat_kit/jsonl/writer.rb +8 -10
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -18
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +8 -7
- data/lib/flat_kit/position.rb +3 -4
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
- data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
- data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
- data/lib/flat_kit/stat_type.rb +18 -13
- data/lib/flat_kit/stats.rb +12 -14
- data/lib/flat_kit/writer.rb +5 -6
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +13 -10
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +31 -26
- metadata +20 -158
- data/Rakefile +0 -21
- data/examples/stream-active-record-to-csv.rb +0 -42
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/field_type/test_boolean_type.rb +0 -65
- data/test/field_type/test_date_type.rb +0 -71
- data/test/field_type/test_float_type.rb +0 -56
- data/test/field_type/test_guess_type.rb +0 -14
- data/test/field_type/test_integer_type.rb +0 -52
- data/test/field_type/test_null_type.rb +0 -41
- data/test/field_type/test_string_type.rb +0 -18
- data/test/field_type/test_timestamp_type.rb +0 -108
- data/test/field_type/test_unknown_type.rb +0 -35
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -86
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/run +0 -23
- data/test/stat_type/test_nominal_stats.rb +0 -69
- data/test/stat_type/test_numerical_stats.rb +0 -118
- data/test/stat_type/test_ordinal_stats.rb +0 -92
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -89
- data/test/test_field_stats.rb +0 -134
- data/test/test_field_type.rb +0 -34
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -89
data/test/run
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
|
4
|
-
test_dir = __dir__
|
5
|
-
|
6
|
-
$: << File.join(File.dirname(test_dir), "lib")
|
7
|
-
$: << test_dir
|
8
|
-
|
9
|
-
require_relative './test_helper'
|
10
|
-
|
11
|
-
require 'find'
|
12
|
-
|
13
|
-
if ARGV.empty? then
|
14
|
-
Find.find(test_dir) do |path|
|
15
|
-
next unless File.file?(path) && File.basename(path) =~ /\Atest_.*\.rb\Z/
|
16
|
-
require path
|
17
|
-
end
|
18
|
-
else
|
19
|
-
ARGV.each do |f|
|
20
|
-
require File.expand_path(f)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
data/test/stat_type/test_nominal_stats.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestStatType
|
4
|
-
class TestNominalStats < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@unique_values = ('a'..'f').to_a
|
7
|
-
@values = Array.new.tap do |a|
|
8
|
-
@unique_values.each do |letter|
|
9
|
-
(Random.rand(42) + 1).times { a << letter }
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
@frequencies = @values.tally
|
14
|
-
|
15
|
-
@stats = ::FlatKit::StatType::NominalStats.new
|
16
|
-
@all_stats = ::FlatKit::StatType::NominalStats.new(collecting_frequencies: true)
|
17
|
-
|
18
|
-
@values.each do |v|
|
19
|
-
@stats.update(v)
|
20
|
-
@all_stats.update(v)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_count
|
25
|
-
assert_equal(@values.size, @stats.count)
|
26
|
-
assert_equal(@values.size, @all_stats.count)
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_does_not_collect_unique_count_by_default
|
30
|
-
assert_nil(@stats.unique_count)
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_does_not_collect_unique_values_by_default
|
34
|
-
assert_nil(@stats.unique_values)
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_does_not_collect_frequencies_by_default
|
38
|
-
assert_nil(@stats.frequencies)
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_unique_count
|
42
|
-
assert_equal(@unique_values.size, @all_stats.unique_count)
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_unique_values
|
46
|
-
assert_equal(@unique_values.sort, @all_stats.unique_values.sort)
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_frequencies
|
50
|
-
assert_equal(@frequencies, @all_stats.frequencies)
|
51
|
-
end
|
52
|
-
|
53
|
-
def test_default_to_hash
|
54
|
-
expecting = { "count" => @values.size }
|
55
|
-
assert_equal(expecting, @stats.to_hash)
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_all_stats_hash
|
59
|
-
expecting = {
|
60
|
-
"count" => @values.size,
|
61
|
-
"unique_count" => @unique_values.size,
|
62
|
-
"unique_values" => @unique_values.sort,
|
63
|
-
"mode" => @frequencies.max_by { |k,v| v }.first
|
64
|
-
}
|
65
|
-
assert_equal(expecting, @all_stats.to_hash)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
data/test/stat_type/test_numerical_stats.rb
DELETED
@@ -1,118 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
|
4
|
-
module TestStatType
|
5
|
-
class TestNumericalStats < ::Minitest::Test
|
6
|
-
def setup
|
7
|
-
@stats = FlatKit::StatType::NumericalStats.new
|
8
|
-
@full_stats = FlatKit::StatType::NumericalStats.new
|
9
|
-
@all_stats = FlatKit::StatType::NumericalStats.new(collecting_frequencies: true)
|
10
|
-
[ 1, 2, 3].each { |i| @full_stats.update( i ) }
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_intialized_with_usable_values
|
14
|
-
assert_equal(0, @stats.count)
|
15
|
-
assert_equal(Float::INFINITY, @stats.min)
|
16
|
-
assert_equal(-Float::INFINITY, @stats.max)
|
17
|
-
assert_equal(0.0, @stats.sum)
|
18
|
-
assert_equal(0.0, @stats.rate)
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_calculates_mean
|
22
|
-
assert_equal(2.0, @full_stats.mean)
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_calculates_rate
|
26
|
-
assert_equal(0.5, @full_stats.rate)
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_tracks_the_maximum_value
|
30
|
-
assert_equal(3.0, @full_stats.max)
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_tracks_the_minimum_value
|
34
|
-
assert_equal(1.0, @full_stats.min)
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_tracks_the_count
|
38
|
-
assert_equal(3,@full_stats.count)
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_tracks_the_sum
|
42
|
-
assert_equal(6.0, @full_stats.sum)
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_calculates_the_standard_deviation
|
46
|
-
assert_equal(1.0, @full_stats.stddev)
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_calculates_the_sum_of_squares
|
50
|
-
assert_equal(14, @full_stats.sumsq)
|
51
|
-
end
|
52
|
-
|
53
|
-
def test_converts_to_a_hash
|
54
|
-
h = @full_stats.to_hash
|
55
|
-
assert_equal(::FlatKit::StatType::NumericalStats.default_stats.size, h.size)
|
56
|
-
assert_equal(::FlatKit::StatType::NumericalStats.default_stats, h.keys.sort)
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_converts_to_a_limited_hash_if_given_arguments
|
60
|
-
h = @full_stats.to_hash( "min", "max", "mean" )
|
61
|
-
assert_equal(3, h.size)
|
62
|
-
assert_equal(%w[ max mean min], h.keys.sort)
|
63
|
-
|
64
|
-
h = @full_stats.to_hash( %w[ count rate ] )
|
65
|
-
assert_equal(2, h.size)
|
66
|
-
assert_equal(%w[ count rate ], h.keys.sort)
|
67
|
-
end
|
68
|
-
|
69
|
-
def test_raises_nomethoderror_if_an_invalid_stat_is_used
|
70
|
-
assert_raises(NoMethodError) { @full_stats.to_hash( "wibble" ) }
|
71
|
-
end
|
72
|
-
|
73
|
-
def test_converts_to_a_json_string
|
74
|
-
j = @full_stats.to_json
|
75
|
-
h = JSON.parse( j )
|
76
|
-
assert_equal(::FlatKit::StatType::NumericalStats.default_stats.size, h.size)
|
77
|
-
assert_equal(::FlatKit::StatType::NumericalStats.default_stats, h.keys.sort)
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_converts_to_a_limited_json_hash_if_given_arguments
|
81
|
-
j = @full_stats.to_json( "min", "max", "mean" )
|
82
|
-
h = JSON.parse( j )
|
83
|
-
assert_equal(3, h.size)
|
84
|
-
assert_equal(%w[ max mean min], h.keys.sort)
|
85
|
-
|
86
|
-
j = @full_stats.to_json( %w[ count rate ] )
|
87
|
-
h = JSON.parse( j )
|
88
|
-
assert_equal(2, h.size)
|
89
|
-
assert_equal(%w[ count rate ], h.keys.sort)
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_raises_nomethoderror_if_an_invalid_json_stat_is_used
|
93
|
-
assert_raises(NoMethodError) { @full_stats.to_json( "wibble" ) }
|
94
|
-
end
|
95
|
-
|
96
|
-
def test_collects_mode
|
97
|
-
values = Array.new.tap do |a|
|
98
|
-
100.times {
|
99
|
-
n = Random.rand(10)
|
100
|
-
a << n
|
101
|
-
@all_stats.update(n)
|
102
|
-
}
|
103
|
-
end
|
104
|
-
|
105
|
-
tally = values.tally
|
106
|
-
mode_value = tally.max_by { |v, count| count }.first
|
107
|
-
|
108
|
-
assert_equal(mode_value, @all_stats.mode)
|
109
|
-
end
|
110
|
-
|
111
|
-
def test_collecting_frequences_reports_extra_stat_names
|
112
|
-
stat_names = @all_stats.collected_stats
|
113
|
-
assert_includes(stat_names, "mode")
|
114
|
-
assert_includes(stat_names, "unique_count")
|
115
|
-
assert_includes(stat_names, "unique_values")
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
data/test/stat_type/test_ordinal_stats.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestStatType
|
4
|
-
class TestOrdinalStats < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
today = Date.today
|
7
|
-
next_month = today >> 1
|
8
|
-
last_day_of_month = (next_month - 1).mday
|
9
|
-
|
10
|
-
@start_date = Date.new(today.year, today.month, 1)
|
11
|
-
@end_date = Date.new(today.year, today.month, last_day_of_month)
|
12
|
-
|
13
|
-
@unique_values = (@start_date..@end_date).to_a
|
14
|
-
@values = Array.new.tap do |a|
|
15
|
-
@unique_values.each do |date|
|
16
|
-
(Random.rand(42) + 1).times { a << date}
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
@frequencies = @values.tally
|
21
|
-
|
22
|
-
@stats = ::FlatKit::StatType::OrdinalStats.new
|
23
|
-
@all_stats = ::FlatKit::StatType::OrdinalStats.new(collecting_frequencies: true)
|
24
|
-
|
25
|
-
@values.each do |v|
|
26
|
-
@stats.update(v)
|
27
|
-
@all_stats.update(v)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_count
|
32
|
-
assert_equal(@values.size, @stats.count)
|
33
|
-
assert_equal(@values.size, @all_stats.count)
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_min
|
37
|
-
assert_equal(@values.min, @stats.min)
|
38
|
-
assert_equal(@values.min, @all_stats.min)
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_max
|
42
|
-
assert_equal(@values.max, @stats.max)
|
43
|
-
assert_equal(@values.max, @all_stats.max)
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_does_not_collect_unique_count_by_default
|
47
|
-
assert_nil(@stats.unique_count)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_does_not_collect_unique_values_by_default
|
51
|
-
assert_nil(@stats.unique_values)
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_does_not_collect_frequencies_by_default
|
55
|
-
assert_nil(@stats.frequencies)
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_unique_count
|
59
|
-
assert_equal(@unique_values.size, @all_stats.unique_count)
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_unique_values
|
63
|
-
assert_equal(@unique_values.sort, @all_stats.unique_values.sort)
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_frequencies
|
67
|
-
assert_equal(@frequencies, @all_stats.frequencies)
|
68
|
-
end
|
69
|
-
|
70
|
-
def test_default_to_hash
|
71
|
-
expecting = {
|
72
|
-
"count" => @values.size,
|
73
|
-
"max" => @values.max,
|
74
|
-
"min" => @values.min,
|
75
|
-
}
|
76
|
-
assert_equal(expecting, @stats.to_hash)
|
77
|
-
end
|
78
|
-
|
79
|
-
def test_all_stats_hash
|
80
|
-
expecting = {
|
81
|
-
"count" => @values.size,
|
82
|
-
"unique_count" => @unique_values.size,
|
83
|
-
"unique_values" => @unique_values.sort,
|
84
|
-
"mode" => @frequencies.max_by { |k,v| v }.first,
|
85
|
-
"max" => @values.max,
|
86
|
-
"min" => @values.min,
|
87
|
-
}
|
88
|
-
assert_equal(expecting, @all_stats.to_hash)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
data/test/test_conversions.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestConversions < ::Minitest::Test
|
4
|
-
def setup
|
5
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
6
|
-
@src_record = @one_row_dataset.records.first
|
7
|
-
@csv_row = @one_row_dataset.records_as_csv_rows.first
|
8
|
-
@compare_fields = @one_row_dataset.compare_fields
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_from_csv_to_json
|
12
|
-
xsv_record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
13
|
-
json_record = FlatKit::Jsonl::Record.from_record(xsv_record)
|
14
|
-
|
15
|
-
assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
|
16
|
-
assert_equal(@one_row_dataset.records.first, json_record.to_hash)
|
17
|
-
assert_equal(xsv_record, json_record)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_from_json_to_csv
|
21
|
-
src_json = JSON.generate(@src_record)
|
22
|
-
json_record = FlatKit::Jsonl::Record.new(data: src_json, compare_fields: @compare_fields)
|
23
|
-
xsv_record = FlatKit::Xsv::Record.from_record(json_record)
|
24
|
-
|
25
|
-
assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
|
26
|
-
assert_equal(@one_row_dataset.records.first, json_record.to_hash)
|
27
|
-
assert_equal(xsv_record, json_record)
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_roundtrip_csv_json_csv
|
31
|
-
xsv_record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
32
|
-
json_record = FlatKit::Jsonl::Record.from_record(xsv_record)
|
33
|
-
xsv2 = FlatKit::Xsv::Record.from_record(json_record)
|
34
|
-
|
35
|
-
assert_equal(xsv_record.to_s, xsv2.to_s)
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_roundtrip_json_csv_json
|
39
|
-
src_json = JSON.generate(@src_record)
|
40
|
-
json_record = FlatKit::Jsonl::Record.new(data: src_json, compare_fields: @compare_fields)
|
41
|
-
xsv_record = FlatKit::Xsv::Record.from_record(json_record)
|
42
|
-
json2 = FlatKit::Jsonl::Record.from_record(xsv_record)
|
43
|
-
assert_equal(src_json, json2.to_s)
|
44
|
-
end
|
45
|
-
end
|
data/test/test_event_emitter.rb
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestEventEmitter < ::Minitest::Test
|
4
|
-
class Pub
|
5
|
-
include ::FlatKit::EventEmitter
|
6
|
-
end
|
7
|
-
|
8
|
-
class Sub
|
9
|
-
attr_reader :name
|
10
|
-
attr_reader :data
|
11
|
-
attr_reader :meta
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@name = nil
|
15
|
-
@data = nil
|
16
|
-
@meta = nil
|
17
|
-
end
|
18
|
-
|
19
|
-
def [](key)
|
20
|
-
@meta[key]
|
21
|
-
end
|
22
|
-
|
23
|
-
def on_event(name:, data:, meta:)
|
24
|
-
@name = name
|
25
|
-
@data = data
|
26
|
-
@meta = meta
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
class BadSub; end
|
31
|
-
|
32
|
-
def setup
|
33
|
-
@emitter = Pub.new
|
34
|
-
@receiver = Sub.new
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_counts_no_listeners_before_adding_one
|
38
|
-
assert_equal(0, @emitter.count_listeners)
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_adds_listener
|
42
|
-
@emitter.add_listener(@receiver)
|
43
|
-
assert_equal(1, @emitter.count_listeners)
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_removes_listener
|
47
|
-
@emitter.add_listener(@receiver)
|
48
|
-
assert_equal(1, @emitter.count_listeners)
|
49
|
-
|
50
|
-
@emitter.remove_listener(@receiver)
|
51
|
-
assert_equal(0, @emitter.count_listeners)
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_only_adds_an_listener_once
|
55
|
-
@emitter.add_listener(@receiver)
|
56
|
-
assert_equal(1, @emitter.count_listeners)
|
57
|
-
|
58
|
-
@emitter.add_listener(@receiver)
|
59
|
-
assert_equal(1, @emitter.count_listeners)
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_verifies_reciever_responds_t_observed
|
63
|
-
assert_raises(::NoMethodError) { @emitter.add_listener(BadSub.new) }
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_listeners_get_notified
|
67
|
-
@receiver_2 = Sub.new
|
68
|
-
@emitter.add_listener(@receiver)
|
69
|
-
@emitter.add_listener(@receiver_2)
|
70
|
-
|
71
|
-
meta = {
|
72
|
-
foo: "foo",
|
73
|
-
bar: 42,
|
74
|
-
}
|
75
|
-
@emitter.notify_listeners(name: :notification, data: "DATA!", meta: meta)
|
76
|
-
|
77
|
-
assert_equal(:notification, @receiver.name)
|
78
|
-
assert_equal(:notification, @receiver_2.name)
|
79
|
-
|
80
|
-
assert_equal("DATA!", @receiver.data)
|
81
|
-
assert_equal("DATA!", @receiver_2.data)
|
82
|
-
|
83
|
-
assert_equal("foo", @receiver[:foo])
|
84
|
-
assert_equal("foo", @receiver_2[:foo])
|
85
|
-
|
86
|
-
assert_equal(42, @receiver[:bar])
|
87
|
-
assert_equal(42, @receiver_2[:bar])
|
88
|
-
end
|
89
|
-
end
|
data/test/test_field_stats.rb
DELETED
@@ -1,134 +0,0 @@
|
|
1
|
-
require_relative './test_helper'
|
2
|
-
|
3
|
-
require 'faker'
|
4
|
-
|
5
|
-
class TestFieldStats < ::Minitest::Test
|
6
|
-
# returns [FieldStats, Array] where the array is the original data
|
7
|
-
def generate_data_with(count: 100, stats: ::FlatKit::FieldStats.new(name: "data"), &block)
|
8
|
-
list = Array.new.tap do |a|
|
9
|
-
count.times do
|
10
|
-
n = block.call
|
11
|
-
stats.update(n)
|
12
|
-
a << n
|
13
|
-
end
|
14
|
-
end
|
15
|
-
[stats, list]
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_raises_error_on_invalid_stats
|
19
|
-
assert_raises(ArgumentError) { ::FlatKit::FieldStats.new(name: "test", stats_to_collect: :whatever) }
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_collects_numeric_default_stats
|
23
|
-
field_stats, number_data = generate_data_with { Faker::Number.within(range: 1.0..100.0) }
|
24
|
-
|
25
|
-
null_count = 5
|
26
|
-
null_count.times {
|
27
|
-
field_stats.update(nil)
|
28
|
-
}
|
29
|
-
|
30
|
-
avg = number_data.sum / number_data.size
|
31
|
-
min = number_data.min
|
32
|
-
max = number_data.max
|
33
|
-
sum = number_data.sum
|
34
|
-
|
35
|
-
refute(field_stats.field_type_determined?)
|
36
|
-
|
37
|
-
assert_equal(null_count, field_stats.null_count)
|
38
|
-
assert_equal(number_data.size, field_stats.count)
|
39
|
-
|
40
|
-
assert(field_stats.field_type_determined?)
|
41
|
-
|
42
|
-
assert_in_epsilon(avg, field_stats.mean)
|
43
|
-
assert_equal(min, field_stats.min)
|
44
|
-
assert_equal(max, field_stats.max)
|
45
|
-
assert_in_epsilon(sum, field_stats.sum)
|
46
|
-
expected_percent = (null_count.to_f / (null_count + number_data.size)) * 100.0
|
47
|
-
assert_in_epsilon(expected_percent, field_stats.null_percent)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_collect_numeric_cardinality_stats
|
51
|
-
field_stats = ::FlatKit::FieldStats.new(name: "number-cardinality",
|
52
|
-
stats_to_collect: ::FlatKit::FieldStats::ALL_STATS)
|
53
|
-
field_stats, number_data = generate_data_with(stats: field_stats) {
|
54
|
-
Faker::Number.within(range:1..25)
|
55
|
-
}
|
56
|
-
|
57
|
-
avg = number_data.sum.to_f / number_data.size
|
58
|
-
min = number_data.min
|
59
|
-
max = number_data.max
|
60
|
-
|
61
|
-
assert(field_stats.collecting_frequencies?)
|
62
|
-
refute(field_stats.field_type_determined?)
|
63
|
-
|
64
|
-
assert_equal(number_data.size, field_stats.count)
|
65
|
-
|
66
|
-
assert(field_stats.field_type_determined?)
|
67
|
-
|
68
|
-
assert_in_epsilon(avg, field_stats.mean)
|
69
|
-
assert_equal(min, field_stats.min)
|
70
|
-
assert_equal(max, field_stats.max)
|
71
|
-
|
72
|
-
assert_equal(number_data.tally.keys.size, field_stats.unique_count)
|
73
|
-
assert_equal(number_data.tally.keys.sort, field_stats.unique_values.sort)
|
74
|
-
assert_equal(number_data.tally, field_stats.frequencies)
|
75
|
-
|
76
|
-
mode = number_data.tally.max_by{ |k,v| v }.first
|
77
|
-
assert_equal(mode, field_stats.mode)
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_unknown_type_stats
|
81
|
-
field_stats = ::FlatKit::FieldStats.new(name: "numeric-with-unknown")
|
82
|
-
field_stats, number_data = generate_data_with { Faker::Number.within(range: 1.0..100.0) }
|
83
|
-
|
84
|
-
unknown_count = 20
|
85
|
-
unknown_count.times {
|
86
|
-
field_stats.update("unknown")
|
87
|
-
}
|
88
|
-
|
89
|
-
refute(field_stats.field_type_determined?)
|
90
|
-
|
91
|
-
assert_equal(unknown_count, field_stats.unknown_count)
|
92
|
-
assert_equal(unknown_count + number_data.size, field_stats.total_count)
|
93
|
-
|
94
|
-
expected_percent = (unknown_count.to_f / (unknown_count + number_data.size)) * 100.0
|
95
|
-
|
96
|
-
assert_in_epsilon(expected_percent, field_stats.unknown_percent)
|
97
|
-
end
|
98
|
-
|
99
|
-
def test_resolves_type_automatically
|
100
|
-
field_stats = ::FlatKit::FieldStats.new(name: "numeric-autoresolve",guess_threshold: 101)
|
101
|
-
field_stats, _ = generate_data_with(stats: field_stats) { Faker::Number.within(range: 1.0..100.0) }
|
102
|
-
|
103
|
-
refute(field_stats.field_type_determined?)
|
104
|
-
field_stats, _ = generate_data_with(stats: field_stats) { Faker::Number.within(range: 200.0..300.0) }
|
105
|
-
assert(field_stats.field_type_determined?)
|
106
|
-
end
|
107
|
-
|
108
|
-
def test_resolves_integer_appropriately_with_mixed_data
|
109
|
-
field_stats = ::FlatKit::FieldStats.new(name: "numeric-integer",guess_threshold: 100)
|
110
|
-
field_stats, _ = generate_data_with(count: 40, stats: field_stats) { Faker::Number.within(range: 0..1).to_s }
|
111
|
-
field_stats, _ = generate_data_with(count: 70, stats: field_stats) { Faker::Number.within(range: 0..200).to_s }
|
112
|
-
|
113
|
-
assert_equal(::FlatKit::FieldType::IntegerType, field_stats.field_type)
|
114
|
-
|
115
|
-
end
|
116
|
-
|
117
|
-
def test_resolves_boolean_appropriately_with_mixed_data
|
118
|
-
field_stats = ::FlatKit::FieldStats.new(name: "numeric-integer",guess_threshold: 100)
|
119
|
-
field_stats, _ = generate_data_with(count: 70, stats: field_stats) { Faker::Boolean.boolean.to_s }
|
120
|
-
field_stats, _ = generate_data_with(count: 40, stats: field_stats) { Faker::Number.within(range: 0..200).to_s }
|
121
|
-
assert_equal(::FlatKit::FieldType::BooleanType, field_stats.field_type)
|
122
|
-
end
|
123
|
-
|
124
|
-
def test_resolves_string_appropriately_with_mixed_data
|
125
|
-
field_stats = ::FlatKit::FieldStats.new(name: "string",guess_threshold: 100)
|
126
|
-
field_stats, _ = generate_data_with(count: 61, stats: field_stats) { Faker::Color.name.to_s }
|
127
|
-
field_stats, _ = generate_data_with(count: 59, stats: field_stats) { Faker::Number.within(range: 0..200).to_s }
|
128
|
-
assert_equal(::FlatKit::FieldType::StringType, field_stats.field_type)
|
129
|
-
|
130
|
-
assert_equal(120, field_stats.count)
|
131
|
-
assert_equal(0, field_stats.unknown_count)
|
132
|
-
assert_equal(0, field_stats.null_count)
|
133
|
-
end
|
134
|
-
end
|
data/test/test_field_type.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
require_relative './test_helper'
|
2
|
-
|
3
|
-
module TestFieldType
|
4
|
-
class TestFieldType < ::Minitest::Test
|
5
|
-
|
6
|
-
def test_weight_raises_exception
|
7
|
-
assert_raises(NotImplementedError) { ::FlatKit::FieldType.weight }
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_best_guesses
|
11
|
-
guesses = {
|
12
|
-
"t" => ::FlatKit::FieldType::BooleanType,
|
13
|
-
"1" => ::FlatKit::FieldType::BooleanType,
|
14
|
-
"0" => ::FlatKit::FieldType::BooleanType,
|
15
|
-
"n" => ::FlatKit::FieldType::BooleanType,
|
16
|
-
"42" => ::FlatKit::FieldType::IntegerType,
|
17
|
-
"nil" => ::FlatKit::FieldType::NullType,
|
18
|
-
"n/a" => ::FlatKit::FieldType::UnknownType,
|
19
|
-
"foo" => ::FlatKit::FieldType::StringType,
|
20
|
-
"12.3" => ::FlatKit::FieldType::FloatType,
|
21
|
-
"2021-02-26" => ::FlatKit::FieldType::DateType,
|
22
|
-
"2020-03-03T12:34:56Z" => ::FlatKit::FieldType::TimestampType,
|
23
|
-
}
|
24
|
-
|
25
|
-
guesses.each do |test, expected|
|
26
|
-
assert_equal(expected, ::FlatKit::FieldType.best_guess(test), "Expected '#{test}' to be #{expected}")
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_children_exist
|
31
|
-
assert_equal(9,::FlatKit::FieldType.children.size)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
data/test/test_format.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require_relative 'test_helper'
|
2
|
-
|
3
|
-
class TestFormat < ::Minitest::Test
|
4
|
-
def test_finds_jsonl_format
|
5
|
-
klass = ::FlatKit::Format.for("data.json.gz")
|
6
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_finds_xsv_format
|
10
|
-
klass = ::FlatKit::Format.for("data.csv.gz")
|
11
|
-
assert_equal(::FlatKit::Xsv::Format, klass)
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_finds_jsonl_format_for_full_path
|
15
|
-
klass = ::FlatKit::Format.for("tmp/sorted/foo.jsonl")
|
16
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_finds_jsonl_format_with_fallback
|
20
|
-
path = "tmp/sorted/foo.json"
|
21
|
-
klass = ::FlatKit::Format.for_with_fallback!(path: path, fallback: "auto")
|
22
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
23
|
-
end
|
24
|
-
end
|
data/test/test_helper.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
SimpleCov.start if ENV['COVERAGE']
|
3
|
-
|
4
|
-
require 'byebug'
|
5
|
-
|
6
|
-
require 'minitest/autorun'
|
7
|
-
require 'minitest/focus'
|
8
|
-
require 'minitest/pride'
|
9
|
-
|
10
|
-
module TestHelper
|
11
|
-
def scratch_dir
|
12
|
-
p = Pathname.new(__FILE__).parent.parent.join('tmp/testing_scratch')
|
13
|
-
p.mkpath
|
14
|
-
p
|
15
|
-
end
|
16
|
-
|
17
|
-
def generate_slug(length: 10)
|
18
|
-
SecureRandom.alphanumeric(10)
|
19
|
-
end
|
20
|
-
|
21
|
-
def scratch_file(prefix: "test_", slug: generate_slug, extension: ".jsonl")
|
22
|
-
scratch_dir.join("#{prefix}#{slug}#{extension}")
|
23
|
-
end
|
24
|
-
end
|
25
|
-
require_relative '../lib/flat_kit'
|
26
|
-
require_relative './device_dataset'
|
data/test/test_merge.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestMerge < ::Minitest::Test
|
4
|
-
|
5
|
-
def test_can_use_use_dash_as_output
|
6
|
-
merge = ::FlatKit::Merge.new(inputs: [], input_fallback: "json",
|
7
|
-
output: "-", output_fallback: "json", compare_fields: [])
|
8
|
-
assert_match(/STDOUT/, merge.writer.output.name)
|
9
|
-
assert_instance_of(::FlatKit::Output::IO, merge.writer.output)
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_can_use_a_text_path_as_output
|
13
|
-
test_path = "tmp/test_can_use_a_text_path_as_output.json"
|
14
|
-
begin
|
15
|
-
merge = ::FlatKit::Merge.new(output: test_path, inputs: [], input_fallback: "json", compare_fields: [])
|
16
|
-
assert_equal(test_path, merge.writer.output.name)
|
17
|
-
assert_instance_of(::FlatKit::Output::File, merge.writer.output)
|
18
|
-
merge.writer.close
|
19
|
-
ensure
|
20
|
-
File.unlink(test_path) if File.exist?(test_path)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_can_use_a_pathname_as_output
|
25
|
-
test_path = Pathname.new("tmp/test_can_use_a_pathname_as_output.json")
|
26
|
-
begin
|
27
|
-
merge = ::FlatKit::Merge.new(output: test_path, inputs: [], input_fallback: "json", compare_fields: [])
|
28
|
-
assert_equal(test_path.to_s, merge.writer.output.name)
|
29
|
-
assert_instance_of(::FlatKit::Output::File, merge.writer.output)
|
30
|
-
merge.writer.close
|
31
|
-
ensure
|
32
|
-
test_path.unlink if test_path.exist?
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_raises_error_if_unable_to_parse_output
|
37
|
-
test_path = Object.new
|
38
|
-
assert_raises(FlatKit::Error) { ::FlatKit::Merge.new(output: test_path, inputs: [], compare_fields: []) }
|
39
|
-
end
|
40
|
-
end
|