flat_kit 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
data/test/jsonl/test_writer.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestJsonl
|
4
|
-
class TestWriter < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@write_path = "tmp/test_writes_to_io.jsonl"
|
10
|
-
@read_path = "tmp/test_read.jsonl"
|
11
|
-
|
12
|
-
File.open(@read_path, "wb") do |f|
|
13
|
-
f.write(@dataset.records_as_jsonl)
|
14
|
-
end
|
15
|
-
|
16
|
-
@reader = ::FlatKit::Jsonl::Reader.new(source: @read_path, compare_fields: @compare_fields)
|
17
|
-
@records = @reader.to_a
|
18
|
-
end
|
19
|
-
|
20
|
-
def teardown
|
21
|
-
File.unlink(@write_path) if File.exist?(@write_path)
|
22
|
-
File.unlink(@read_path) if File.exist?(@read_path)
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_raises_error_on_invalid_destination
|
26
|
-
assert_raises(::FlatKit::Error) {
|
27
|
-
::FlatKit::Jsonl::Writer.new(destination: Object.new)
|
28
|
-
}
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_writes_to_pathname
|
32
|
-
writer = ::FlatKit::Jsonl::Writer.new(destination: @write_path)
|
33
|
-
@records.each do |r|
|
34
|
-
writer.write(r)
|
35
|
-
end
|
36
|
-
writer.close
|
37
|
-
assert_equal(@count, writer.count)
|
38
|
-
|
39
|
-
expected = @dataset.records_as_jsonl
|
40
|
-
actual = IO.read(@write_path)
|
41
|
-
assert_equal(expected, actual)
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_writes_to_io
|
45
|
-
File.open(@write_path, "w+") do |f|
|
46
|
-
writer = ::FlatKit::Jsonl::Writer.new(destination: f)
|
47
|
-
|
48
|
-
@records.each do |r|
|
49
|
-
writer.write(r)
|
50
|
-
end
|
51
|
-
writer.close
|
52
|
-
|
53
|
-
assert_equal(@count, writer.count)
|
54
|
-
|
55
|
-
expected = @dataset.records_as_jsonl
|
56
|
-
actual = IO.read(@write_path)
|
57
|
-
assert_equal(expected, actual)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
def test_raises_error_on_io_error
|
62
|
-
s = StringIO.new
|
63
|
-
s.close_write
|
64
|
-
writer = ::FlatKit::Jsonl::Writer.new(destination: s)
|
65
|
-
assert_raises(::FlatKit::Error) { writer.write(@records.first) }
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
data/test/output/test_file.rb
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestOutput
|
4
|
-
class TestFile < ::Minitest::Test
|
5
|
-
def test_does_not_handle_stderr_text
|
6
|
-
::FlatKit::Output::IO::STDERRS.each do |e|
|
7
|
-
refute(::FlatKit::Output::File.handles?(e), "#{e} is not stderr text")
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_only_handles_string
|
12
|
-
refute(::FlatKit::Output::File.handles?(Object.new))
|
13
|
-
end
|
14
|
-
|
15
|
-
def test_doest_not_handles_stdout_text
|
16
|
-
::FlatKit::Output::IO::STDOUTS.each do |e|
|
17
|
-
refute(::FlatKit::Output::File.handles?(e), "#{e} is not stdout text")
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_init_from_path
|
22
|
-
test_path = "tmp/test_init_from_path.txt"
|
23
|
-
begin
|
24
|
-
io = ::FlatKit::Output::File.new(test_path)
|
25
|
-
assert_equal(test_path, io.name)
|
26
|
-
assert_instance_of(::File, io.io)
|
27
|
-
ensure
|
28
|
-
File.unlink(test_path) if File.exist?(test_path)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_writes_to_file
|
33
|
-
test_path = "tmp/test_writes_to_file.txt"
|
34
|
-
begin
|
35
|
-
output = ::FlatKit::Output::File.new(test_path)
|
36
|
-
assert_equal(test_path, output.name)
|
37
|
-
output.io.write("test_writes_to_file output")
|
38
|
-
output.close
|
39
|
-
t = IO.read(test_path)
|
40
|
-
assert_equal("test_writes_to_file output", t)
|
41
|
-
ensure
|
42
|
-
File.unlink(test_path) if File.exist?(test_path)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_writes_to_gzfile
|
47
|
-
test_path = "tmp/test_writes_to_gzfile.txt.gz"
|
48
|
-
begin
|
49
|
-
output = ::FlatKit::Output::File.new(test_path)
|
50
|
-
assert_equal(test_path, output.name)
|
51
|
-
output.io.write("test_writes_to_gzfile output")
|
52
|
-
output.close
|
53
|
-
t = %x[ gunzip -c #{test_path} ]
|
54
|
-
assert_equal("test_writes_to_gzfile output", t)
|
55
|
-
ensure
|
56
|
-
File.unlink(test_path) if File.exist?(test_path)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
data/test/output/test_io.rb
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestOutput
|
4
|
-
class NullIO < ::IO
|
5
|
-
def initialize()
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
class TestIO < ::Minitest::Test
|
10
|
-
def test_handles_stderr_text
|
11
|
-
::FlatKit::Output::IO::STDERRS.each do |e|
|
12
|
-
assert(::FlatKit::Output::IO.handles?(e), "#{e} is not stderr text")
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_handles_stderr_io
|
17
|
-
x = $stderr
|
18
|
-
assert(::FlatKit::Output::IO.handles?(x), "is not stderr")
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_handles_stdout_text
|
22
|
-
::FlatKit::Output::IO::STDOUTS.each do |e|
|
23
|
-
assert(::FlatKit::Output::IO.handles?(e), "#{e} is not stdout text")
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_handles_stdout_io
|
28
|
-
x = $stderr
|
29
|
-
assert(::FlatKit::Output::IO.handles?(x), "is not stdout")
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_handles_stringio
|
33
|
-
assert(::FlatKit::Output::IO.handles?(StringIO.new))
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_does_not_handle_other
|
37
|
-
x = Object.new
|
38
|
-
refute(::FlatKit::Output::IO.handles?(x))
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_init_from_dash
|
42
|
-
io = ::FlatKit::Output::IO.new("-")
|
43
|
-
assert_equal("<STDOUT>", io.name)
|
44
|
-
assert_equal(::STDOUT, io.io)
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_init_from_stderr_text
|
48
|
-
io = ::FlatKit::Output::IO.new("stderr")
|
49
|
-
assert_equal("<STDERR>", io.name)
|
50
|
-
assert_equal(::STDERR, io.io)
|
51
|
-
end
|
52
|
-
|
53
|
-
def test_init_from_file_object
|
54
|
-
test_path = "tmp/test_init_from_file_object.txt"
|
55
|
-
begin
|
56
|
-
File.open(test_path, "w") do |f|
|
57
|
-
io = ::FlatKit::Output::IO.new(f)
|
58
|
-
assert_equal(test_path, io.name)
|
59
|
-
assert_instance_of(::File, io.io)
|
60
|
-
end
|
61
|
-
ensure
|
62
|
-
File.unlink(test_path) if File.exist?(test_path)
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_init_from_stdout
|
67
|
-
io = ::FlatKit::Output::IO.new($stdout)
|
68
|
-
assert_equal("<STDOUT>", io.name)
|
69
|
-
assert_equal(::STDOUT, io.io)
|
70
|
-
end
|
71
|
-
|
72
|
-
def test_init_from_string_io_object
|
73
|
-
sio = StringIO.new
|
74
|
-
io = ::FlatKit::Output::IO.new(sio)
|
75
|
-
assert_match(/StringIO/, io.name)
|
76
|
-
assert_instance_of(::StringIO, io.io)
|
77
|
-
end
|
78
|
-
|
79
|
-
def test_init_from_io_object
|
80
|
-
null_io = NullIO.new
|
81
|
-
io = ::FlatKit::Output::IO.new(null_io)
|
82
|
-
assert_match(/NullIO/, io.name)
|
83
|
-
assert_instance_of(::TestOutput::NullIO, io.io)
|
84
|
-
end
|
85
|
-
|
86
|
-
def test_writes_to_io
|
87
|
-
test_path = "tmp/test_writes_to_io.txt"
|
88
|
-
begin
|
89
|
-
File.open(test_path, "w") do |f|
|
90
|
-
io = ::FlatKit::Output::IO.new(f)
|
91
|
-
assert_equal(test_path, io.name)
|
92
|
-
assert_instance_of(::File, io.io)
|
93
|
-
io.io.write("test_writes_to_io output")
|
94
|
-
io.close
|
95
|
-
end
|
96
|
-
t = IO.read(test_path)
|
97
|
-
assert_equal("test_writes_to_io output", t)
|
98
|
-
ensure
|
99
|
-
File.unlink(test_path) if File.exist?(test_path)
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
end
|
104
|
-
end
|
data/test/test_conversions.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestConversions < ::Minitest::Test
|
4
|
-
def setup
|
5
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
6
|
-
@src_record = @one_row_dataset.records.first
|
7
|
-
@csv_row = @one_row_dataset.records_as_csv_rows.first
|
8
|
-
@compare_fields = @one_row_dataset.compare_fields
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_from_csv_to_json
|
12
|
-
xsv_record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
13
|
-
json_record = FlatKit::Jsonl::Record.from_record(xsv_record)
|
14
|
-
|
15
|
-
assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
|
16
|
-
assert_equal(@one_row_dataset.records.first, json_record.to_hash)
|
17
|
-
assert_equal(xsv_record, json_record)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_from_json_to_csv
|
21
|
-
src_json = JSON.generate(@src_record)
|
22
|
-
json_record = FlatKit::Jsonl::Record.new(data: src_json, compare_fields: @compare_fields)
|
23
|
-
xsv_record = FlatKit::Xsv::Record.from_record(json_record)
|
24
|
-
|
25
|
-
assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
|
26
|
-
assert_equal(@one_row_dataset.records.first, json_record.to_hash)
|
27
|
-
assert_equal(xsv_record, json_record)
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_roundtrip_csv_json_csv
|
31
|
-
xsv_record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
32
|
-
json_record = FlatKit::Jsonl::Record.from_record(xsv_record)
|
33
|
-
xsv2 = FlatKit::Xsv::Record.from_record(json_record)
|
34
|
-
|
35
|
-
assert_equal(xsv_record.to_s, xsv2.to_s)
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_roundtrip_json_csv_json
|
39
|
-
src_json = JSON.generate(@src_record)
|
40
|
-
json_record = FlatKit::Jsonl::Record.new(data: src_json, compare_fields: @compare_fields)
|
41
|
-
xsv_record = FlatKit::Xsv::Record.from_record(json_record)
|
42
|
-
json2 = FlatKit::Jsonl::Record.from_record(xsv_record)
|
43
|
-
assert_equal(src_json, json2.to_s)
|
44
|
-
end
|
45
|
-
end
|
data/test/test_event_emitter.rb
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestEventEmitter < ::Minitest::Test
|
4
|
-
class Pub
|
5
|
-
include ::FlatKit::EventEmitter
|
6
|
-
end
|
7
|
-
|
8
|
-
class Sub
|
9
|
-
attr_reader :name
|
10
|
-
attr_reader :data
|
11
|
-
|
12
|
-
def initialize
|
13
|
-
@name = nil
|
14
|
-
@data = nil
|
15
|
-
end
|
16
|
-
|
17
|
-
def on_event(name:, data:)
|
18
|
-
@name = name
|
19
|
-
@data = data
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
class BadSub; end
|
24
|
-
|
25
|
-
def setup
|
26
|
-
@emitter = Pub.new
|
27
|
-
@receiver = Sub.new
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_counts_no_listeners_before_adding_one
|
31
|
-
assert_equal(0, @emitter.count_listeners)
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_adds_listener
|
35
|
-
@emitter.add_listener(@receiver)
|
36
|
-
assert_equal(1, @emitter.count_listeners)
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_removes_listener
|
40
|
-
@emitter.add_listener(@receiver)
|
41
|
-
assert_equal(1, @emitter.count_listeners)
|
42
|
-
|
43
|
-
@emitter.remove_listener(@receiver)
|
44
|
-
assert_equal(0, @emitter.count_listeners)
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_only_adds_an_listener_once
|
48
|
-
@emitter.add_listener(@receiver)
|
49
|
-
assert_equal(1, @emitter.count_listeners)
|
50
|
-
|
51
|
-
@emitter.add_listener(@receiver)
|
52
|
-
assert_equal(1, @emitter.count_listeners)
|
53
|
-
end
|
54
|
-
|
55
|
-
def test_verifies_reciever_responds_t_observed
|
56
|
-
assert_raises(::NoMethodError) { @emitter.add_listener(BadSub.new) }
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_listeners_get_notified
|
60
|
-
@receiver_2 = Sub.new
|
61
|
-
@emitter.add_listener(@receiver)
|
62
|
-
@emitter.add_listener(@receiver_2)
|
63
|
-
|
64
|
-
@emitter.notify_listeners(name: :notification, data: "DATA!")
|
65
|
-
|
66
|
-
assert_equal(:notification, @receiver.name)
|
67
|
-
assert_equal(:notification, @receiver_2.name)
|
68
|
-
|
69
|
-
assert_equal("DATA!", @receiver.data)
|
70
|
-
assert_equal("DATA!", @receiver_2.data)
|
71
|
-
end
|
72
|
-
end
|
data/test/test_format.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require_relative 'test_helper'
|
2
|
-
|
3
|
-
class TestFormat < ::Minitest::Test
|
4
|
-
def test_finds_jsonl_format
|
5
|
-
klass = ::FlatKit::Format.for("data.json.gz")
|
6
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_finds_xsv_format
|
10
|
-
klass = ::FlatKit::Format.for("data.csv.gz")
|
11
|
-
assert_equal(::FlatKit::Xsv::Format, klass)
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_finds_jsonl_format_for_full_path
|
15
|
-
klass = ::FlatKit::Format.for("tmp/sorted/foo.jsonl")
|
16
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_finds_jsonl_format_with_fallback
|
20
|
-
path = "tmp/sorted/foo.json"
|
21
|
-
klass = ::FlatKit::Format.for_with_fallback!(path: path, fallback: "auto")
|
22
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
23
|
-
end
|
24
|
-
end
|
data/test/test_helper.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
SimpleCov.start if ENV['COVERAGE']
|
3
|
-
|
4
|
-
require 'byebug'
|
5
|
-
|
6
|
-
require 'minitest/autorun'
|
7
|
-
require 'minitest/focus'
|
8
|
-
require 'minitest/pride'
|
9
|
-
|
10
|
-
module TestHelper
|
11
|
-
def scratch_dir
|
12
|
-
p = Pathname.new(__FILE__).parent.parent.join('tmp/testing_scratch')
|
13
|
-
p.mkpath
|
14
|
-
p
|
15
|
-
end
|
16
|
-
|
17
|
-
def generate_slug(length: 10)
|
18
|
-
SecureRandom.alphanumeric(10)
|
19
|
-
end
|
20
|
-
|
21
|
-
def scratch_file(prefix: "test_", slug: generate_slug, extension: ".jsonl")
|
22
|
-
scratch_dir.join("#{prefix}#{slug}#{extension}")
|
23
|
-
end
|
24
|
-
end
|
25
|
-
require_relative '../lib/flat_kit'
|
26
|
-
require_relative './device_dataset'
|
data/test/test_merge.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestMerge < ::Minitest::Test
|
4
|
-
|
5
|
-
def test_can_use_use_dash_as_output
|
6
|
-
merge = ::FlatKit::Merge.new(inputs: [], input_fallback: "json",
|
7
|
-
output: "-", output_fallback: "json", compare_fields: [])
|
8
|
-
assert_match(/STDOUT/, merge.writer.output.name)
|
9
|
-
assert_instance_of(::FlatKit::Output::IO, merge.writer.output)
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_can_use_a_text_path_as_output
|
13
|
-
test_path = "tmp/test_can_use_a_text_path_as_output.json"
|
14
|
-
begin
|
15
|
-
merge = ::FlatKit::Merge.new(output: test_path, inputs: [], input_fallback: "json", compare_fields: [])
|
16
|
-
assert_equal(test_path, merge.writer.output.name)
|
17
|
-
assert_instance_of(::FlatKit::Output::File, merge.writer.output)
|
18
|
-
merge.writer.close
|
19
|
-
ensure
|
20
|
-
File.unlink(test_path) if File.exist?(test_path)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_can_use_a_pathname_as_output
|
25
|
-
test_path = Pathname.new("tmp/test_can_use_a_pathname_as_output.json")
|
26
|
-
begin
|
27
|
-
merge = ::FlatKit::Merge.new(output: test_path, inputs: [], input_fallback: "json", compare_fields: [])
|
28
|
-
assert_equal(test_path.to_s, merge.writer.output.name)
|
29
|
-
assert_instance_of(::FlatKit::Output::File, merge.writer.output)
|
30
|
-
merge.writer.close
|
31
|
-
ensure
|
32
|
-
test_path.unlink if test_path.exist?
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_raises_error_if_unable_to_parse_output
|
37
|
-
test_path = Object.new
|
38
|
-
assert_raises(FlatKit::Error) { ::FlatKit::Merge.new(output: test_path, inputs: [], compare_fields: []) }
|
39
|
-
end
|
40
|
-
end
|
data/test/test_merge_tree.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestMergeTree < ::Minitest::Test
|
4
|
-
def setup
|
5
|
-
@dataset_count = 20
|
6
|
-
@records_per_dataset = 100
|
7
|
-
@records = []
|
8
|
-
@datasets = Array.new.tap do |a|
|
9
|
-
@dataset_count.times do
|
10
|
-
dd = DeviceDataset.new(count: @records_per_dataset)
|
11
|
-
dd.persist_sorted_records_as_jsonl
|
12
|
-
@records.concat(dd.records)
|
13
|
-
a << dd
|
14
|
-
end
|
15
|
-
end
|
16
|
-
@compare_fields = @datasets.first.compare_fields
|
17
|
-
@readers = @datasets.map { |dd|
|
18
|
-
::FlatKit::Jsonl::Reader.new(source: dd.filename_sorted_jsonl, compare_fields: @compare_fields)
|
19
|
-
}
|
20
|
-
end
|
21
|
-
|
22
|
-
def teardown
|
23
|
-
@datasets.each do |ds|
|
24
|
-
ds.cleanup_files
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def test_init_tree
|
29
|
-
tree = ::FlatKit::MergeTree.new(@readers)
|
30
|
-
assert_equal(20, tree.leaves.size)
|
31
|
-
|
32
|
-
assert_equal(5, tree.depth)
|
33
|
-
|
34
|
-
# 0th level should have 10 nodes - since 20 leaves
|
35
|
-
assert_equal(10, tree.levels[0].size)
|
36
|
-
|
37
|
-
# 1st level should have 5 nodes - since 10 nodes lower
|
38
|
-
assert_equal(5, tree.levels[1].size)
|
39
|
-
|
40
|
-
# 2nd level should have 3 nodes - since 5 above (and we shim in a Sentinel
|
41
|
-
# node on the last internal node)
|
42
|
-
assert_equal(3, tree.levels[2].size)
|
43
|
-
assert_instance_of(::FlatKit::SentinelInternalNode, tree.levels[2].last.right)
|
44
|
-
|
45
|
-
# 3rd level should have 2 nodes
|
46
|
-
assert_equal(2, tree.levels[3].size)
|
47
|
-
|
48
|
-
# 4th level should have 1 nodes
|
49
|
-
assert_equal(1, tree.levels[4].size)
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_merging
|
53
|
-
expected_records = @records.sort_by { |r| @compare_fields.map { |f| r[f] } }
|
54
|
-
tree = ::FlatKit::MergeTree.new(@readers)
|
55
|
-
actual_records = tree.to_a.map { |r| r.to_hash }
|
56
|
-
|
57
|
-
assert_equal(expected_records.size, actual_records.size)
|
58
|
-
|
59
|
-
expected_records.each_with_index do |expected, idx|
|
60
|
-
actual = actual_records[idx]
|
61
|
-
assert_equal(expected, actual)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
data/test/test_version.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestVersion < ::Minitest::Test
|
4
|
-
def test_version_constant_match
|
5
|
-
assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION)
|
6
|
-
end
|
7
|
-
|
8
|
-
def test_version_string_match
|
9
|
-
assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION.to_s)
|
10
|
-
end
|
11
|
-
end
|
data/test/xsv/test_format.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestFormat < ::Minitest::Test
|
5
|
-
|
6
|
-
def test_handles_csv
|
7
|
-
assert(::FlatKit::Xsv::Format.handles?("csv"))
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_handles_tsv
|
11
|
-
assert(::FlatKit::Xsv::Format.handles?("tsv"))
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_handles_txt
|
15
|
-
assert(::FlatKit::Xsv::Format.handles?("txt"))
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_does_not_handle_json
|
19
|
-
refute(::FlatKit::Xsv::Format.handles?("json"))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
data/test/xsv/test_reader.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestReader < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@test_path = "tmp/test_reads_from_io.csv"
|
10
|
-
|
11
|
-
File.open(@test_path, "wb") do |f|
|
12
|
-
f.write(@dataset.records_as_csv)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def teardown
|
17
|
-
File.unlink(@test_path) if File.exist?(@test_path)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_fields
|
21
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
22
|
-
reader.to_a
|
23
|
-
assert_equal(@dataset.fields, reader.fields)
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_raises_error_on_invalid_source
|
27
|
-
assert_raises(::FlatKit::Error) {
|
28
|
-
::FlatKit::Xsv::Reader.new(source: Object.new, compare_fields: nil)
|
29
|
-
}
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_automatically_figures_out_fields_if_needed
|
33
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path)
|
34
|
-
reader.take(1)
|
35
|
-
assert_equal(@dataset.fields, reader.fields)
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_reads_from_pathname
|
39
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
40
|
-
all = reader.to_a
|
41
|
-
assert_equal(@count, reader.count)
|
42
|
-
assert_equal(@count, all.size)
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_reads_from_io
|
46
|
-
File.open(@test_path) do |f|
|
47
|
-
reader = ::FlatKit::Xsv::Reader.new(source: f, compare_fields: @compare_fields)
|
48
|
-
all = reader.to_a
|
49
|
-
assert_equal(@count, reader.count)
|
50
|
-
assert_equal(@count, all.size)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_raises_error_on_io_error
|
55
|
-
s = StringIO.new
|
56
|
-
s.close_read
|
57
|
-
reader = ::FlatKit::Xsv::Reader.new(source: s, compare_fields: @compare_fields)
|
58
|
-
assert_raises(::FlatKit::Error) { reader.to_a }
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
data/test/xsv/test_record.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
require 'faker'
|
3
|
-
require 'byebug'
|
4
|
-
|
5
|
-
module TestXsv
|
6
|
-
class TestRecord< ::Minitest::Test
|
7
|
-
def setup
|
8
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
9
|
-
@csv_row = @one_row_dataset.records_as_csv_rows.first
|
10
|
-
@compare_fields = @one_row_dataset.compare_fields
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_initializes_from_data
|
14
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
15
|
-
original_record = @one_row_dataset.records.first
|
16
|
-
@compare_fields.each do |field|
|
17
|
-
assert_equal(original_record[field], record[field])
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_ignores_non_compare_fields_values
|
22
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
23
|
-
refute(record["version"])
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_is_sortable
|
27
|
-
dataset = DeviceDataset.new(count: 20)
|
28
|
-
fk_records = Array.new.tap do |a|
|
29
|
-
dataset.records_as_csv_rows.each do |csv_row|
|
30
|
-
a << FlatKit::Xsv::Record.new(data: csv_row, compare_fields: @compare_fields)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
sorted = fk_records.sort
|
35
|
-
output_text = CSV.generate('', headers: dataset.fields, write_headers: true) do |csv|
|
36
|
-
sorted.each do |row|
|
37
|
-
csv << row.data
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
assert_equal(output_text, dataset.sorted_records_as_csv)
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_to_hash
|
45
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
46
|
-
h = record.to_hash
|
47
|
-
assert_equal(@one_row_dataset.records.first, h)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_from_record
|
51
|
-
rec1 = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
52
|
-
rec2 = FlatKit::Xsv::Record.from_record(rec1)
|
53
|
-
assert_equal(rec1, rec2)
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_incomplete_initialization
|
57
|
-
assert_raises(FlatKit::Error) {
|
58
|
-
FlatKit::Xsv::Record.new(data: nil, compare_fields: [])
|
59
|
-
}
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_to_s_from_csv_record
|
63
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
64
|
-
line = record.to_s
|
65
|
-
expected = @one_row_dataset.records_as_csv_rows[0].to_csv
|
66
|
-
assert_equal(expected, line)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|