flat_kit 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
data/test/jsonl/test_writer.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestJsonl
|
4
|
-
class TestWriter < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@write_path = "tmp/test_writes_to_io.jsonl"
|
10
|
-
@read_path = "tmp/test_read.jsonl"
|
11
|
-
|
12
|
-
File.open(@read_path, "wb") do |f|
|
13
|
-
f.write(@dataset.records_as_jsonl)
|
14
|
-
end
|
15
|
-
|
16
|
-
@reader = ::FlatKit::Jsonl::Reader.new(source: @read_path, compare_fields: @compare_fields)
|
17
|
-
@records = @reader.to_a
|
18
|
-
end
|
19
|
-
|
20
|
-
def teardown
|
21
|
-
File.unlink(@write_path) if File.exist?(@write_path)
|
22
|
-
File.unlink(@read_path) if File.exist?(@read_path)
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_raises_error_on_invalid_destination
|
26
|
-
assert_raises(::FlatKit::Error) {
|
27
|
-
::FlatKit::Jsonl::Writer.new(destination: Object.new)
|
28
|
-
}
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_writes_to_pathname
|
32
|
-
writer = ::FlatKit::Jsonl::Writer.new(destination: @write_path)
|
33
|
-
@records.each do |r|
|
34
|
-
writer.write(r)
|
35
|
-
end
|
36
|
-
writer.close
|
37
|
-
assert_equal(@count, writer.count)
|
38
|
-
|
39
|
-
expected = @dataset.records_as_jsonl
|
40
|
-
actual = IO.read(@write_path)
|
41
|
-
assert_equal(expected, actual)
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_writes_to_io
|
45
|
-
File.open(@write_path, "w+") do |f|
|
46
|
-
writer = ::FlatKit::Jsonl::Writer.new(destination: f)
|
47
|
-
|
48
|
-
@records.each do |r|
|
49
|
-
writer.write(r)
|
50
|
-
end
|
51
|
-
writer.close
|
52
|
-
|
53
|
-
assert_equal(@count, writer.count)
|
54
|
-
|
55
|
-
expected = @dataset.records_as_jsonl
|
56
|
-
actual = IO.read(@write_path)
|
57
|
-
assert_equal(expected, actual)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
def test_raises_error_on_io_error
|
62
|
-
s = StringIO.new
|
63
|
-
s.close_write
|
64
|
-
writer = ::FlatKit::Jsonl::Writer.new(destination: s)
|
65
|
-
assert_raises(::FlatKit::Error) { writer.write(@records.first) }
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
data/test/output/test_file.rb
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestOutput
|
4
|
-
class TestFile < ::Minitest::Test
|
5
|
-
def test_does_not_handle_stderr_text
|
6
|
-
::FlatKit::Output::IO::STDERRS.each do |e|
|
7
|
-
refute(::FlatKit::Output::File.handles?(e), "#{e} is not stderr text")
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_only_handles_string
|
12
|
-
refute(::FlatKit::Output::File.handles?(Object.new))
|
13
|
-
end
|
14
|
-
|
15
|
-
def test_doest_not_handles_stdout_text
|
16
|
-
::FlatKit::Output::IO::STDOUTS.each do |e|
|
17
|
-
refute(::FlatKit::Output::File.handles?(e), "#{e} is not stdout text")
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_init_from_path
|
22
|
-
test_path = "tmp/test_init_from_path.txt"
|
23
|
-
begin
|
24
|
-
io = ::FlatKit::Output::File.new(test_path)
|
25
|
-
assert_equal(test_path, io.name)
|
26
|
-
assert_instance_of(::File, io.io)
|
27
|
-
ensure
|
28
|
-
File.unlink(test_path) if File.exist?(test_path)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_writes_to_file
|
33
|
-
test_path = "tmp/test_writes_to_file.txt"
|
34
|
-
begin
|
35
|
-
output = ::FlatKit::Output::File.new(test_path)
|
36
|
-
assert_equal(test_path, output.name)
|
37
|
-
output.io.write("test_writes_to_file output")
|
38
|
-
output.close
|
39
|
-
t = IO.read(test_path)
|
40
|
-
assert_equal("test_writes_to_file output", t)
|
41
|
-
ensure
|
42
|
-
File.unlink(test_path) if File.exist?(test_path)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_writes_to_gzfile
|
47
|
-
test_path = "tmp/test_writes_to_gzfile.txt.gz"
|
48
|
-
begin
|
49
|
-
output = ::FlatKit::Output::File.new(test_path)
|
50
|
-
assert_equal(test_path, output.name)
|
51
|
-
output.io.write("test_writes_to_gzfile output")
|
52
|
-
output.close
|
53
|
-
t = %x[ gunzip -c #{test_path} ]
|
54
|
-
assert_equal("test_writes_to_gzfile output", t)
|
55
|
-
ensure
|
56
|
-
File.unlink(test_path) if File.exist?(test_path)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
data/test/output/test_io.rb
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestOutput
|
4
|
-
class NullIO < ::IO
|
5
|
-
def initialize()
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
class TestIO < ::Minitest::Test
|
10
|
-
def test_handles_stderr_text
|
11
|
-
::FlatKit::Output::IO::STDERRS.each do |e|
|
12
|
-
assert(::FlatKit::Output::IO.handles?(e), "#{e} is not stderr text")
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_handles_stderr_io
|
17
|
-
x = $stderr
|
18
|
-
assert(::FlatKit::Output::IO.handles?(x), "is not stderr")
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_handles_stdout_text
|
22
|
-
::FlatKit::Output::IO::STDOUTS.each do |e|
|
23
|
-
assert(::FlatKit::Output::IO.handles?(e), "#{e} is not stdout text")
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_handles_stdout_io
|
28
|
-
x = $stderr
|
29
|
-
assert(::FlatKit::Output::IO.handles?(x), "is not stdout")
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_handles_stringio
|
33
|
-
assert(::FlatKit::Output::IO.handles?(StringIO.new))
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_does_not_handle_other
|
37
|
-
x = Object.new
|
38
|
-
refute(::FlatKit::Output::IO.handles?(x))
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_init_from_dash
|
42
|
-
io = ::FlatKit::Output::IO.new("-")
|
43
|
-
assert_equal("<STDOUT>", io.name)
|
44
|
-
assert_equal(::STDOUT, io.io)
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_init_from_stderr_text
|
48
|
-
io = ::FlatKit::Output::IO.new("stderr")
|
49
|
-
assert_equal("<STDERR>", io.name)
|
50
|
-
assert_equal(::STDERR, io.io)
|
51
|
-
end
|
52
|
-
|
53
|
-
def test_init_from_file_object
|
54
|
-
test_path = "tmp/test_init_from_file_object.txt"
|
55
|
-
begin
|
56
|
-
File.open(test_path, "w") do |f|
|
57
|
-
io = ::FlatKit::Output::IO.new(f)
|
58
|
-
assert_equal(test_path, io.name)
|
59
|
-
assert_instance_of(::File, io.io)
|
60
|
-
end
|
61
|
-
ensure
|
62
|
-
File.unlink(test_path) if File.exist?(test_path)
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_init_from_stdout
|
67
|
-
io = ::FlatKit::Output::IO.new($stdout)
|
68
|
-
assert_equal("<STDOUT>", io.name)
|
69
|
-
assert_equal(::STDOUT, io.io)
|
70
|
-
end
|
71
|
-
|
72
|
-
def test_init_from_string_io_object
|
73
|
-
sio = StringIO.new
|
74
|
-
io = ::FlatKit::Output::IO.new(sio)
|
75
|
-
assert_match(/StringIO/, io.name)
|
76
|
-
assert_instance_of(::StringIO, io.io)
|
77
|
-
end
|
78
|
-
|
79
|
-
def test_init_from_io_object
|
80
|
-
null_io = NullIO.new
|
81
|
-
io = ::FlatKit::Output::IO.new(null_io)
|
82
|
-
assert_match(/NullIO/, io.name)
|
83
|
-
assert_instance_of(::TestOutput::NullIO, io.io)
|
84
|
-
end
|
85
|
-
|
86
|
-
def test_writes_to_io
|
87
|
-
test_path = "tmp/test_writes_to_io.txt"
|
88
|
-
begin
|
89
|
-
File.open(test_path, "w") do |f|
|
90
|
-
io = ::FlatKit::Output::IO.new(f)
|
91
|
-
assert_equal(test_path, io.name)
|
92
|
-
assert_instance_of(::File, io.io)
|
93
|
-
io.io.write("test_writes_to_io output")
|
94
|
-
io.close
|
95
|
-
end
|
96
|
-
t = IO.read(test_path)
|
97
|
-
assert_equal("test_writes_to_io output", t)
|
98
|
-
ensure
|
99
|
-
File.unlink(test_path) if File.exist?(test_path)
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
end
|
104
|
-
end
|
data/test/test_conversions.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestConversions < ::Minitest::Test
|
4
|
-
def setup
|
5
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
6
|
-
@src_record = @one_row_dataset.records.first
|
7
|
-
@csv_row = @one_row_dataset.records_as_csv_rows.first
|
8
|
-
@compare_fields = @one_row_dataset.compare_fields
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_from_csv_to_json
|
12
|
-
xsv_record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
13
|
-
json_record = FlatKit::Jsonl::Record.from_record(xsv_record)
|
14
|
-
|
15
|
-
assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
|
16
|
-
assert_equal(@one_row_dataset.records.first, json_record.to_hash)
|
17
|
-
assert_equal(xsv_record, json_record)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_from_json_to_csv
|
21
|
-
src_json = JSON.generate(@src_record)
|
22
|
-
json_record = FlatKit::Jsonl::Record.new(data: src_json, compare_fields: @compare_fields)
|
23
|
-
xsv_record = FlatKit::Xsv::Record.from_record(json_record)
|
24
|
-
|
25
|
-
assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
|
26
|
-
assert_equal(@one_row_dataset.records.first, json_record.to_hash)
|
27
|
-
assert_equal(xsv_record, json_record)
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_roundtrip_csv_json_csv
|
31
|
-
xsv_record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
32
|
-
json_record = FlatKit::Jsonl::Record.from_record(xsv_record)
|
33
|
-
xsv2 = FlatKit::Xsv::Record.from_record(json_record)
|
34
|
-
|
35
|
-
assert_equal(xsv_record.to_s, xsv2.to_s)
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_roundtrip_json_csv_json
|
39
|
-
src_json = JSON.generate(@src_record)
|
40
|
-
json_record = FlatKit::Jsonl::Record.new(data: src_json, compare_fields: @compare_fields)
|
41
|
-
xsv_record = FlatKit::Xsv::Record.from_record(json_record)
|
42
|
-
json2 = FlatKit::Jsonl::Record.from_record(xsv_record)
|
43
|
-
assert_equal(src_json, json2.to_s)
|
44
|
-
end
|
45
|
-
end
|
data/test/test_event_emitter.rb
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestEventEmitter < ::Minitest::Test
|
4
|
-
class Pub
|
5
|
-
include ::FlatKit::EventEmitter
|
6
|
-
end
|
7
|
-
|
8
|
-
class Sub
|
9
|
-
attr_reader :name
|
10
|
-
attr_reader :data
|
11
|
-
|
12
|
-
def initialize
|
13
|
-
@name = nil
|
14
|
-
@data = nil
|
15
|
-
end
|
16
|
-
|
17
|
-
def on_event(name:, data:)
|
18
|
-
@name = name
|
19
|
-
@data = data
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
class BadSub; end
|
24
|
-
|
25
|
-
def setup
|
26
|
-
@emitter = Pub.new
|
27
|
-
@receiver = Sub.new
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_counts_no_listeners_before_adding_one
|
31
|
-
assert_equal(0, @emitter.count_listeners)
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_adds_listener
|
35
|
-
@emitter.add_listener(@receiver)
|
36
|
-
assert_equal(1, @emitter.count_listeners)
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_removes_listener
|
40
|
-
@emitter.add_listener(@receiver)
|
41
|
-
assert_equal(1, @emitter.count_listeners)
|
42
|
-
|
43
|
-
@emitter.remove_listener(@receiver)
|
44
|
-
assert_equal(0, @emitter.count_listeners)
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_only_adds_an_listener_once
|
48
|
-
@emitter.add_listener(@receiver)
|
49
|
-
assert_equal(1, @emitter.count_listeners)
|
50
|
-
|
51
|
-
@emitter.add_listener(@receiver)
|
52
|
-
assert_equal(1, @emitter.count_listeners)
|
53
|
-
end
|
54
|
-
|
55
|
-
def test_verifies_reciever_responds_t_observed
|
56
|
-
assert_raises(::NoMethodError) { @emitter.add_listener(BadSub.new) }
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_listeners_get_notified
|
60
|
-
@receiver_2 = Sub.new
|
61
|
-
@emitter.add_listener(@receiver)
|
62
|
-
@emitter.add_listener(@receiver_2)
|
63
|
-
|
64
|
-
@emitter.notify_listeners(name: :notification, data: "DATA!")
|
65
|
-
|
66
|
-
assert_equal(:notification, @receiver.name)
|
67
|
-
assert_equal(:notification, @receiver_2.name)
|
68
|
-
|
69
|
-
assert_equal("DATA!", @receiver.data)
|
70
|
-
assert_equal("DATA!", @receiver_2.data)
|
71
|
-
end
|
72
|
-
end
|
data/test/test_format.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require_relative 'test_helper'
|
2
|
-
|
3
|
-
class TestFormat < ::Minitest::Test
|
4
|
-
def test_finds_jsonl_format
|
5
|
-
klass = ::FlatKit::Format.for("data.json.gz")
|
6
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_finds_xsv_format
|
10
|
-
klass = ::FlatKit::Format.for("data.csv.gz")
|
11
|
-
assert_equal(::FlatKit::Xsv::Format, klass)
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_finds_jsonl_format_for_full_path
|
15
|
-
klass = ::FlatKit::Format.for("tmp/sorted/foo.jsonl")
|
16
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_finds_jsonl_format_with_fallback
|
20
|
-
path = "tmp/sorted/foo.json"
|
21
|
-
klass = ::FlatKit::Format.for_with_fallback!(path: path, fallback: "auto")
|
22
|
-
assert_equal(::FlatKit::Jsonl::Format, klass)
|
23
|
-
end
|
24
|
-
end
|
data/test/test_helper.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
SimpleCov.start if ENV['COVERAGE']
|
3
|
-
|
4
|
-
require 'byebug'
|
5
|
-
|
6
|
-
require 'minitest/autorun'
|
7
|
-
require 'minitest/focus'
|
8
|
-
require 'minitest/pride'
|
9
|
-
|
10
|
-
module TestHelper
|
11
|
-
def scratch_dir
|
12
|
-
p = Pathname.new(__FILE__).parent.parent.join('tmp/testing_scratch')
|
13
|
-
p.mkpath
|
14
|
-
p
|
15
|
-
end
|
16
|
-
|
17
|
-
def generate_slug(length: 10)
|
18
|
-
SecureRandom.alphanumeric(10)
|
19
|
-
end
|
20
|
-
|
21
|
-
def scratch_file(prefix: "test_", slug: generate_slug, extension: ".jsonl")
|
22
|
-
scratch_dir.join("#{prefix}#{slug}#{extension}")
|
23
|
-
end
|
24
|
-
end
|
25
|
-
require_relative '../lib/flat_kit'
|
26
|
-
require_relative './device_dataset'
|
data/test/test_merge.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestMerge < ::Minitest::Test
|
4
|
-
|
5
|
-
def test_can_use_use_dash_as_output
|
6
|
-
merge = ::FlatKit::Merge.new(inputs: [], input_fallback: "json",
|
7
|
-
output: "-", output_fallback: "json", compare_fields: [])
|
8
|
-
assert_match(/STDOUT/, merge.writer.output.name)
|
9
|
-
assert_instance_of(::FlatKit::Output::IO, merge.writer.output)
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_can_use_a_text_path_as_output
|
13
|
-
test_path = "tmp/test_can_use_a_text_path_as_output.json"
|
14
|
-
begin
|
15
|
-
merge = ::FlatKit::Merge.new(output: test_path, inputs: [], input_fallback: "json", compare_fields: [])
|
16
|
-
assert_equal(test_path, merge.writer.output.name)
|
17
|
-
assert_instance_of(::FlatKit::Output::File, merge.writer.output)
|
18
|
-
merge.writer.close
|
19
|
-
ensure
|
20
|
-
File.unlink(test_path) if File.exist?(test_path)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_can_use_a_pathname_as_output
|
25
|
-
test_path = Pathname.new("tmp/test_can_use_a_pathname_as_output.json")
|
26
|
-
begin
|
27
|
-
merge = ::FlatKit::Merge.new(output: test_path, inputs: [], input_fallback: "json", compare_fields: [])
|
28
|
-
assert_equal(test_path.to_s, merge.writer.output.name)
|
29
|
-
assert_instance_of(::FlatKit::Output::File, merge.writer.output)
|
30
|
-
merge.writer.close
|
31
|
-
ensure
|
32
|
-
test_path.unlink if test_path.exist?
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_raises_error_if_unable_to_parse_output
|
37
|
-
test_path = Object.new
|
38
|
-
assert_raises(FlatKit::Error) { ::FlatKit::Merge.new(output: test_path, inputs: [], compare_fields: []) }
|
39
|
-
end
|
40
|
-
end
|
data/test/test_merge_tree.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestMergeTree < ::Minitest::Test
|
4
|
-
def setup
|
5
|
-
@dataset_count = 20
|
6
|
-
@records_per_dataset = 100
|
7
|
-
@records = []
|
8
|
-
@datasets = Array.new.tap do |a|
|
9
|
-
@dataset_count.times do
|
10
|
-
dd = DeviceDataset.new(count: @records_per_dataset)
|
11
|
-
dd.persist_sorted_records_as_jsonl
|
12
|
-
@records.concat(dd.records)
|
13
|
-
a << dd
|
14
|
-
end
|
15
|
-
end
|
16
|
-
@compare_fields = @datasets.first.compare_fields
|
17
|
-
@readers = @datasets.map { |dd|
|
18
|
-
::FlatKit::Jsonl::Reader.new(source: dd.filename_sorted_jsonl, compare_fields: @compare_fields)
|
19
|
-
}
|
20
|
-
end
|
21
|
-
|
22
|
-
def teardown
|
23
|
-
@datasets.each do |ds|
|
24
|
-
ds.cleanup_files
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def test_init_tree
|
29
|
-
tree = ::FlatKit::MergeTree.new(@readers)
|
30
|
-
assert_equal(20, tree.leaves.size)
|
31
|
-
|
32
|
-
assert_equal(5, tree.depth)
|
33
|
-
|
34
|
-
# 0th level should have 10 nodes - since 20 leaves
|
35
|
-
assert_equal(10, tree.levels[0].size)
|
36
|
-
|
37
|
-
# 1st level should have 5 nodes - since 10 nodes lower
|
38
|
-
assert_equal(5, tree.levels[1].size)
|
39
|
-
|
40
|
-
# 2nd level should have 3 nodes - since 5 above (and we shim in a Sentinel
|
41
|
-
# node on the last internal node)
|
42
|
-
assert_equal(3, tree.levels[2].size)
|
43
|
-
assert_instance_of(::FlatKit::SentinelInternalNode, tree.levels[2].last.right)
|
44
|
-
|
45
|
-
# 3rd level should have 2 nodes
|
46
|
-
assert_equal(2, tree.levels[3].size)
|
47
|
-
|
48
|
-
# 4th level should have 1 nodes
|
49
|
-
assert_equal(1, tree.levels[4].size)
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_merging
|
53
|
-
expected_records = @records.sort_by { |r| @compare_fields.map { |f| r[f] } }
|
54
|
-
tree = ::FlatKit::MergeTree.new(@readers)
|
55
|
-
actual_records = tree.to_a.map { |r| r.to_hash }
|
56
|
-
|
57
|
-
assert_equal(expected_records.size, actual_records.size)
|
58
|
-
|
59
|
-
expected_records.each_with_index do |expected, idx|
|
60
|
-
actual = actual_records[idx]
|
61
|
-
assert_equal(expected, actual)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
data/test/test_version.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestVersion < ::Minitest::Test
|
4
|
-
def test_version_constant_match
|
5
|
-
assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION)
|
6
|
-
end
|
7
|
-
|
8
|
-
def test_version_string_match
|
9
|
-
assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION.to_s)
|
10
|
-
end
|
11
|
-
end
|
data/test/xsv/test_format.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestFormat < ::Minitest::Test
|
5
|
-
|
6
|
-
def test_handles_csv
|
7
|
-
assert(::FlatKit::Xsv::Format.handles?("csv"))
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_handles_tsv
|
11
|
-
assert(::FlatKit::Xsv::Format.handles?("tsv"))
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_handles_txt
|
15
|
-
assert(::FlatKit::Xsv::Format.handles?("txt"))
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_does_not_handle_json
|
19
|
-
refute(::FlatKit::Xsv::Format.handles?("json"))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
data/test/xsv/test_reader.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestReader < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@test_path = "tmp/test_reads_from_io.csv"
|
10
|
-
|
11
|
-
File.open(@test_path, "wb") do |f|
|
12
|
-
f.write(@dataset.records_as_csv)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def teardown
|
17
|
-
File.unlink(@test_path) if File.exist?(@test_path)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_fields
|
21
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
22
|
-
reader.to_a
|
23
|
-
assert_equal(@dataset.fields, reader.fields)
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_raises_error_on_invalid_source
|
27
|
-
assert_raises(::FlatKit::Error) {
|
28
|
-
::FlatKit::Xsv::Reader.new(source: Object.new, compare_fields: nil)
|
29
|
-
}
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_automatically_figures_out_fields_if_needed
|
33
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path)
|
34
|
-
reader.take(1)
|
35
|
-
assert_equal(@dataset.fields, reader.fields)
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_reads_from_pathname
|
39
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
40
|
-
all = reader.to_a
|
41
|
-
assert_equal(@count, reader.count)
|
42
|
-
assert_equal(@count, all.size)
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_reads_from_io
|
46
|
-
File.open(@test_path) do |f|
|
47
|
-
reader = ::FlatKit::Xsv::Reader.new(source: f, compare_fields: @compare_fields)
|
48
|
-
all = reader.to_a
|
49
|
-
assert_equal(@count, reader.count)
|
50
|
-
assert_equal(@count, all.size)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_raises_error_on_io_error
|
55
|
-
s = StringIO.new
|
56
|
-
s.close_read
|
57
|
-
reader = ::FlatKit::Xsv::Reader.new(source: s, compare_fields: @compare_fields)
|
58
|
-
assert_raises(::FlatKit::Error) { reader.to_a }
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
data/test/xsv/test_record.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
require 'faker'
|
3
|
-
require 'byebug'
|
4
|
-
|
5
|
-
module TestXsv
|
6
|
-
class TestRecord< ::Minitest::Test
|
7
|
-
def setup
|
8
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
9
|
-
@csv_row = @one_row_dataset.records_as_csv_rows.first
|
10
|
-
@compare_fields = @one_row_dataset.compare_fields
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_initializes_from_data
|
14
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
15
|
-
original_record = @one_row_dataset.records.first
|
16
|
-
@compare_fields.each do |field|
|
17
|
-
assert_equal(original_record[field], record[field])
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_ignores_non_compare_fields_values
|
22
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
23
|
-
refute(record["version"])
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_is_sortable
|
27
|
-
dataset = DeviceDataset.new(count: 20)
|
28
|
-
fk_records = Array.new.tap do |a|
|
29
|
-
dataset.records_as_csv_rows.each do |csv_row|
|
30
|
-
a << FlatKit::Xsv::Record.new(data: csv_row, compare_fields: @compare_fields)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
sorted = fk_records.sort
|
35
|
-
output_text = CSV.generate('', headers: dataset.fields, write_headers: true) do |csv|
|
36
|
-
sorted.each do |row|
|
37
|
-
csv << row.data
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
assert_equal(output_text, dataset.sorted_records_as_csv)
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_to_hash
|
45
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
46
|
-
h = record.to_hash
|
47
|
-
assert_equal(@one_row_dataset.records.first, h)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_from_record
|
51
|
-
rec1 = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
52
|
-
rec2 = FlatKit::Xsv::Record.from_record(rec1)
|
53
|
-
assert_equal(rec1, rec2)
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_incomplete_initialization
|
57
|
-
assert_raises(FlatKit::Error) {
|
58
|
-
FlatKit::Xsv::Record.new(data: nil, compare_fields: [])
|
59
|
-
}
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_to_s_from_csv_record
|
63
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
64
|
-
line = record.to_s
|
65
|
-
expected = @one_row_dataset.records_as_csv_rows[0].to_csv
|
66
|
-
assert_equal(expected, line)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|