flat_kit 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +9 -0
- data/Manifest.txt +3 -42
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +46 -32
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +41 -39
- data/lib/flat_kit/command.rb +10 -11
- data/lib/flat_kit/descendant_tracker.rb +9 -6
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +5 -2
- data/lib/flat_kit/field_stats.rb +31 -26
- data/lib/flat_kit/field_type/boolean_type.rb +9 -5
- data/lib/flat_kit/field_type/date_type.rb +19 -17
- data/lib/flat_kit/field_type/float_type.rb +15 -9
- data/lib/flat_kit/field_type/guess_type.rb +9 -6
- data/lib/flat_kit/field_type/integer_type.rb +6 -4
- data/lib/flat_kit/field_type/null_type.rb +5 -1
- data/lib/flat_kit/field_type/string_type.rb +8 -6
- data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
- data/lib/flat_kit/field_type/unknown_type.rb +12 -8
- data/lib/flat_kit/field_type.rb +52 -44
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +15 -18
- data/lib/flat_kit/jsonl/writer.rb +8 -10
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -18
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +8 -7
- data/lib/flat_kit/position.rb +3 -4
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
- data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
- data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
- data/lib/flat_kit/stat_type.rb +18 -13
- data/lib/flat_kit/stats.rb +12 -14
- data/lib/flat_kit/writer.rb +5 -6
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +13 -10
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +31 -26
- metadata +20 -158
- data/Rakefile +0 -21
- data/examples/stream-active-record-to-csv.rb +0 -42
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/field_type/test_boolean_type.rb +0 -65
- data/test/field_type/test_date_type.rb +0 -71
- data/test/field_type/test_float_type.rb +0 -56
- data/test/field_type/test_guess_type.rb +0 -14
- data/test/field_type/test_integer_type.rb +0 -52
- data/test/field_type/test_null_type.rb +0 -41
- data/test/field_type/test_string_type.rb +0 -18
- data/test/field_type/test_timestamp_type.rb +0 -108
- data/test/field_type/test_unknown_type.rb +0 -35
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -86
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/run +0 -23
- data/test/stat_type/test_nominal_stats.rb +0 -69
- data/test/stat_type/test_numerical_stats.rb +0 -118
- data/test/stat_type/test_ordinal_stats.rb +0 -92
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -89
- data/test/test_field_stats.rb +0 -134
- data/test/test_field_type.rb +0 -34
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -89
data/test/test_merge_tree.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestMergeTree < ::Minitest::Test
|
4
|
-
def setup
|
5
|
-
@dataset_count = 20
|
6
|
-
@records_per_dataset = 100
|
7
|
-
@records = []
|
8
|
-
@datasets = Array.new.tap do |a|
|
9
|
-
@dataset_count.times do
|
10
|
-
dd = DeviceDataset.new(count: @records_per_dataset)
|
11
|
-
dd.persist_sorted_records_as_jsonl
|
12
|
-
@records.concat(dd.records)
|
13
|
-
a << dd
|
14
|
-
end
|
15
|
-
end
|
16
|
-
@compare_fields = @datasets.first.compare_fields
|
17
|
-
@readers = @datasets.map { |dd|
|
18
|
-
::FlatKit::Jsonl::Reader.new(source: dd.filename_sorted_jsonl, compare_fields: @compare_fields)
|
19
|
-
}
|
20
|
-
end
|
21
|
-
|
22
|
-
def teardown
|
23
|
-
@datasets.each do |ds|
|
24
|
-
ds.cleanup_files
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def test_init_tree
|
29
|
-
tree = ::FlatKit::MergeTree.new(@readers)
|
30
|
-
assert_equal(20, tree.leaves.size)
|
31
|
-
|
32
|
-
assert_equal(5, tree.depth)
|
33
|
-
|
34
|
-
# 0th level should have 10 nodes - since 20 leaves
|
35
|
-
assert_equal(10, tree.levels[0].size)
|
36
|
-
|
37
|
-
# 1st level should have 5 nodes - since 10 nodes lower
|
38
|
-
assert_equal(5, tree.levels[1].size)
|
39
|
-
|
40
|
-
# 2nd level should have 3 nodes - since 5 above (and we shim in a Sentinel
|
41
|
-
# node on the last internal node)
|
42
|
-
assert_equal(3, tree.levels[2].size)
|
43
|
-
assert_instance_of(::FlatKit::SentinelInternalNode, tree.levels[2].last.right)
|
44
|
-
|
45
|
-
# 3rd level should have 2 nodes
|
46
|
-
assert_equal(2, tree.levels[3].size)
|
47
|
-
|
48
|
-
# 4th level should have 1 nodes
|
49
|
-
assert_equal(1, tree.levels[4].size)
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_merging
|
53
|
-
expected_records = @records.sort_by { |r| @compare_fields.map { |f| r[f] } }
|
54
|
-
tree = ::FlatKit::MergeTree.new(@readers)
|
55
|
-
actual_records = tree.to_a.map { |r| r.to_hash }
|
56
|
-
|
57
|
-
assert_equal(expected_records.size, actual_records.size)
|
58
|
-
|
59
|
-
expected_records.each_with_index do |expected, idx|
|
60
|
-
actual = actual_records[idx]
|
61
|
-
assert_equal(expected, actual)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
data/test/test_version.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestVersion < ::Minitest::Test
|
4
|
-
def test_version_constant_match
|
5
|
-
assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION)
|
6
|
-
end
|
7
|
-
|
8
|
-
def test_version_string_match
|
9
|
-
assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION.to_s)
|
10
|
-
end
|
11
|
-
end
|
data/test/xsv/test_format.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestFormat < ::Minitest::Test
|
5
|
-
|
6
|
-
def test_handles_csv
|
7
|
-
assert(::FlatKit::Xsv::Format.handles?("csv"))
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_handles_tsv
|
11
|
-
assert(::FlatKit::Xsv::Format.handles?("tsv"))
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_handles_txt
|
15
|
-
assert(::FlatKit::Xsv::Format.handles?("txt"))
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_does_not_handle_json
|
19
|
-
refute(::FlatKit::Xsv::Format.handles?("json"))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
data/test/xsv/test_reader.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestReader < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@test_path = "tmp/test_reads_from_io.csv"
|
10
|
-
|
11
|
-
File.open(@test_path, "wb") do |f|
|
12
|
-
f.write(@dataset.records_as_csv)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def teardown
|
17
|
-
File.unlink(@test_path) if File.exist?(@test_path)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_fields
|
21
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
22
|
-
reader.to_a
|
23
|
-
assert_equal(@dataset.fields, reader.fields)
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_raises_error_on_invalid_source
|
27
|
-
assert_raises(::FlatKit::Error) {
|
28
|
-
::FlatKit::Xsv::Reader.new(source: Object.new, compare_fields: nil)
|
29
|
-
}
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_automatically_figures_out_fields_if_needed
|
33
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path)
|
34
|
-
reader.take(1)
|
35
|
-
assert_equal(@dataset.fields, reader.fields)
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_reads_from_pathname
|
39
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
40
|
-
all = reader.to_a
|
41
|
-
assert_equal(@count, reader.count)
|
42
|
-
assert_equal(@count, all.size)
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_reads_from_io
|
46
|
-
File.open(@test_path) do |f|
|
47
|
-
reader = ::FlatKit::Xsv::Reader.new(source: f, compare_fields: @compare_fields)
|
48
|
-
all = reader.to_a
|
49
|
-
assert_equal(@count, reader.count)
|
50
|
-
assert_equal(@count, all.size)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_raises_error_on_io_error
|
55
|
-
s = StringIO.new
|
56
|
-
s.close_read
|
57
|
-
reader = ::FlatKit::Xsv::Reader.new(source: s, compare_fields: @compare_fields)
|
58
|
-
assert_raises(::FlatKit::Error) { reader.to_a }
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
data/test/xsv/test_record.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
require 'faker'
|
3
|
-
require 'byebug'
|
4
|
-
|
5
|
-
module TestXsv
|
6
|
-
class TestRecord< ::Minitest::Test
|
7
|
-
def setup
|
8
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
9
|
-
@csv_row = @one_row_dataset.records_as_csv_rows.first
|
10
|
-
@compare_fields = @one_row_dataset.compare_fields
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_initializes_from_data
|
14
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
15
|
-
original_record = @one_row_dataset.records.first
|
16
|
-
@compare_fields.each do |field|
|
17
|
-
assert_equal(original_record[field], record[field])
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_ignores_non_compare_fields_values
|
22
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
23
|
-
refute(record["version"])
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_is_sortable
|
27
|
-
dataset = DeviceDataset.new(count: 20)
|
28
|
-
fk_records = Array.new.tap do |a|
|
29
|
-
dataset.records_as_csv_rows.each do |csv_row|
|
30
|
-
a << FlatKit::Xsv::Record.new(data: csv_row, compare_fields: @compare_fields)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
sorted = fk_records.sort
|
35
|
-
output_text = CSV.generate('', headers: dataset.fields, write_headers: true) do |csv|
|
36
|
-
sorted.each do |row|
|
37
|
-
csv << row.data
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
assert_equal(output_text, dataset.sorted_records_as_csv)
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_to_hash
|
45
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
46
|
-
h = record.to_hash
|
47
|
-
assert_equal(@one_row_dataset.records.first, h)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_from_record
|
51
|
-
rec1 = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
52
|
-
rec2 = FlatKit::Xsv::Record.from_record(rec1)
|
53
|
-
assert_equal(rec1, rec2)
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_incomplete_initialization
|
57
|
-
assert_raises(FlatKit::Error) {
|
58
|
-
FlatKit::Xsv::Record.new(data: nil, compare_fields: [])
|
59
|
-
}
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_to_s_from_csv_record
|
63
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
64
|
-
line = record.to_s
|
65
|
-
expected = @one_row_dataset.records_as_csv_rows[0].to_csv
|
66
|
-
assert_equal(expected, line)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
data/test/xsv/test_writer.rb
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestWriter < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@write_path = "tmp/test_writes_to_io.csv"
|
10
|
-
@read_path = "tmp/test_read.csv"
|
11
|
-
|
12
|
-
File.open(@read_path, "wb") do |f|
|
13
|
-
f.write(@dataset.records_as_csv)
|
14
|
-
end
|
15
|
-
|
16
|
-
@reader = ::FlatKit::Xsv::Reader.new(source: @read_path, compare_fields: @compare_fields)
|
17
|
-
@records = @reader.to_a
|
18
|
-
end
|
19
|
-
|
20
|
-
def teardown
|
21
|
-
File.unlink(@write_path) if File.exist?(@write_path)
|
22
|
-
File.unlink(@read_path) if File.exist?(@read_path)
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_raises_error_on_invalid_destination
|
26
|
-
assert_raises(::FlatKit::Error) {
|
27
|
-
::FlatKit::Xsv::Writer.new(destination: Object.new, fields: @reader.fields)
|
28
|
-
}
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_writes_to_pathname
|
32
|
-
writer = ::FlatKit::Xsv::Writer.new(destination: @write_path, fields: @reader.fields)
|
33
|
-
@records.each do |r|
|
34
|
-
writer.write(r)
|
35
|
-
end
|
36
|
-
writer.close
|
37
|
-
assert_equal(@count, writer.count)
|
38
|
-
|
39
|
-
expected = @dataset.records_as_csv
|
40
|
-
actual = IO.read(@write_path)
|
41
|
-
assert_equal(expected, actual)
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_position
|
45
|
-
File.open(@write_path, "w+") do |f|
|
46
|
-
writer = ::FlatKit::Xsv::Writer.new(destination: f,fields: :auto)
|
47
|
-
records_bytes = 0
|
48
|
-
header_bytes = nil
|
49
|
-
|
50
|
-
@records.each_with_index do |r, idx|
|
51
|
-
record_length = r.to_s.bytesize
|
52
|
-
|
53
|
-
position = writer.write(r)
|
54
|
-
# make sure write stores the last_position api and returns that value
|
55
|
-
assert_equal(position, writer.last_position)
|
56
|
-
|
57
|
-
header_bytes = writer.header_bytes if header_bytes == nil
|
58
|
-
assert(header_bytes > 0)
|
59
|
-
|
60
|
-
assert_equal(idx, position.index)
|
61
|
-
assert_equal(header_bytes + records_bytes, position.offset)
|
62
|
-
assert_equal(record_length, position.bytesize)
|
63
|
-
|
64
|
-
records_bytes += record_length
|
65
|
-
|
66
|
-
current_position = writer.current_position
|
67
|
-
assert_equal(idx+1, current_position.index)
|
68
|
-
assert_equal(header_bytes + records_bytes, current_position.offset)
|
69
|
-
assert_equal(0, current_position.bytesize)
|
70
|
-
|
71
|
-
end
|
72
|
-
writer.close
|
73
|
-
|
74
|
-
assert_equal(@count, writer.count)
|
75
|
-
|
76
|
-
expected = @dataset.records_as_csv
|
77
|
-
actual = IO.read(@write_path)
|
78
|
-
assert_equal(expected, actual)
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def test_raises_error_on_io_error
|
83
|
-
s = StringIO.new
|
84
|
-
writer = ::FlatKit::Xsv::Writer.new(destination: s, fields: @reader.fields)
|
85
|
-
s.close_write
|
86
|
-
assert_raises(::FlatKit::Error) { writer.write(@records.first) }
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|