flat_kit 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +9 -0
- data/Manifest.txt +3 -42
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +46 -32
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +41 -39
- data/lib/flat_kit/command.rb +10 -11
- data/lib/flat_kit/descendant_tracker.rb +9 -6
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +5 -2
- data/lib/flat_kit/field_stats.rb +31 -26
- data/lib/flat_kit/field_type/boolean_type.rb +9 -5
- data/lib/flat_kit/field_type/date_type.rb +19 -17
- data/lib/flat_kit/field_type/float_type.rb +15 -9
- data/lib/flat_kit/field_type/guess_type.rb +9 -6
- data/lib/flat_kit/field_type/integer_type.rb +6 -4
- data/lib/flat_kit/field_type/null_type.rb +5 -1
- data/lib/flat_kit/field_type/string_type.rb +8 -6
- data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
- data/lib/flat_kit/field_type/unknown_type.rb +12 -8
- data/lib/flat_kit/field_type.rb +52 -44
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +15 -18
- data/lib/flat_kit/jsonl/writer.rb +8 -10
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -18
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +8 -7
- data/lib/flat_kit/position.rb +3 -4
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
- data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
- data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
- data/lib/flat_kit/stat_type.rb +18 -13
- data/lib/flat_kit/stats.rb +12 -14
- data/lib/flat_kit/writer.rb +5 -6
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +13 -10
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +31 -26
- metadata +20 -158
- data/Rakefile +0 -21
- data/examples/stream-active-record-to-csv.rb +0 -42
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/field_type/test_boolean_type.rb +0 -65
- data/test/field_type/test_date_type.rb +0 -71
- data/test/field_type/test_float_type.rb +0 -56
- data/test/field_type/test_guess_type.rb +0 -14
- data/test/field_type/test_integer_type.rb +0 -52
- data/test/field_type/test_null_type.rb +0 -41
- data/test/field_type/test_string_type.rb +0 -18
- data/test/field_type/test_timestamp_type.rb +0 -108
- data/test/field_type/test_unknown_type.rb +0 -35
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -86
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/run +0 -23
- data/test/stat_type/test_nominal_stats.rb +0 -69
- data/test/stat_type/test_numerical_stats.rb +0 -118
- data/test/stat_type/test_ordinal_stats.rb +0 -92
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -89
- data/test/test_field_stats.rb +0 -134
- data/test/test_field_type.rb +0 -34
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -89
data/test/test_merge_tree.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestMergeTree < ::Minitest::Test
|
4
|
-
def setup
|
5
|
-
@dataset_count = 20
|
6
|
-
@records_per_dataset = 100
|
7
|
-
@records = []
|
8
|
-
@datasets = Array.new.tap do |a|
|
9
|
-
@dataset_count.times do
|
10
|
-
dd = DeviceDataset.new(count: @records_per_dataset)
|
11
|
-
dd.persist_sorted_records_as_jsonl
|
12
|
-
@records.concat(dd.records)
|
13
|
-
a << dd
|
14
|
-
end
|
15
|
-
end
|
16
|
-
@compare_fields = @datasets.first.compare_fields
|
17
|
-
@readers = @datasets.map { |dd|
|
18
|
-
::FlatKit::Jsonl::Reader.new(source: dd.filename_sorted_jsonl, compare_fields: @compare_fields)
|
19
|
-
}
|
20
|
-
end
|
21
|
-
|
22
|
-
def teardown
|
23
|
-
@datasets.each do |ds|
|
24
|
-
ds.cleanup_files
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def test_init_tree
|
29
|
-
tree = ::FlatKit::MergeTree.new(@readers)
|
30
|
-
assert_equal(20, tree.leaves.size)
|
31
|
-
|
32
|
-
assert_equal(5, tree.depth)
|
33
|
-
|
34
|
-
# 0th level should have 10 nodes - since 20 leaves
|
35
|
-
assert_equal(10, tree.levels[0].size)
|
36
|
-
|
37
|
-
# 1st level should have 5 nodes - since 10 nodes lower
|
38
|
-
assert_equal(5, tree.levels[1].size)
|
39
|
-
|
40
|
-
# 2nd level should have 3 nodes - since 5 above (and we shim in a Sentinel
|
41
|
-
# node on the last internal node)
|
42
|
-
assert_equal(3, tree.levels[2].size)
|
43
|
-
assert_instance_of(::FlatKit::SentinelInternalNode, tree.levels[2].last.right)
|
44
|
-
|
45
|
-
# 3rd level should have 2 nodes
|
46
|
-
assert_equal(2, tree.levels[3].size)
|
47
|
-
|
48
|
-
# 4th level should have 1 nodes
|
49
|
-
assert_equal(1, tree.levels[4].size)
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_merging
|
53
|
-
expected_records = @records.sort_by { |r| @compare_fields.map { |f| r[f] } }
|
54
|
-
tree = ::FlatKit::MergeTree.new(@readers)
|
55
|
-
actual_records = tree.to_a.map { |r| r.to_hash }
|
56
|
-
|
57
|
-
assert_equal(expected_records.size, actual_records.size)
|
58
|
-
|
59
|
-
expected_records.each_with_index do |expected, idx|
|
60
|
-
actual = actual_records[idx]
|
61
|
-
assert_equal(expected, actual)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
data/test/test_version.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
class TestVersion < ::Minitest::Test
|
4
|
-
def test_version_constant_match
|
5
|
-
assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION)
|
6
|
-
end
|
7
|
-
|
8
|
-
def test_version_string_match
|
9
|
-
assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION.to_s)
|
10
|
-
end
|
11
|
-
end
|
data/test/xsv/test_format.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestFormat < ::Minitest::Test
|
5
|
-
|
6
|
-
def test_handles_csv
|
7
|
-
assert(::FlatKit::Xsv::Format.handles?("csv"))
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_handles_tsv
|
11
|
-
assert(::FlatKit::Xsv::Format.handles?("tsv"))
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_handles_txt
|
15
|
-
assert(::FlatKit::Xsv::Format.handles?("txt"))
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_does_not_handle_json
|
19
|
-
refute(::FlatKit::Xsv::Format.handles?("json"))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
data/test/xsv/test_reader.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestReader < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@test_path = "tmp/test_reads_from_io.csv"
|
10
|
-
|
11
|
-
File.open(@test_path, "wb") do |f|
|
12
|
-
f.write(@dataset.records_as_csv)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def teardown
|
17
|
-
File.unlink(@test_path) if File.exist?(@test_path)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_fields
|
21
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
22
|
-
reader.to_a
|
23
|
-
assert_equal(@dataset.fields, reader.fields)
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_raises_error_on_invalid_source
|
27
|
-
assert_raises(::FlatKit::Error) {
|
28
|
-
::FlatKit::Xsv::Reader.new(source: Object.new, compare_fields: nil)
|
29
|
-
}
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_automatically_figures_out_fields_if_needed
|
33
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path)
|
34
|
-
reader.take(1)
|
35
|
-
assert_equal(@dataset.fields, reader.fields)
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_reads_from_pathname
|
39
|
-
reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
|
40
|
-
all = reader.to_a
|
41
|
-
assert_equal(@count, reader.count)
|
42
|
-
assert_equal(@count, all.size)
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_reads_from_io
|
46
|
-
File.open(@test_path) do |f|
|
47
|
-
reader = ::FlatKit::Xsv::Reader.new(source: f, compare_fields: @compare_fields)
|
48
|
-
all = reader.to_a
|
49
|
-
assert_equal(@count, reader.count)
|
50
|
-
assert_equal(@count, all.size)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_raises_error_on_io_error
|
55
|
-
s = StringIO.new
|
56
|
-
s.close_read
|
57
|
-
reader = ::FlatKit::Xsv::Reader.new(source: s, compare_fields: @compare_fields)
|
58
|
-
assert_raises(::FlatKit::Error) { reader.to_a }
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
data/test/xsv/test_record.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
require 'faker'
|
3
|
-
require 'byebug'
|
4
|
-
|
5
|
-
module TestXsv
|
6
|
-
class TestRecord< ::Minitest::Test
|
7
|
-
def setup
|
8
|
-
@one_row_dataset = DeviceDataset.new(count: 1)
|
9
|
-
@csv_row = @one_row_dataset.records_as_csv_rows.first
|
10
|
-
@compare_fields = @one_row_dataset.compare_fields
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_initializes_from_data
|
14
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
15
|
-
original_record = @one_row_dataset.records.first
|
16
|
-
@compare_fields.each do |field|
|
17
|
-
assert_equal(original_record[field], record[field])
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_ignores_non_compare_fields_values
|
22
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
23
|
-
refute(record["version"])
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_is_sortable
|
27
|
-
dataset = DeviceDataset.new(count: 20)
|
28
|
-
fk_records = Array.new.tap do |a|
|
29
|
-
dataset.records_as_csv_rows.each do |csv_row|
|
30
|
-
a << FlatKit::Xsv::Record.new(data: csv_row, compare_fields: @compare_fields)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
sorted = fk_records.sort
|
35
|
-
output_text = CSV.generate('', headers: dataset.fields, write_headers: true) do |csv|
|
36
|
-
sorted.each do |row|
|
37
|
-
csv << row.data
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
assert_equal(output_text, dataset.sorted_records_as_csv)
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_to_hash
|
45
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
46
|
-
h = record.to_hash
|
47
|
-
assert_equal(@one_row_dataset.records.first, h)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_from_record
|
51
|
-
rec1 = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
52
|
-
rec2 = FlatKit::Xsv::Record.from_record(rec1)
|
53
|
-
assert_equal(rec1, rec2)
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_incomplete_initialization
|
57
|
-
assert_raises(FlatKit::Error) {
|
58
|
-
FlatKit::Xsv::Record.new(data: nil, compare_fields: [])
|
59
|
-
}
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_to_s_from_csv_record
|
63
|
-
record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
|
64
|
-
line = record.to_s
|
65
|
-
expected = @one_row_dataset.records_as_csv_rows[0].to_csv
|
66
|
-
assert_equal(expected, line)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
data/test/xsv/test_writer.rb
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
require_relative '../test_helper'
|
2
|
-
|
3
|
-
module TestXsv
|
4
|
-
class TestWriter < ::Minitest::Test
|
5
|
-
def setup
|
6
|
-
@count = 20
|
7
|
-
@dataset = DeviceDataset.new(count: @count)
|
8
|
-
@compare_fields = @dataset.compare_fields
|
9
|
-
@write_path = "tmp/test_writes_to_io.csv"
|
10
|
-
@read_path = "tmp/test_read.csv"
|
11
|
-
|
12
|
-
File.open(@read_path, "wb") do |f|
|
13
|
-
f.write(@dataset.records_as_csv)
|
14
|
-
end
|
15
|
-
|
16
|
-
@reader = ::FlatKit::Xsv::Reader.new(source: @read_path, compare_fields: @compare_fields)
|
17
|
-
@records = @reader.to_a
|
18
|
-
end
|
19
|
-
|
20
|
-
def teardown
|
21
|
-
File.unlink(@write_path) if File.exist?(@write_path)
|
22
|
-
File.unlink(@read_path) if File.exist?(@read_path)
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_raises_error_on_invalid_destination
|
26
|
-
assert_raises(::FlatKit::Error) {
|
27
|
-
::FlatKit::Xsv::Writer.new(destination: Object.new, fields: @reader.fields)
|
28
|
-
}
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_writes_to_pathname
|
32
|
-
writer = ::FlatKit::Xsv::Writer.new(destination: @write_path, fields: @reader.fields)
|
33
|
-
@records.each do |r|
|
34
|
-
writer.write(r)
|
35
|
-
end
|
36
|
-
writer.close
|
37
|
-
assert_equal(@count, writer.count)
|
38
|
-
|
39
|
-
expected = @dataset.records_as_csv
|
40
|
-
actual = IO.read(@write_path)
|
41
|
-
assert_equal(expected, actual)
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_position
|
45
|
-
File.open(@write_path, "w+") do |f|
|
46
|
-
writer = ::FlatKit::Xsv::Writer.new(destination: f,fields: :auto)
|
47
|
-
records_bytes = 0
|
48
|
-
header_bytes = nil
|
49
|
-
|
50
|
-
@records.each_with_index do |r, idx|
|
51
|
-
record_length = r.to_s.bytesize
|
52
|
-
|
53
|
-
position = writer.write(r)
|
54
|
-
# make sure write stores the last_position api and returns that value
|
55
|
-
assert_equal(position, writer.last_position)
|
56
|
-
|
57
|
-
header_bytes = writer.header_bytes if header_bytes == nil
|
58
|
-
assert(header_bytes > 0)
|
59
|
-
|
60
|
-
assert_equal(idx, position.index)
|
61
|
-
assert_equal(header_bytes + records_bytes, position.offset)
|
62
|
-
assert_equal(record_length, position.bytesize)
|
63
|
-
|
64
|
-
records_bytes += record_length
|
65
|
-
|
66
|
-
current_position = writer.current_position
|
67
|
-
assert_equal(idx+1, current_position.index)
|
68
|
-
assert_equal(header_bytes + records_bytes, current_position.offset)
|
69
|
-
assert_equal(0, current_position.bytesize)
|
70
|
-
|
71
|
-
end
|
72
|
-
writer.close
|
73
|
-
|
74
|
-
assert_equal(@count, writer.count)
|
75
|
-
|
76
|
-
expected = @dataset.records_as_csv
|
77
|
-
actual = IO.read(@write_path)
|
78
|
-
assert_equal(expected, actual)
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def test_raises_error_on_io_error
|
83
|
-
s = StringIO.new
|
84
|
-
writer = ::FlatKit::Xsv::Writer.new(destination: s, fields: @reader.fields)
|
85
|
-
s.close_write
|
86
|
-
assert_raises(::FlatKit::Error) { writer.write(@records.first) }
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|