flat_kit 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +9 -0
  4. data/Manifest.txt +3 -42
  5. data/{bin → exe}/fk +2 -1
  6. data/flat_kit.gemspec +33 -0
  7. data/lib/flat_kit/cli.rb +46 -32
  8. data/lib/flat_kit/command/cat.rb +34 -32
  9. data/lib/flat_kit/command/merge.rb +37 -36
  10. data/lib/flat_kit/command/sort.rb +37 -37
  11. data/lib/flat_kit/command/stats.rb +41 -39
  12. data/lib/flat_kit/command.rb +10 -11
  13. data/lib/flat_kit/descendant_tracker.rb +9 -6
  14. data/lib/flat_kit/error.rb +4 -0
  15. data/lib/flat_kit/event_emitter.rb +5 -2
  16. data/lib/flat_kit/field_stats.rb +31 -26
  17. data/lib/flat_kit/field_type/boolean_type.rb +9 -5
  18. data/lib/flat_kit/field_type/date_type.rb +19 -17
  19. data/lib/flat_kit/field_type/float_type.rb +15 -9
  20. data/lib/flat_kit/field_type/guess_type.rb +9 -6
  21. data/lib/flat_kit/field_type/integer_type.rb +6 -4
  22. data/lib/flat_kit/field_type/null_type.rb +5 -1
  23. data/lib/flat_kit/field_type/string_type.rb +8 -6
  24. data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
  25. data/lib/flat_kit/field_type/unknown_type.rb +12 -8
  26. data/lib/flat_kit/field_type.rb +52 -44
  27. data/lib/flat_kit/format.rb +11 -5
  28. data/lib/flat_kit/input/file.rb +11 -9
  29. data/lib/flat_kit/input/io.rb +18 -21
  30. data/lib/flat_kit/input.rb +8 -7
  31. data/lib/flat_kit/internal_node.rb +22 -19
  32. data/lib/flat_kit/jsonl/format.rb +6 -2
  33. data/lib/flat_kit/jsonl/reader.rb +7 -4
  34. data/lib/flat_kit/jsonl/record.rb +15 -18
  35. data/lib/flat_kit/jsonl/writer.rb +8 -10
  36. data/lib/flat_kit/jsonl.rb +8 -4
  37. data/lib/flat_kit/leaf_node.rb +6 -5
  38. data/lib/flat_kit/log_formatter.rb +20 -0
  39. data/lib/flat_kit/logger.rb +12 -19
  40. data/lib/flat_kit/merge.rb +21 -18
  41. data/lib/flat_kit/merge_tree.rb +5 -6
  42. data/lib/flat_kit/output/file.rb +13 -9
  43. data/lib/flat_kit/output/io.rb +40 -35
  44. data/lib/flat_kit/output.rb +8 -7
  45. data/lib/flat_kit/position.rb +3 -4
  46. data/lib/flat_kit/reader.rb +8 -8
  47. data/lib/flat_kit/record.rb +12 -12
  48. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  49. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  50. data/lib/flat_kit/sort.rb +8 -9
  51. data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
  52. data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
  53. data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
  54. data/lib/flat_kit/stat_type.rb +18 -13
  55. data/lib/flat_kit/stats.rb +12 -14
  56. data/lib/flat_kit/writer.rb +5 -6
  57. data/lib/flat_kit/xsv/format.rb +6 -2
  58. data/lib/flat_kit/xsv/reader.rb +8 -6
  59. data/lib/flat_kit/xsv/record.rb +21 -15
  60. data/lib/flat_kit/xsv/writer.rb +13 -10
  61. data/lib/flat_kit/xsv.rb +7 -4
  62. data/lib/flat_kit.rb +31 -26
  63. metadata +20 -158
  64. data/Rakefile +0 -21
  65. data/examples/stream-active-record-to-csv.rb +0 -42
  66. data/tasks/default.rake +0 -242
  67. data/tasks/extension.rake +0 -38
  68. data/tasks/man.rake +0 -7
  69. data/tasks/this.rb +0 -208
  70. data/test/device_dataset.rb +0 -117
  71. data/test/field_type/test_boolean_type.rb +0 -65
  72. data/test/field_type/test_date_type.rb +0 -71
  73. data/test/field_type/test_float_type.rb +0 -56
  74. data/test/field_type/test_guess_type.rb +0 -14
  75. data/test/field_type/test_integer_type.rb +0 -52
  76. data/test/field_type/test_null_type.rb +0 -41
  77. data/test/field_type/test_string_type.rb +0 -18
  78. data/test/field_type/test_timestamp_type.rb +0 -108
  79. data/test/field_type/test_unknown_type.rb +0 -35
  80. data/test/input/test_file.rb +0 -73
  81. data/test/input/test_io.rb +0 -93
  82. data/test/jsonl/test_format.rb +0 -22
  83. data/test/jsonl/test_reader.rb +0 -49
  84. data/test/jsonl/test_record.rb +0 -61
  85. data/test/jsonl/test_writer.rb +0 -86
  86. data/test/output/test_file.rb +0 -60
  87. data/test/output/test_io.rb +0 -104
  88. data/test/run +0 -23
  89. data/test/stat_type/test_nominal_stats.rb +0 -69
  90. data/test/stat_type/test_numerical_stats.rb +0 -118
  91. data/test/stat_type/test_ordinal_stats.rb +0 -92
  92. data/test/test_conversions.rb +0 -45
  93. data/test/test_event_emitter.rb +0 -89
  94. data/test/test_field_stats.rb +0 -134
  95. data/test/test_field_type.rb +0 -34
  96. data/test/test_format.rb +0 -24
  97. data/test/test_helper.rb +0 -26
  98. data/test/test_merge.rb +0 -40
  99. data/test/test_merge_tree.rb +0 -64
  100. data/test/test_version.rb +0 -11
  101. data/test/xsv/test_format.rb +0 -22
  102. data/test/xsv/test_reader.rb +0 -61
  103. data/test/xsv/test_record.rb +0 -69
  104. data/test/xsv/test_writer.rb +0 -89
@@ -1,64 +0,0 @@
1
- require 'test_helper'
2
-
3
- class TestMergeTree < ::Minitest::Test
4
- def setup
5
- @dataset_count = 20
6
- @records_per_dataset = 100
7
- @records = []
8
- @datasets = Array.new.tap do |a|
9
- @dataset_count.times do
10
- dd = DeviceDataset.new(count: @records_per_dataset)
11
- dd.persist_sorted_records_as_jsonl
12
- @records.concat(dd.records)
13
- a << dd
14
- end
15
- end
16
- @compare_fields = @datasets.first.compare_fields
17
- @readers = @datasets.map { |dd|
18
- ::FlatKit::Jsonl::Reader.new(source: dd.filename_sorted_jsonl, compare_fields: @compare_fields)
19
- }
20
- end
21
-
22
- def teardown
23
- @datasets.each do |ds|
24
- ds.cleanup_files
25
- end
26
- end
27
-
28
- def test_init_tree
29
- tree = ::FlatKit::MergeTree.new(@readers)
30
- assert_equal(20, tree.leaves.size)
31
-
32
- assert_equal(5, tree.depth)
33
-
34
- # 0th level should have 10 nodes - since 20 leaves
35
- assert_equal(10, tree.levels[0].size)
36
-
37
- # 1st level should have 5 nodes - since 10 nodes lower
38
- assert_equal(5, tree.levels[1].size)
39
-
40
- # 2nd level should have 3 nodes - since 5 above (and we shim in a Sentinel
41
- # node on the last internal node)
42
- assert_equal(3, tree.levels[2].size)
43
- assert_instance_of(::FlatKit::SentinelInternalNode, tree.levels[2].last.right)
44
-
45
- # 3rd level should have 2 nodes
46
- assert_equal(2, tree.levels[3].size)
47
-
48
- # 4th level should have 1 nodes
49
- assert_equal(1, tree.levels[4].size)
50
- end
51
-
52
- def test_merging
53
- expected_records = @records.sort_by { |r| @compare_fields.map { |f| r[f] } }
54
- tree = ::FlatKit::MergeTree.new(@readers)
55
- actual_records = tree.to_a.map { |r| r.to_hash }
56
-
57
- assert_equal(expected_records.size, actual_records.size)
58
-
59
- expected_records.each_with_index do |expected, idx|
60
- actual = actual_records[idx]
61
- assert_equal(expected, actual)
62
- end
63
- end
64
- end
data/test/test_version.rb DELETED
@@ -1,11 +0,0 @@
1
- require 'test_helper'
2
-
3
- class TestVersion < ::Minitest::Test
4
- def test_version_constant_match
5
- assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION)
6
- end
7
-
8
- def test_version_string_match
9
- assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION.to_s)
10
- end
11
- end
@@ -1,22 +0,0 @@
1
- require_relative '../test_helper'
2
-
3
- module TestXsv
4
- class TestFormat < ::Minitest::Test
5
-
6
- def test_handles_csv
7
- assert(::FlatKit::Xsv::Format.handles?("csv"))
8
- end
9
-
10
- def test_handles_tsv
11
- assert(::FlatKit::Xsv::Format.handles?("tsv"))
12
- end
13
-
14
- def test_handles_txt
15
- assert(::FlatKit::Xsv::Format.handles?("txt"))
16
- end
17
-
18
- def test_does_not_handle_json
19
- refute(::FlatKit::Xsv::Format.handles?("json"))
20
- end
21
- end
22
- end
@@ -1,61 +0,0 @@
1
- require_relative '../test_helper'
2
-
3
- module TestXsv
4
- class TestReader < ::Minitest::Test
5
- def setup
6
- @count = 20
7
- @dataset = DeviceDataset.new(count: @count)
8
- @compare_fields = @dataset.compare_fields
9
- @test_path = "tmp/test_reads_from_io.csv"
10
-
11
- File.open(@test_path, "wb") do |f|
12
- f.write(@dataset.records_as_csv)
13
- end
14
- end
15
-
16
- def teardown
17
- File.unlink(@test_path) if File.exist?(@test_path)
18
- end
19
-
20
- def test_fields
21
- reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
22
- reader.to_a
23
- assert_equal(@dataset.fields, reader.fields)
24
- end
25
-
26
- def test_raises_error_on_invalid_source
27
- assert_raises(::FlatKit::Error) {
28
- ::FlatKit::Xsv::Reader.new(source: Object.new, compare_fields: nil)
29
- }
30
- end
31
-
32
- def test_automatically_figures_out_fields_if_needed
33
- reader = ::FlatKit::Xsv::Reader.new(source: @test_path)
34
- reader.take(1)
35
- assert_equal(@dataset.fields, reader.fields)
36
- end
37
-
38
- def test_reads_from_pathname
39
- reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
40
- all = reader.to_a
41
- assert_equal(@count, reader.count)
42
- assert_equal(@count, all.size)
43
- end
44
-
45
- def test_reads_from_io
46
- File.open(@test_path) do |f|
47
- reader = ::FlatKit::Xsv::Reader.new(source: f, compare_fields: @compare_fields)
48
- all = reader.to_a
49
- assert_equal(@count, reader.count)
50
- assert_equal(@count, all.size)
51
- end
52
- end
53
-
54
- def test_raises_error_on_io_error
55
- s = StringIO.new
56
- s.close_read
57
- reader = ::FlatKit::Xsv::Reader.new(source: s, compare_fields: @compare_fields)
58
- assert_raises(::FlatKit::Error) { reader.to_a }
59
- end
60
- end
61
- end
@@ -1,69 +0,0 @@
1
- require 'test_helper'
2
- require 'faker'
3
- require 'byebug'
4
-
5
- module TestXsv
6
- class TestRecord< ::Minitest::Test
7
- def setup
8
- @one_row_dataset = DeviceDataset.new(count: 1)
9
- @csv_row = @one_row_dataset.records_as_csv_rows.first
10
- @compare_fields = @one_row_dataset.compare_fields
11
- end
12
-
13
- def test_initializes_from_data
14
- record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
15
- original_record = @one_row_dataset.records.first
16
- @compare_fields.each do |field|
17
- assert_equal(original_record[field], record[field])
18
- end
19
- end
20
-
21
- def test_ignores_non_compare_fields_values
22
- record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
23
- refute(record["version"])
24
- end
25
-
26
- def test_is_sortable
27
- dataset = DeviceDataset.new(count: 20)
28
- fk_records = Array.new.tap do |a|
29
- dataset.records_as_csv_rows.each do |csv_row|
30
- a << FlatKit::Xsv::Record.new(data: csv_row, compare_fields: @compare_fields)
31
- end
32
- end
33
-
34
- sorted = fk_records.sort
35
- output_text = CSV.generate('', headers: dataset.fields, write_headers: true) do |csv|
36
- sorted.each do |row|
37
- csv << row.data
38
- end
39
- end
40
-
41
- assert_equal(output_text, dataset.sorted_records_as_csv)
42
- end
43
-
44
- def test_to_hash
45
- record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
46
- h = record.to_hash
47
- assert_equal(@one_row_dataset.records.first, h)
48
- end
49
-
50
- def test_from_record
51
- rec1 = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
52
- rec2 = FlatKit::Xsv::Record.from_record(rec1)
53
- assert_equal(rec1, rec2)
54
- end
55
-
56
- def test_incomplete_initialization
57
- assert_raises(FlatKit::Error) {
58
- FlatKit::Xsv::Record.new(data: nil, compare_fields: [])
59
- }
60
- end
61
-
62
- def test_to_s_from_csv_record
63
- record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
64
- line = record.to_s
65
- expected = @one_row_dataset.records_as_csv_rows[0].to_csv
66
- assert_equal(expected, line)
67
- end
68
- end
69
- end
@@ -1,89 +0,0 @@
1
- require_relative '../test_helper'
2
-
3
- module TestXsv
4
- class TestWriter < ::Minitest::Test
5
- def setup
6
- @count = 20
7
- @dataset = DeviceDataset.new(count: @count)
8
- @compare_fields = @dataset.compare_fields
9
- @write_path = "tmp/test_writes_to_io.csv"
10
- @read_path = "tmp/test_read.csv"
11
-
12
- File.open(@read_path, "wb") do |f|
13
- f.write(@dataset.records_as_csv)
14
- end
15
-
16
- @reader = ::FlatKit::Xsv::Reader.new(source: @read_path, compare_fields: @compare_fields)
17
- @records = @reader.to_a
18
- end
19
-
20
- def teardown
21
- File.unlink(@write_path) if File.exist?(@write_path)
22
- File.unlink(@read_path) if File.exist?(@read_path)
23
- end
24
-
25
- def test_raises_error_on_invalid_destination
26
- assert_raises(::FlatKit::Error) {
27
- ::FlatKit::Xsv::Writer.new(destination: Object.new, fields: @reader.fields)
28
- }
29
- end
30
-
31
- def test_writes_to_pathname
32
- writer = ::FlatKit::Xsv::Writer.new(destination: @write_path, fields: @reader.fields)
33
- @records.each do |r|
34
- writer.write(r)
35
- end
36
- writer.close
37
- assert_equal(@count, writer.count)
38
-
39
- expected = @dataset.records_as_csv
40
- actual = IO.read(@write_path)
41
- assert_equal(expected, actual)
42
- end
43
-
44
- def test_position
45
- File.open(@write_path, "w+") do |f|
46
- writer = ::FlatKit::Xsv::Writer.new(destination: f,fields: :auto)
47
- records_bytes = 0
48
- header_bytes = nil
49
-
50
- @records.each_with_index do |r, idx|
51
- record_length = r.to_s.bytesize
52
-
53
- position = writer.write(r)
54
- # make sure write stores the last_position api and returns that value
55
- assert_equal(position, writer.last_position)
56
-
57
- header_bytes = writer.header_bytes if header_bytes == nil
58
- assert(header_bytes > 0)
59
-
60
- assert_equal(idx, position.index)
61
- assert_equal(header_bytes + records_bytes, position.offset)
62
- assert_equal(record_length, position.bytesize)
63
-
64
- records_bytes += record_length
65
-
66
- current_position = writer.current_position
67
- assert_equal(idx+1, current_position.index)
68
- assert_equal(header_bytes + records_bytes, current_position.offset)
69
- assert_equal(0, current_position.bytesize)
70
-
71
- end
72
- writer.close
73
-
74
- assert_equal(@count, writer.count)
75
-
76
- expected = @dataset.records_as_csv
77
- actual = IO.read(@write_path)
78
- assert_equal(expected, actual)
79
- end
80
- end
81
-
82
- def test_raises_error_on_io_error
83
- s = StringIO.new
84
- writer = ::FlatKit::Xsv::Writer.new(destination: s, fields: @reader.fields)
85
- s.close_write
86
- assert_raises(::FlatKit::Error) { writer.write(@records.first) }
87
- end
88
- end
89
- end