flat_kit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +46 -0
  3. data/HISTORY.md +5 -0
  4. data/LICENSE.txt +21 -0
  5. data/Manifest.txt +66 -0
  6. data/README.md +80 -0
  7. data/Rakefile +20 -0
  8. data/bin/fk +4 -0
  9. data/lib/flat_kit.rb +23 -0
  10. data/lib/flat_kit/cli.rb +80 -0
  11. data/lib/flat_kit/command.rb +53 -0
  12. data/lib/flat_kit/command/cat.rb +93 -0
  13. data/lib/flat_kit/command/merge.rb +88 -0
  14. data/lib/flat_kit/command/sort.rb +88 -0
  15. data/lib/flat_kit/descendant_tracker.rb +27 -0
  16. data/lib/flat_kit/error.rb +5 -0
  17. data/lib/flat_kit/format.rb +34 -0
  18. data/lib/flat_kit/input.rb +32 -0
  19. data/lib/flat_kit/input/file.rb +53 -0
  20. data/lib/flat_kit/input/io.rb +54 -0
  21. data/lib/flat_kit/internal_node.rb +84 -0
  22. data/lib/flat_kit/jsonl.rb +8 -0
  23. data/lib/flat_kit/jsonl/format.rb +25 -0
  24. data/lib/flat_kit/jsonl/reader.rb +30 -0
  25. data/lib/flat_kit/jsonl/record.rb +84 -0
  26. data/lib/flat_kit/jsonl/writer.rb +45 -0
  27. data/lib/flat_kit/leaf_node.rb +71 -0
  28. data/lib/flat_kit/logger.rb +39 -0
  29. data/lib/flat_kit/merge.rb +35 -0
  30. data/lib/flat_kit/merge_tree.rb +104 -0
  31. data/lib/flat_kit/output.rb +32 -0
  32. data/lib/flat_kit/output/file.rb +55 -0
  33. data/lib/flat_kit/output/io.rb +73 -0
  34. data/lib/flat_kit/reader.rb +61 -0
  35. data/lib/flat_kit/record.rb +83 -0
  36. data/lib/flat_kit/sentinel_internal_node.rb +37 -0
  37. data/lib/flat_kit/sentinel_leaf_node.rb +37 -0
  38. data/lib/flat_kit/sort.rb +35 -0
  39. data/lib/flat_kit/writer.rb +38 -0
  40. data/lib/flat_kit/xsv.rb +8 -0
  41. data/lib/flat_kit/xsv/format.rb +25 -0
  42. data/lib/flat_kit/xsv/reader.rb +45 -0
  43. data/lib/flat_kit/xsv/record.rb +90 -0
  44. data/lib/flat_kit/xsv/writer.rb +70 -0
  45. data/tasks/default.rake +242 -0
  46. data/tasks/extension.rake +38 -0
  47. data/tasks/man.rake +7 -0
  48. data/tasks/this.rb +208 -0
  49. data/test/device_dataset.rb +117 -0
  50. data/test/input/test_file.rb +73 -0
  51. data/test/input/test_io.rb +93 -0
  52. data/test/jsonl/test_format.rb +22 -0
  53. data/test/jsonl/test_reader.rb +49 -0
  54. data/test/jsonl/test_record.rb +61 -0
  55. data/test/jsonl/test_writer.rb +68 -0
  56. data/test/output/test_file.rb +60 -0
  57. data/test/output/test_io.rb +104 -0
  58. data/test/test_conversions.rb +45 -0
  59. data/test/test_format.rb +24 -0
  60. data/test/test_helper.rb +26 -0
  61. data/test/test_merge.rb +40 -0
  62. data/test/test_merge_tree.rb +64 -0
  63. data/test/test_version.rb +11 -0
  64. data/test/xsv/test_format.rb +22 -0
  65. data/test/xsv/test_reader.rb +61 -0
  66. data/test/xsv/test_record.rb +69 -0
  67. data/test/xsv/test_writer.rb +68 -0
  68. metadata +237 -0
@@ -0,0 +1,22 @@
1
+ require_relative '../test_helper'
2
+
3
module TestJsonl
  # Checks which file names FlatKit::Jsonl::Format reports that it handles.
  class TestFormat < ::Minitest::Test
    # A gzip-suffixed .json name is handled.
    def test_handles_json
      handled = ::FlatKit::Jsonl::Format.handles?("data.json.gz")
      assert(handled)
    end

    # A .jsonl name is handled.
    def test_handles_jsonl
      handled = ::FlatKit::Jsonl::Format.handles?("data.jsonl")
      assert(handled)
    end

    # A .ndjson name is handled.
    def test_handles_ndjson
      handled = ::FlatKit::Jsonl::Format.handles?("log.ndjson")
      assert(handled)
    end

    # A .csv name is not handled by the jsonl format.
    def test_does_not_handle_csv
      handled = ::FlatKit::Jsonl::Format.handles?("data.csv")
      refute(handled)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require_relative '../test_helper'
2
+
3
module TestJsonl
  # Exercises FlatKit::Jsonl::Reader with a path, an open File, an invalid
  # source object and a StringIO that has been closed for reading.
  class TestReader < ::Minitest::Test
    # Writes a 20 record DeviceDataset as jsonl to a fixture file under tmp/.
    def setup
      @count          = 20
      @dataset        = DeviceDataset.new(count: @count)
      @compare_fields = @dataset.compare_fields
      @test_path      = "tmp/test_reads_from_io.jsonl"

      File.binwrite(@test_path, @dataset.records_as_jsonl)
    end

    # Removes the fixture file when it is still present.
    def teardown
      return unless File.exist?(@test_path)

      File.unlink(@test_path)
    end

    def test_raises_error_on_invalid_source
      assert_raises(::FlatKit::Error) do
        ::FlatKit::Jsonl::Reader.new(source: Object.new, compare_fields: nil)
      end
    end

    def test_reads_from_pathname
      reader  = ::FlatKit::Jsonl::Reader.new(source: @test_path, compare_fields: @compare_fields)
      records = reader.to_a

      assert_equal(@count, reader.count)
      assert_equal(@count, records.size)
    end

    def test_reads_from_io
      File.open(@test_path) do |file|
        reader  = ::FlatKit::Jsonl::Reader.new(source: file, compare_fields: @compare_fields)
        records = reader.to_a

        assert_equal(@count, reader.count)
        assert_equal(@count, records.size)
      end
    end

    # Reading from a StringIO closed for reading must surface as FlatKit::Error.
    def test_raises_error_on_io_error
      closed_io = StringIO.new
      closed_io.close_read
      reader = ::FlatKit::Jsonl::Reader.new(source: closed_io, compare_fields: @compare_fields)

      assert_raises(::FlatKit::Error) do
        reader.to_a
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require_relative '../test_helper'
2
+ require 'faker'
3
+ require 'byebug'
4
+
5
module TestJsonl
  # Exercises FlatKit::Jsonl::Record: field access, sorting and conversions.
  class TestRecord < ::Minitest::Test
    # Prepares a single-row dataset, its first record, that record as JSON,
    # and the dataset's compare fields.
    def setup
      @one_row_dataset = DeviceDataset.new(count: 1)
      @src_record      = @one_row_dataset.records.first
      @src_json        = JSON.generate(@src_record)
      @compare_fields  = @one_row_dataset.compare_fields
    end

    # Every compare field is readable through Record#[].
    def test_initializes_from_data
      record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
      @compare_fields.each do |k|
        assert_equal(@src_record[k], record[k])
      end
    end

    # Record#[] yields a falsy value for "version"; presumably because it is
    # not one of the compare fields -- confirm against DeviceDataset.
    def test_ignores_non_compare_fields_values
      record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)

      refute(record["version"])
    end

    # Sorting the records and printing them one per line must match the
    # dataset's own sorted jsonl rendering.
    def test_is_sortable
      dataset = DeviceDataset.new(count: 20)

      # map replaces the previous Array.new.tap / each / << accumulation.
      fk_records = dataset.records.map do |r|
        FlatKit::Jsonl::Record.new(data: JSON.generate(r), compare_fields: @compare_fields)
      end

      sio = StringIO.new
      fk_records.sort.each do |r|
        sio.puts(r.to_s)
      end

      assert_equal(dataset.sorted_records_as_jsonl, sio.string)
    end

    def test_converts_to_hash
      record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
      h = record.to_hash

      assert_equal(@src_record, h)
    end

    def test_converts_from_record
      rec1 = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
      rec2 = FlatKit::Jsonl::Record.from_record(rec1)
      assert_equal(rec1, rec2)
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require_relative '../test_helper'
2
+
3
module TestJsonl
  # Exercises FlatKit::Jsonl::Writer with a path, an open File, an invalid
  # destination object and a StringIO that has been closed for writing.
  class TestWriter < ::Minitest::Test
    # Writes a 20 record dataset as jsonl, then reads it back through
    # FlatKit::Jsonl::Reader so the tests have records to write.
    def setup
      @count          = 20
      @dataset        = DeviceDataset.new(count: @count)
      @compare_fields = @dataset.compare_fields
      @write_path     = "tmp/test_writes_to_io.jsonl"
      @read_path      = "tmp/test_read.jsonl"

      File.binwrite(@read_path, @dataset.records_as_jsonl)

      @reader  = ::FlatKit::Jsonl::Reader.new(source: @read_path, compare_fields: @compare_fields)
      @records = @reader.to_a
    end

    # Removes both fixture files when they are present.
    def teardown
      [@write_path, @read_path].each do |path|
        File.unlink(path) if File.exist?(path)
      end
    end

    def test_raises_error_on_invalid_destination
      assert_raises(::FlatKit::Error) do
        ::FlatKit::Jsonl::Writer.new(destination: Object.new)
      end
    end

    def test_writes_to_pathname
      writer = ::FlatKit::Jsonl::Writer.new(destination: @write_path)
      @records.each { |record| writer.write(record) }
      writer.close

      assert_equal(@count, writer.count)
      assert_equal(@dataset.records_as_jsonl, IO.read(@write_path))
    end

    def test_writes_to_io
      File.open(@write_path, "w+") do |file|
        writer = ::FlatKit::Jsonl::Writer.new(destination: file)
        @records.each { |record| writer.write(record) }
        writer.close

        assert_equal(@count, writer.count)
        assert_equal(@dataset.records_as_jsonl, IO.read(@write_path))
      end
    end

    # Writing to a StringIO closed for writing must surface as FlatKit::Error.
    def test_raises_error_on_io_error
      closed_io = StringIO.new
      closed_io.close_write
      writer = ::FlatKit::Jsonl::Writer.new(destination: closed_io)

      assert_raises(::FlatKit::Error) do
        writer.write(@records.first)
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require_relative '../test_helper'
2
+
3
module TestOutput
  # Exercises FlatKit::Output::File: which destinations it claims, and that
  # it writes both plain files and files with a .gz suffix.
  class TestFile < ::Minitest::Test
    # None of the names in Output::IO::STDERRS are treated as file paths.
    def test_does_not_handle_stderr_text
      ::FlatKit::Output::IO::STDERRS.each do |e|
        refute(::FlatKit::Output::File.handles?(e), "#{e} is not stderr text")
      end
    end

    def test_only_handles_string
      refute(::FlatKit::Output::File.handles?(Object.new))
    end

    # None of the names in Output::IO::STDOUTS are treated as file paths.
    # (Name corrected from the misspelled test_doest_not_handles_stdout_text
    # so that it matches the stderr counterpart.)
    def test_does_not_handle_stdout_text
      ::FlatKit::Output::IO::STDOUTS.each do |e|
        refute(::FlatKit::Output::File.handles?(e), "#{e} is not stdout text")
      end
    end

    def test_init_from_path
      test_path = "tmp/test_init_from_path.txt"
      begin
        io = ::FlatKit::Output::File.new(test_path)
        assert_equal(test_path, io.name)
        assert_instance_of(::File, io.io)
      ensure
        File.unlink(test_path) if File.exist?(test_path)
      end
    end

    def test_writes_to_file
      test_path = "tmp/test_writes_to_file.txt"
      begin
        output = ::FlatKit::Output::File.new(test_path)
        assert_equal(test_path, output.name)
        output.io.write("test_writes_to_file output")
        output.close
        t = IO.read(test_path)
        assert_equal("test_writes_to_file output", t)
      ensure
        File.unlink(test_path) if File.exist?(test_path)
      end
    end

    # The written file is decompressed with the external `gunzip` command,
    # so this test needs gunzip on the PATH.
    def test_writes_to_gzfile
      test_path = "tmp/test_writes_to_gzfile.txt.gz"
      begin
        output = ::FlatKit::Output::File.new(test_path)
        assert_equal(test_path, output.name)
        output.io.write("test_writes_to_gzfile output")
        output.close
        t = %x[ gunzip -c #{test_path} ]
        assert_equal("test_writes_to_gzfile output", t)
      ensure
        File.unlink(test_path) if File.exist?(test_path)
      end
    end
  end
end
@@ -0,0 +1,104 @@
1
+ require_relative '../test_helper'
2
+
3
module TestOutput
  # An ::IO subclass whose initialize takes no arguments and does not call
  # super, so the tests can create an instance without a file descriptor.
  class NullIO < ::IO
    def initialize
    end
  end

  # Exercises FlatKit::Output::IO: which destinations it claims, how it names
  # them, and that it writes through a wrapped File.
  class TestIO < ::Minitest::Test
    def test_handles_stderr_text
      ::FlatKit::Output::IO::STDERRS.each do |e|
        assert(::FlatKit::Output::IO.handles?(e), "#{e} is not stderr text")
      end
    end

    def test_handles_stderr_io
      x = $stderr
      assert(::FlatKit::Output::IO.handles?(x), "is not stderr")
    end

    def test_handles_stdout_text
      ::FlatKit::Output::IO::STDOUTS.each do |e|
        assert(::FlatKit::Output::IO.handles?(e), "#{e} is not stdout text")
      end
    end

    # Checks $stdout. The previous version checked $stderr here, which only
    # duplicated test_handles_stderr_io and never exercised stdout.
    def test_handles_stdout_io
      x = $stdout
      assert(::FlatKit::Output::IO.handles?(x), "is not stdout")
    end

    def test_handles_stringio
      assert(::FlatKit::Output::IO.handles?(StringIO.new))
    end

    def test_does_not_handle_other
      x = Object.new
      refute(::FlatKit::Output::IO.handles?(x))
    end

    def test_init_from_dash
      io = ::FlatKit::Output::IO.new("-")
      assert_equal("<STDOUT>", io.name)
      assert_equal(::STDOUT, io.io)
    end

    def test_init_from_stderr_text
      io = ::FlatKit::Output::IO.new("stderr")
      assert_equal("<STDERR>", io.name)
      assert_equal(::STDERR, io.io)
    end

    def test_init_from_file_object
      test_path = "tmp/test_init_from_file_object.txt"
      begin
        File.open(test_path, "w") do |f|
          io = ::FlatKit::Output::IO.new(f)
          assert_equal(test_path, io.name)
          assert_instance_of(::File, io.io)
        end
      ensure
        File.unlink(test_path) if File.exist?(test_path)
      end
    end

    def test_init_from_stdout
      io = ::FlatKit::Output::IO.new($stdout)
      assert_equal("<STDOUT>", io.name)
      assert_equal(::STDOUT, io.io)
    end

    def test_init_from_string_io_object
      sio = StringIO.new
      io = ::FlatKit::Output::IO.new(sio)
      assert_match(/StringIO/, io.name)
      assert_instance_of(::StringIO, io.io)
    end

    def test_init_from_io_object
      null_io = NullIO.new
      io = ::FlatKit::Output::IO.new(null_io)
      assert_match(/NullIO/, io.name)
      assert_instance_of(::TestOutput::NullIO, io.io)
    end

    # The content is read back only after the File.open block has finished.
    def test_writes_to_io
      test_path = "tmp/test_writes_to_io.txt"
      begin
        File.open(test_path, "w") do |f|
          io = ::FlatKit::Output::IO.new(f)
          assert_equal(test_path, io.name)
          assert_instance_of(::File, io.io)
          io.io.write("test_writes_to_io output")
          io.close
        end
        t = IO.read(test_path)
        assert_equal("test_writes_to_io output", t)
      ensure
        File.unlink(test_path) if File.exist?(test_path)
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'test_helper'
2
+
3
# Checks conversions between FlatKit::Xsv::Record and FlatKit::Jsonl::Record
# in both directions, including round trips.
class TestConversions < ::Minitest::Test
  # Prepares a single-row dataset plus its first record, first csv row and
  # compare fields.
  def setup
    @one_row_dataset = DeviceDataset.new(count: 1)
    @src_record      = @one_row_dataset.records.first
    @csv_row         = @one_row_dataset.records_as_csv_rows.first
    @compare_fields  = @one_row_dataset.compare_fields
  end

  def test_from_csv_to_json
    from_csv = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
    as_json  = FlatKit::Jsonl::Record.from_record(from_csv)

    assert_equal(@one_row_dataset.records.first, from_csv.to_hash)
    assert_equal(@one_row_dataset.records.first, as_json.to_hash)
    assert_equal(from_csv, as_json)
  end

  def test_from_json_to_csv
    json_text = JSON.generate(@src_record)
    from_json = FlatKit::Jsonl::Record.new(data: json_text, compare_fields: @compare_fields)
    as_csv    = FlatKit::Xsv::Record.from_record(from_json)

    assert_equal(@one_row_dataset.records.first, as_csv.to_hash)
    assert_equal(@one_row_dataset.records.first, from_json.to_hash)
    assert_equal(as_csv, from_json)
  end

  # csv -> json -> csv must render the same string as the starting csv record.
  def test_roundtrip_csv_json_csv
    first_csv  = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
    middle     = FlatKit::Jsonl::Record.from_record(first_csv)
    second_csv = FlatKit::Xsv::Record.from_record(middle)

    assert_equal(first_csv.to_s, second_csv.to_s)
  end

  # json -> csv -> json must render the same string as the source JSON.
  def test_roundtrip_json_csv_json
    json_text   = JSON.generate(@src_record)
    first_json  = FlatKit::Jsonl::Record.new(data: json_text, compare_fields: @compare_fields)
    middle      = FlatKit::Xsv::Record.from_record(first_json)
    second_json = FlatKit::Jsonl::Record.from_record(middle)

    assert_equal(json_text, second_json.to_s)
  end
end
@@ -0,0 +1,24 @@
1
+ require_relative 'test_helper'
2
+
3
# Checks that FlatKit::Format resolves file names to the matching format class.
class TestFormat < ::Minitest::Test
  def test_finds_jsonl_format
    found = ::FlatKit::Format.for("data.json.gz")
    assert_equal(::FlatKit::Jsonl::Format, found)
  end

  def test_finds_xsv_format
    found = ::FlatKit::Format.for("data.csv.gz")
    assert_equal(::FlatKit::Xsv::Format, found)
  end

  # A name with leading directories resolves like a bare file name.
  def test_finds_jsonl_format_for_full_path
    found = ::FlatKit::Format.for("tmp/sorted/foo.jsonl")
    assert_equal(::FlatKit::Jsonl::Format, found)
  end

  # With the "auto" fallback the format is still resolved from the path.
  def test_finds_jsonl_format_with_fallback
    found = ::FlatKit::Format.for_with_fallback!(path: "tmp/sorted/foo.json", fallback: "auto")
    assert_equal(::FlatKit::Jsonl::Format, found)
  end
end
@@ -0,0 +1,26 @@
1
+ require 'simplecov'
2
+ SimpleCov.start if ENV['COVERAGE']
3
+
4
+ require 'byebug'
5
+
6
+ require 'minitest/autorun'
7
+ require 'minitest/focus'
8
+ require 'minitest/pride'
9
+
10
# Helpers for tests that need throwaway files on disk.
# NOTE(review): relies on Pathname and SecureRandom already being loaded;
# this block does not require them itself -- confirm they are loaded elsewhere.
module TestHelper
  # Returns the scratch directory as a Pathname, creating it if needed.
  # The directory is tmp/testing_scratch, resolved two levels up from this file.
  def scratch_dir
    p = Pathname.new(__FILE__).parent.parent.join('tmp/testing_scratch')
    p.mkpath
    p
  end

  # Returns a random alphanumeric string.
  #
  # length - number of characters to generate (default 10). The previous
  #          version ignored this argument and always produced 10 characters.
  def generate_slug(length: 10)
    SecureRandom.alphanumeric(length)
  end

  # Returns a Pathname inside scratch_dir named "<prefix><slug><extension>".
  # The file itself is not created here.
  def scratch_file(prefix: "test_", slug: generate_slug, extension: ".jsonl")
    scratch_dir.join("#{prefix}#{slug}#{extension}")
  end
end
25
+ require_relative '../lib/flat_kit'
26
+ require_relative './device_dataset'