flat_kit 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68)
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +46 -0
  3. data/HISTORY.md +5 -0
  4. data/LICENSE.txt +21 -0
  5. data/Manifest.txt +66 -0
  6. data/README.md +80 -0
  7. data/Rakefile +20 -0
  8. data/bin/fk +4 -0
  9. data/lib/flat_kit.rb +23 -0
  10. data/lib/flat_kit/cli.rb +80 -0
  11. data/lib/flat_kit/command.rb +53 -0
  12. data/lib/flat_kit/command/cat.rb +93 -0
  13. data/lib/flat_kit/command/merge.rb +88 -0
  14. data/lib/flat_kit/command/sort.rb +88 -0
  15. data/lib/flat_kit/descendant_tracker.rb +27 -0
  16. data/lib/flat_kit/error.rb +5 -0
  17. data/lib/flat_kit/format.rb +34 -0
  18. data/lib/flat_kit/input.rb +32 -0
  19. data/lib/flat_kit/input/file.rb +53 -0
  20. data/lib/flat_kit/input/io.rb +54 -0
  21. data/lib/flat_kit/internal_node.rb +84 -0
  22. data/lib/flat_kit/jsonl.rb +8 -0
  23. data/lib/flat_kit/jsonl/format.rb +25 -0
  24. data/lib/flat_kit/jsonl/reader.rb +30 -0
  25. data/lib/flat_kit/jsonl/record.rb +84 -0
  26. data/lib/flat_kit/jsonl/writer.rb +45 -0
  27. data/lib/flat_kit/leaf_node.rb +71 -0
  28. data/lib/flat_kit/logger.rb +39 -0
  29. data/lib/flat_kit/merge.rb +35 -0
  30. data/lib/flat_kit/merge_tree.rb +104 -0
  31. data/lib/flat_kit/output.rb +32 -0
  32. data/lib/flat_kit/output/file.rb +55 -0
  33. data/lib/flat_kit/output/io.rb +73 -0
  34. data/lib/flat_kit/reader.rb +61 -0
  35. data/lib/flat_kit/record.rb +83 -0
  36. data/lib/flat_kit/sentinel_internal_node.rb +37 -0
  37. data/lib/flat_kit/sentinel_leaf_node.rb +37 -0
  38. data/lib/flat_kit/sort.rb +35 -0
  39. data/lib/flat_kit/writer.rb +38 -0
  40. data/lib/flat_kit/xsv.rb +8 -0
  41. data/lib/flat_kit/xsv/format.rb +25 -0
  42. data/lib/flat_kit/xsv/reader.rb +45 -0
  43. data/lib/flat_kit/xsv/record.rb +90 -0
  44. data/lib/flat_kit/xsv/writer.rb +70 -0
  45. data/tasks/default.rake +242 -0
  46. data/tasks/extension.rake +38 -0
  47. data/tasks/man.rake +7 -0
  48. data/tasks/this.rb +208 -0
  49. data/test/device_dataset.rb +117 -0
  50. data/test/input/test_file.rb +73 -0
  51. data/test/input/test_io.rb +93 -0
  52. data/test/jsonl/test_format.rb +22 -0
  53. data/test/jsonl/test_reader.rb +49 -0
  54. data/test/jsonl/test_record.rb +61 -0
  55. data/test/jsonl/test_writer.rb +68 -0
  56. data/test/output/test_file.rb +60 -0
  57. data/test/output/test_io.rb +104 -0
  58. data/test/test_conversions.rb +45 -0
  59. data/test/test_format.rb +24 -0
  60. data/test/test_helper.rb +26 -0
  61. data/test/test_merge.rb +40 -0
  62. data/test/test_merge_tree.rb +64 -0
  63. data/test/test_version.rb +11 -0
  64. data/test/xsv/test_format.rb +22 -0
  65. data/test/xsv/test_reader.rb +61 -0
  66. data/test/xsv/test_record.rb +69 -0
  67. data/test/xsv/test_writer.rb +68 -0
  68. metadata +237 -0
@@ -0,0 +1,22 @@
1
+ require_relative '../test_helper'
2
+
3
# Tests for ::FlatKit::Jsonl::Format.handles?, called with a file name and
# expected to answer whether the JSONL format claims it.
module TestJsonl
  class TestFormat < ::Minitest::Test
    # A gzip-suffixed ".json" name is claimed.
    def test_handles_json
      claimed = jsonl_format.handles?("data.json.gz")
      assert(claimed)
    end

    # A ".jsonl" name is claimed.
    def test_handles_jsonl
      claimed = jsonl_format.handles?("data.jsonl")
      assert(claimed)
    end

    # An ".ndjson" name is claimed.
    def test_handles_ndjson
      claimed = jsonl_format.handles?("log.ndjson")
      assert(claimed)
    end

    # A ".csv" name is not claimed.
    def test_does_not_handle_csv
      claimed = jsonl_format.handles?("data.csv")
      refute(claimed)
    end

    private

    # The format class under test.
    def jsonl_format
      ::FlatKit::Jsonl::Format
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require_relative '../test_helper'
2
+
3
# Exercises ::FlatKit::Jsonl::Reader against a fixture file that setup writes
# from a DeviceDataset and teardown removes again.
module TestJsonl
  class TestReader < ::Minitest::Test
    # Builds a 20-record dataset and writes its JSONL rendering to @test_path.
    def setup
      @count = 20
      @dataset = DeviceDataset.new(count: @count)
      @compare_fields = @dataset.compare_fields
      @test_path = "tmp/test_reads_from_io.jsonl"

      File.open(@test_path, "wb") { |fixture| fixture.write(@dataset.records_as_jsonl) }
    end

    # Deletes the fixture file when it is still present.
    def teardown
      return unless File.exist?(@test_path)

      File.unlink(@test_path)
    end

    # Passing a plain Object as the source must raise ::FlatKit::Error from
    # Reader.new itself.
    def test_raises_error_on_invalid_source
      assert_raises(::FlatKit::Error) do
        ::FlatKit::Jsonl::Reader.new(source: Object.new, compare_fields: nil)
      end
    end

    # Reading by path string yields every record of the fixture.
    def test_reads_from_pathname
      assert_reads_every_record(reader_for(@test_path))
    end

    # Reading from an already opened File yields every record of the fixture.
    def test_reads_from_io
      File.open(@test_path) do |handle|
        assert_reads_every_record(reader_for(handle))
      end
    end

    # A StringIO whose read side is closed is accepted by Reader.new; the
    # ::FlatKit::Error is expected only once the reader is enumerated.
    def test_raises_error_on_io_error
      unreadable = StringIO.new
      unreadable.close_read
      reader = reader_for(unreadable)
      assert_raises(::FlatKit::Error) { reader.to_a }
    end

    private

    # Builds a reader over +source+ using the dataset's compare fields.
    def reader_for(source)
      ::FlatKit::Jsonl::Reader.new(source: source, compare_fields: @compare_fields)
    end

    # Drains the reader first, then checks both the reader's own count and the
    # number of collected records against @count.
    def assert_reads_every_record(reader)
      collected = reader.to_a
      assert_equal(@count, reader.count)
      assert_equal(@count, collected.size)
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require_relative '../test_helper'
2
+ require 'faker'
3
+ require 'byebug'
4
+
5
# Tests for ::FlatKit::Jsonl::Record built from JSON text generated out of
# DeviceDataset records.
module TestJsonl
  class TestRecord < ::Minitest::Test
    # Prepares a single source record, its JSON text, and the dataset's
    # compare fields.
    def setup
      @one_row_dataset = DeviceDataset.new(count: 1)
      @src_record = @one_row_dataset.records.first
      @src_json = JSON.generate(@src_record)
      @compare_fields = @one_row_dataset.compare_fields
    end

    # Every compare field read through Record#[] equals the source value.
    def test_initializes_from_data
      record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
      @compare_fields.each do |k|
        assert_equal(@src_record[k], record[k])
      end
    end

    # Record#[] must be falsy for "version".
    # NOTE(review): presumably "version" is not one of the compare fields;
    # confirm against DeviceDataset.
    def test_ignores_non_compare_fields_values
      record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)

      refute(record["version"])
    end

    # Sorting 20 records and printing them one per line must equal the
    # dataset's own sorted JSONL rendering.
    def test_is_sortable
      dataset = DeviceDataset.new(count: 20)
      # map replaces the former Array.new.tap / each / << accumulation.
      fk_records = dataset.records.map do |r|
        FlatKit::Jsonl::Record.new(data: JSON.generate(r), compare_fields: @compare_fields)
      end

      sorted = fk_records.sort

      sio = StringIO.new
      sorted.each do |r|
        sio.puts(r.to_s)
      end

      sorted_string = sio.string
      assert_equal(dataset.sorted_records_as_jsonl, sorted_string)
    end

    # Record#to_hash returns a value equal to the source record.
    def test_converts_to_hash
      record = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
      h = record.to_hash

      assert_equal(@src_record, h)
    end

    # A record rebuilt through Record.from_record compares equal to its source.
    def test_converts_from_record
      rec1 = FlatKit::Jsonl::Record.new(data: @src_json, compare_fields: @compare_fields)
      rec2 = FlatKit::Jsonl::Record.from_record(rec1)
      assert_equal(rec1, rec2)
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require_relative '../test_helper'
2
+
3
# Exercises ::FlatKit::Jsonl::Writer by writing back records that were read
# from a fixture file and comparing the output with the dataset's JSONL text.
module TestJsonl
  class TestWriter < ::Minitest::Test
    # Writes the fixture to @read_path and loads its records through a Reader.
    def setup
      @count = 20
      @dataset = DeviceDataset.new(count: @count)
      @compare_fields = @dataset.compare_fields
      @write_path = "tmp/test_writes_to_io.jsonl"
      @read_path = "tmp/test_read.jsonl"

      File.open(@read_path, "wb") { |fixture| fixture.write(@dataset.records_as_jsonl) }

      @reader = ::FlatKit::Jsonl::Reader.new(source: @read_path, compare_fields: @compare_fields)
      @records = @reader.to_a
    end

    # Removes the output file first, then the fixture, when they exist.
    def teardown
      [@write_path, @read_path].each do |path|
        File.unlink(path) if File.exist?(path)
      end
    end

    # Passing a plain Object as the destination must raise ::FlatKit::Error
    # from Writer.new itself.
    def test_raises_error_on_invalid_destination
      assert_raises(::FlatKit::Error) do
        ::FlatKit::Jsonl::Writer.new(destination: Object.new)
      end
    end

    # Writing to a path string reproduces the dataset's JSONL text.
    def test_writes_to_pathname
      writer = ::FlatKit::Jsonl::Writer.new(destination: @write_path)
      assert_writes_dataset(writer)
    end

    # Writing to an already opened File reproduces the dataset's JSONL text.
    def test_writes_to_io
      File.open(@write_path, "w+") do |handle|
        writer = ::FlatKit::Jsonl::Writer.new(destination: handle)
        assert_writes_dataset(writer)
      end
    end

    # A StringIO whose write side is closed is accepted by Writer.new; the
    # ::FlatKit::Error is expected on the first write.
    def test_raises_error_on_io_error
      unwritable = StringIO.new
      unwritable.close_write
      writer = ::FlatKit::Jsonl::Writer.new(destination: unwritable)
      assert_raises(::FlatKit::Error) { writer.write(@records.first) }
    end

    private

    # Writes every loaded record, closes the writer, then checks the writer's
    # count and the bytes found at @write_path.
    def assert_writes_dataset(writer)
      @records.each { |record| writer.write(record) }
      writer.close

      assert_equal(@count, writer.count)

      expected = @dataset.records_as_jsonl
      actual = IO.read(@write_path)
      assert_equal(expected, actual)
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require_relative '../test_helper'
2
+
3
# Tests for ::FlatKit::Output::File: which destinations it claims, and that
# text written through it lands in plain and ".gz"-named files.
module TestOutput
  class TestFile < ::Minitest::Test
    # No entry of Output::IO::STDERRS is claimed by Output::File.
    def test_does_not_handle_stderr_text
      ::FlatKit::Output::IO::STDERRS.each do |e|
        refute(::FlatKit::Output::File.handles?(e), "#{e} is not stderr text")
      end
    end

    # A plain Object is not claimed by Output::File.
    def test_only_handles_string
      refute(::FlatKit::Output::File.handles?(Object.new))
    end

    # No entry of Output::IO::STDOUTS is claimed by Output::File.
    # (Name corrected from "test_doest_not_handles_stdout_text" to match the
    # stderr counterpart above.)
    def test_does_not_handle_stdout_text
      ::FlatKit::Output::IO::STDOUTS.each do |e|
        refute(::FlatKit::Output::File.handles?(e), "#{e} is not stdout text")
      end
    end

    # Constructing from a path exposes that path as the name and a ::File as
    # the underlying io.
    def test_init_from_path
      test_path = "tmp/test_init_from_path.txt"
      begin
        io = ::FlatKit::Output::File.new(test_path)
        assert_equal(test_path, io.name)
        assert_instance_of(::File, io.io)
      ensure
        # Close the output before deleting its file so the handle is not
        # leaked; io is nil when the constructor itself raised.
        io&.close
        File.unlink(test_path) if File.exist?(test_path)
      end
    end

    # Text written through the output is readable from the file after close.
    def test_writes_to_file
      test_path = "tmp/test_writes_to_file.txt"
      begin
        output = ::FlatKit::Output::File.new(test_path)
        assert_equal(test_path, output.name)
        output.io.write("test_writes_to_file output")
        output.close
        t = IO.read(test_path)
        assert_equal("test_writes_to_file output", t)
      ensure
        File.unlink(test_path) if File.exist?(test_path)
      end
    end

    # Text written to a ".gz" path must come back unchanged after gunzip.
    # The check shells out, so a gunzip executable has to be on PATH.
    def test_writes_to_gzfile
      test_path = "tmp/test_writes_to_gzfile.txt.gz"
      begin
        output = ::FlatKit::Output::File.new(test_path)
        assert_equal(test_path, output.name)
        output.io.write("test_writes_to_gzfile output")
        output.close
        t = %x[ gunzip -c #{test_path} ]
        assert_equal("test_writes_to_gzfile output", t)
      ensure
        File.unlink(test_path) if File.exist?(test_path)
      end
    end
  end
end
@@ -0,0 +1,104 @@
1
+ require_relative '../test_helper'
2
+
3
# Tests for ::FlatKit::Output::IO: which destinations it claims and what name
# and underlying io it exposes for each kind of destination.
module TestOutput
  # IO subclass whose initializer takes no arguments and does not call super,
  # so an instance can be created without a file descriptor.
  class NullIO < ::IO
    def initialize
    end
  end

  class TestIO < ::Minitest::Test
    # Every entry of Output::IO::STDERRS is claimed.
    def test_handles_stderr_text
      ::FlatKit::Output::IO::STDERRS.each do |e|
        assert(::FlatKit::Output::IO.handles?(e), "#{e} is not stderr text")
      end
    end

    # The $stderr object itself is claimed.
    def test_handles_stderr_io
      x = $stderr
      assert(::FlatKit::Output::IO.handles?(x), "is not stderr")
    end

    # Every entry of Output::IO::STDOUTS is claimed.
    def test_handles_stdout_text
      ::FlatKit::Output::IO::STDOUTS.each do |e|
        assert(::FlatKit::Output::IO.handles?(e), "#{e} is not stdout text")
      end
    end

    # The $stdout object itself is claimed.
    def test_handles_stdout_io
      # Previously assigned $stderr, which only repeated the stderr test.
      x = $stdout
      assert(::FlatKit::Output::IO.handles?(x), "is not stdout")
    end

    # A StringIO is claimed.
    def test_handles_stringio
      assert(::FlatKit::Output::IO.handles?(StringIO.new))
    end

    # A plain Object is not claimed.
    def test_does_not_handle_other
      x = Object.new
      refute(::FlatKit::Output::IO.handles?(x))
    end

    # "-" resolves to standard output.
    def test_init_from_dash
      io = ::FlatKit::Output::IO.new("-")
      assert_equal("<STDOUT>", io.name)
      assert_equal(::STDOUT, io.io)
    end

    # The text "stderr" resolves to standard error.
    def test_init_from_stderr_text
      io = ::FlatKit::Output::IO.new("stderr")
      assert_equal("<STDERR>", io.name)
      assert_equal(::STDERR, io.io)
    end

    # An open File is exposed under its path, with the File as the io.
    def test_init_from_file_object
      test_path = "tmp/test_init_from_file_object.txt"
      File.open(test_path, "w") do |f|
        io = ::FlatKit::Output::IO.new(f)
        assert_equal(test_path, io.name)
        assert_instance_of(::File, io.io)
      end
    ensure
      File.unlink(test_path) if File.exist?(test_path)
    end

    # The $stdout object is exposed as "<STDOUT>".
    def test_init_from_stdout
      io = ::FlatKit::Output::IO.new($stdout)
      assert_equal("<STDOUT>", io.name)
      assert_equal(::STDOUT, io.io)
    end

    # A StringIO gets a name containing "StringIO".
    def test_init_from_string_io_object
      sio = StringIO.new
      io = ::FlatKit::Output::IO.new(sio)
      assert_match(/StringIO/, io.name)
      assert_instance_of(::StringIO, io.io)
    end

    # Any other IO subclass gets a name containing its class name.
    def test_init_from_io_object
      null_io = NullIO.new
      io = ::FlatKit::Output::IO.new(null_io)
      assert_match(/NullIO/, io.name)
      assert_instance_of(::TestOutput::NullIO, io.io)
    end

    # Text written through the wrapped File is on disk after close.
    def test_writes_to_io
      test_path = "tmp/test_writes_to_io.txt"
      File.open(test_path, "w") do |f|
        io = ::FlatKit::Output::IO.new(f)
        assert_equal(test_path, io.name)
        assert_instance_of(::File, io.io)
        io.io.write("test_writes_to_io output")
        io.close
      end
      t = IO.read(test_path)
      assert_equal("test_writes_to_io output", t)
    ensure
      File.unlink(test_path) if File.exist?(test_path)
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'test_helper'
2
+
3
# Checks conversions between ::FlatKit::Xsv::Record and
# ::FlatKit::Jsonl::Record through their from_record constructors.
class TestConversions < ::Minitest::Test
  # Prepares one source record together with its CSV row and the dataset's
  # compare fields.
  def setup
    @one_row_dataset = DeviceDataset.new(count: 1)
    @src_record = @one_row_dataset.records.first
    @csv_row = @one_row_dataset.records_as_csv_rows.first
    @compare_fields = @one_row_dataset.compare_fields
  end

  # CSV row -> Xsv record -> Jsonl record.
  def test_from_csv_to_json
    from_csv = xsv_from_csv_row
    as_json = FlatKit::Jsonl::Record.from_record(from_csv)

    assert_records_match_source(from_csv, as_json)
  end

  # JSON text -> Jsonl record -> Xsv record.
  def test_from_json_to_csv
    from_json = jsonl_from(JSON.generate(@src_record))
    as_xsv = FlatKit::Xsv::Record.from_record(from_json)

    assert_records_match_source(as_xsv, from_json)
  end

  # Xsv -> Jsonl -> Xsv must give back the same string form.
  def test_roundtrip_csv_json_csv
    first_xsv = xsv_from_csv_row
    via_json = FlatKit::Jsonl::Record.from_record(first_xsv)
    second_xsv = FlatKit::Xsv::Record.from_record(via_json)

    assert_equal(first_xsv.to_s, second_xsv.to_s)
  end

  # Jsonl -> Xsv -> Jsonl must give back the generated JSON text.
  def test_roundtrip_json_csv_json
    source_json = JSON.generate(@src_record)
    via_xsv = FlatKit::Xsv::Record.from_record(jsonl_from(source_json))
    second_json = FlatKit::Jsonl::Record.from_record(via_xsv)
    assert_equal(source_json, second_json.to_s)
  end

  private

  # Xsv record built from the prepared CSV row.
  def xsv_from_csv_row
    FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
  end

  # Jsonl record built from the given JSON text.
  def jsonl_from(json_text)
    FlatKit::Jsonl::Record.new(data: json_text, compare_fields: @compare_fields)
  end

  # Both records must hash to the dataset's first record and compare equal to
  # each other.
  def assert_records_match_source(xsv_record, json_record)
    assert_equal(@one_row_dataset.records.first, xsv_record.to_hash)
    assert_equal(@one_row_dataset.records.first, json_record.to_hash)
    assert_equal(xsv_record, json_record)
  end
end
@@ -0,0 +1,24 @@
1
+ require_relative 'test_helper'
2
+
3
# Checks that ::FlatKit::Format resolves file names and paths to the expected
# format class.
class TestFormat < ::Minitest::Test
  # A gzip-suffixed ".json" name resolves to the JSONL format.
  def test_finds_jsonl_format
    assert_equal(::FlatKit::Jsonl::Format, format_for("data.json.gz"))
  end

  # A gzip-suffixed ".csv" name resolves to the XSV format.
  def test_finds_xsv_format
    assert_equal(::FlatKit::Xsv::Format, format_for("data.csv.gz"))
  end

  # A ".jsonl" name with leading directories resolves to the JSONL format.
  def test_finds_jsonl_format_for_full_path
    assert_equal(::FlatKit::Jsonl::Format, format_for("tmp/sorted/foo.jsonl"))
  end

  # for_with_fallback! with fallback "auto" resolves a ".json" path to the
  # JSONL format.
  def test_finds_jsonl_format_with_fallback
    located = ::FlatKit::Format.for_with_fallback!(path: "tmp/sorted/foo.json", fallback: "auto")
    assert_equal(::FlatKit::Jsonl::Format, located)
  end

  private

  # Looks up the format class for +path+ through ::FlatKit::Format.for.
  def format_for(path)
    ::FlatKit::Format.for(path)
  end
end
@@ -0,0 +1,26 @@
1
+ require 'simplecov'
2
+ SimpleCov.start if ENV['COVERAGE']
3
+
4
+ require 'byebug'
5
+
6
+ require 'minitest/autorun'
7
+ require 'minitest/focus'
8
+ require 'minitest/pride'
9
+
10
# Helpers for tests that need throwaway paths under tmp/testing_scratch.
module TestHelper
  # Returns the scratch directory as a Pathname, creating it (and any missing
  # parents) first. The directory is tmp/testing_scratch under the parent of
  # the directory that contains this file.
  def scratch_dir
    dir = Pathname.new(__FILE__).parent.parent.join('tmp/testing_scratch')
    dir.mkpath
    dir
  end

  # Returns a random alphanumeric string.
  #
  # length - number of characters to generate (default 10).
  def generate_slug(length: 10)
    # Honor the keyword; the size was previously hard-coded to 10, so callers
    # asking for another length silently got 10 characters.
    SecureRandom.alphanumeric(length)
  end

  # Returns the Pathname "<prefix><slug><extension>" inside scratch_dir.
  # Only the directory is created; the file itself is not.
  def scratch_file(prefix: "test_", slug: generate_slug, extension: ".jsonl")
    scratch_dir.join("#{prefix}#{slug}#{extension}")
  end
end
25
+ require_relative '../lib/flat_kit'
26
+ require_relative './device_dataset'