flat_kit 0.3.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +9 -0
  4. data/Manifest.txt +3 -42
  5. data/{bin → exe}/fk +2 -1
  6. data/flat_kit.gemspec +33 -0
  7. data/lib/flat_kit/cli.rb +46 -32
  8. data/lib/flat_kit/command/cat.rb +34 -32
  9. data/lib/flat_kit/command/merge.rb +37 -36
  10. data/lib/flat_kit/command/sort.rb +37 -37
  11. data/lib/flat_kit/command/stats.rb +41 -39
  12. data/lib/flat_kit/command.rb +10 -11
  13. data/lib/flat_kit/descendant_tracker.rb +9 -6
  14. data/lib/flat_kit/error.rb +4 -0
  15. data/lib/flat_kit/event_emitter.rb +5 -2
  16. data/lib/flat_kit/field_stats.rb +31 -26
  17. data/lib/flat_kit/field_type/boolean_type.rb +9 -5
  18. data/lib/flat_kit/field_type/date_type.rb +19 -17
  19. data/lib/flat_kit/field_type/float_type.rb +15 -9
  20. data/lib/flat_kit/field_type/guess_type.rb +9 -6
  21. data/lib/flat_kit/field_type/integer_type.rb +6 -4
  22. data/lib/flat_kit/field_type/null_type.rb +5 -1
  23. data/lib/flat_kit/field_type/string_type.rb +8 -6
  24. data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
  25. data/lib/flat_kit/field_type/unknown_type.rb +12 -8
  26. data/lib/flat_kit/field_type.rb +52 -44
  27. data/lib/flat_kit/format.rb +11 -5
  28. data/lib/flat_kit/input/file.rb +11 -9
  29. data/lib/flat_kit/input/io.rb +18 -21
  30. data/lib/flat_kit/input.rb +8 -7
  31. data/lib/flat_kit/internal_node.rb +22 -19
  32. data/lib/flat_kit/jsonl/format.rb +6 -2
  33. data/lib/flat_kit/jsonl/reader.rb +7 -4
  34. data/lib/flat_kit/jsonl/record.rb +15 -18
  35. data/lib/flat_kit/jsonl/writer.rb +8 -10
  36. data/lib/flat_kit/jsonl.rb +8 -4
  37. data/lib/flat_kit/leaf_node.rb +6 -5
  38. data/lib/flat_kit/log_formatter.rb +20 -0
  39. data/lib/flat_kit/logger.rb +12 -19
  40. data/lib/flat_kit/merge.rb +21 -18
  41. data/lib/flat_kit/merge_tree.rb +5 -6
  42. data/lib/flat_kit/output/file.rb +13 -9
  43. data/lib/flat_kit/output/io.rb +40 -35
  44. data/lib/flat_kit/output.rb +8 -7
  45. data/lib/flat_kit/position.rb +3 -4
  46. data/lib/flat_kit/reader.rb +8 -8
  47. data/lib/flat_kit/record.rb +12 -12
  48. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  49. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  50. data/lib/flat_kit/sort.rb +8 -9
  51. data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
  52. data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
  53. data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
  54. data/lib/flat_kit/stat_type.rb +18 -13
  55. data/lib/flat_kit/stats.rb +12 -14
  56. data/lib/flat_kit/writer.rb +5 -6
  57. data/lib/flat_kit/xsv/format.rb +6 -2
  58. data/lib/flat_kit/xsv/reader.rb +8 -6
  59. data/lib/flat_kit/xsv/record.rb +21 -15
  60. data/lib/flat_kit/xsv/writer.rb +13 -10
  61. data/lib/flat_kit/xsv.rb +7 -4
  62. data/lib/flat_kit.rb +31 -26
  63. metadata +20 -158
  64. data/Rakefile +0 -21
  65. data/examples/stream-active-record-to-csv.rb +0 -42
  66. data/tasks/default.rake +0 -242
  67. data/tasks/extension.rake +0 -38
  68. data/tasks/man.rake +0 -7
  69. data/tasks/this.rb +0 -208
  70. data/test/device_dataset.rb +0 -117
  71. data/test/field_type/test_boolean_type.rb +0 -65
  72. data/test/field_type/test_date_type.rb +0 -71
  73. data/test/field_type/test_float_type.rb +0 -56
  74. data/test/field_type/test_guess_type.rb +0 -14
  75. data/test/field_type/test_integer_type.rb +0 -52
  76. data/test/field_type/test_null_type.rb +0 -41
  77. data/test/field_type/test_string_type.rb +0 -18
  78. data/test/field_type/test_timestamp_type.rb +0 -108
  79. data/test/field_type/test_unknown_type.rb +0 -35
  80. data/test/input/test_file.rb +0 -73
  81. data/test/input/test_io.rb +0 -93
  82. data/test/jsonl/test_format.rb +0 -22
  83. data/test/jsonl/test_reader.rb +0 -49
  84. data/test/jsonl/test_record.rb +0 -61
  85. data/test/jsonl/test_writer.rb +0 -86
  86. data/test/output/test_file.rb +0 -60
  87. data/test/output/test_io.rb +0 -104
  88. data/test/run +0 -23
  89. data/test/stat_type/test_nominal_stats.rb +0 -69
  90. data/test/stat_type/test_numerical_stats.rb +0 -118
  91. data/test/stat_type/test_ordinal_stats.rb +0 -92
  92. data/test/test_conversions.rb +0 -45
  93. data/test/test_event_emitter.rb +0 -89
  94. data/test/test_field_stats.rb +0 -134
  95. data/test/test_field_type.rb +0 -34
  96. data/test/test_format.rb +0 -24
  97. data/test/test_helper.rb +0 -26
  98. data/test/test_merge.rb +0 -40
  99. data/test/test_merge_tree.rb +0 -64
  100. data/test/test_version.rb +0 -11
  101. data/test/xsv/test_format.rb +0 -22
  102. data/test/xsv/test_reader.rb +0 -61
  103. data/test/xsv/test_record.rb +0 -69
  104. data/test/xsv/test_writer.rb +0 -89
data/test/test_merge_tree.rb DELETED
@@ -1,64 +0,0 @@
1
- require 'test_helper'
2
-
3
- class TestMergeTree < ::Minitest::Test
4
- def setup
5
- @dataset_count = 20
6
- @records_per_dataset = 100
7
- @records = []
8
- @datasets = Array.new.tap do |a|
9
- @dataset_count.times do
10
- dd = DeviceDataset.new(count: @records_per_dataset)
11
- dd.persist_sorted_records_as_jsonl
12
- @records.concat(dd.records)
13
- a << dd
14
- end
15
- end
16
- @compare_fields = @datasets.first.compare_fields
17
- @readers = @datasets.map { |dd|
18
- ::FlatKit::Jsonl::Reader.new(source: dd.filename_sorted_jsonl, compare_fields: @compare_fields)
19
- }
20
- end
21
-
22
- def teardown
23
- @datasets.each do |ds|
24
- ds.cleanup_files
25
- end
26
- end
27
-
28
- def test_init_tree
29
- tree = ::FlatKit::MergeTree.new(@readers)
30
- assert_equal(20, tree.leaves.size)
31
-
32
- assert_equal(5, tree.depth)
33
-
34
- # 0th level should have 10 nodes - since 20 leaves
35
- assert_equal(10, tree.levels[0].size)
36
-
37
- # 1st level should have 5 nodes - since 10 nodes lower
38
- assert_equal(5, tree.levels[1].size)
39
-
40
- # 2nd level should have 3 nodes - since 5 above (and we shim in a Sentinel
41
- # node on the last internal node)
42
- assert_equal(3, tree.levels[2].size)
43
- assert_instance_of(::FlatKit::SentinelInternalNode, tree.levels[2].last.right)
44
-
45
- # 3rd level should have 2 nodes
46
- assert_equal(2, tree.levels[3].size)
47
-
48
- # 4th level should have 1 nodes
49
- assert_equal(1, tree.levels[4].size)
50
- end
51
-
52
- def test_merging
53
- expected_records = @records.sort_by { |r| @compare_fields.map { |f| r[f] } }
54
- tree = ::FlatKit::MergeTree.new(@readers)
55
- actual_records = tree.to_a.map { |r| r.to_hash }
56
-
57
- assert_equal(expected_records.size, actual_records.size)
58
-
59
- expected_records.each_with_index do |expected, idx|
60
- actual = actual_records[idx]
61
- assert_equal(expected, actual)
62
- end
63
- end
64
- end
data/test/test_version.rb DELETED
@@ -1,11 +0,0 @@
1
- require 'test_helper'
2
-
3
- class TestVersion < ::Minitest::Test
4
- def test_version_constant_match
5
- assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION)
6
- end
7
-
8
- def test_version_string_match
9
- assert_match(/\A\d+\.\d+\.\d+\Z/, FlatKit::VERSION.to_s)
10
- end
11
- end
@@ -1,22 +0,0 @@
1
- require_relative '../test_helper'
2
-
3
- module TestXsv
4
- class TestFormat < ::Minitest::Test
5
-
6
- def test_handles_csv
7
- assert(::FlatKit::Xsv::Format.handles?("csv"))
8
- end
9
-
10
- def test_handles_tsv
11
- assert(::FlatKit::Xsv::Format.handles?("tsv"))
12
- end
13
-
14
- def test_handles_txt
15
- assert(::FlatKit::Xsv::Format.handles?("txt"))
16
- end
17
-
18
- def test_does_not_handle_json
19
- refute(::FlatKit::Xsv::Format.handles?("json"))
20
- end
21
- end
22
- end
data/test/xsv/test_reader.rb DELETED
@@ -1,61 +0,0 @@
1
- require_relative '../test_helper'
2
-
3
- module TestXsv
4
- class TestReader < ::Minitest::Test
5
- def setup
6
- @count = 20
7
- @dataset = DeviceDataset.new(count: @count)
8
- @compare_fields = @dataset.compare_fields
9
- @test_path = "tmp/test_reads_from_io.csv"
10
-
11
- File.open(@test_path, "wb") do |f|
12
- f.write(@dataset.records_as_csv)
13
- end
14
- end
15
-
16
- def teardown
17
- File.unlink(@test_path) if File.exist?(@test_path)
18
- end
19
-
20
- def test_fields
21
- reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
22
- reader.to_a
23
- assert_equal(@dataset.fields, reader.fields)
24
- end
25
-
26
- def test_raises_error_on_invalid_source
27
- assert_raises(::FlatKit::Error) {
28
- ::FlatKit::Xsv::Reader.new(source: Object.new, compare_fields: nil)
29
- }
30
- end
31
-
32
- def test_automatically_figures_out_fields_if_needed
33
- reader = ::FlatKit::Xsv::Reader.new(source: @test_path)
34
- reader.take(1)
35
- assert_equal(@dataset.fields, reader.fields)
36
- end
37
-
38
- def test_reads_from_pathname
39
- reader = ::FlatKit::Xsv::Reader.new(source: @test_path, compare_fields: @compare_fields)
40
- all = reader.to_a
41
- assert_equal(@count, reader.count)
42
- assert_equal(@count, all.size)
43
- end
44
-
45
- def test_reads_from_io
46
- File.open(@test_path) do |f|
47
- reader = ::FlatKit::Xsv::Reader.new(source: f, compare_fields: @compare_fields)
48
- all = reader.to_a
49
- assert_equal(@count, reader.count)
50
- assert_equal(@count, all.size)
51
- end
52
- end
53
-
54
- def test_raises_error_on_io_error
55
- s = StringIO.new
56
- s.close_read
57
- reader = ::FlatKit::Xsv::Reader.new(source: s, compare_fields: @compare_fields)
58
- assert_raises(::FlatKit::Error) { reader.to_a }
59
- end
60
- end
61
- end
data/test/xsv/test_record.rb DELETED
@@ -1,69 +0,0 @@
1
- require 'test_helper'
2
- require 'faker'
3
- require 'byebug'
4
-
5
- module TestXsv
6
- class TestRecord< ::Minitest::Test
7
- def setup
8
- @one_row_dataset = DeviceDataset.new(count: 1)
9
- @csv_row = @one_row_dataset.records_as_csv_rows.first
10
- @compare_fields = @one_row_dataset.compare_fields
11
- end
12
-
13
- def test_initializes_from_data
14
- record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
15
- original_record = @one_row_dataset.records.first
16
- @compare_fields.each do |field|
17
- assert_equal(original_record[field], record[field])
18
- end
19
- end
20
-
21
- def test_ignores_non_compare_fields_values
22
- record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
23
- refute(record["version"])
24
- end
25
-
26
- def test_is_sortable
27
- dataset = DeviceDataset.new(count: 20)
28
- fk_records = Array.new.tap do |a|
29
- dataset.records_as_csv_rows.each do |csv_row|
30
- a << FlatKit::Xsv::Record.new(data: csv_row, compare_fields: @compare_fields)
31
- end
32
- end
33
-
34
- sorted = fk_records.sort
35
- output_text = CSV.generate('', headers: dataset.fields, write_headers: true) do |csv|
36
- sorted.each do |row|
37
- csv << row.data
38
- end
39
- end
40
-
41
- assert_equal(output_text, dataset.sorted_records_as_csv)
42
- end
43
-
44
- def test_to_hash
45
- record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
46
- h = record.to_hash
47
- assert_equal(@one_row_dataset.records.first, h)
48
- end
49
-
50
- def test_from_record
51
- rec1 = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
52
- rec2 = FlatKit::Xsv::Record.from_record(rec1)
53
- assert_equal(rec1, rec2)
54
- end
55
-
56
- def test_incomplete_initialization
57
- assert_raises(FlatKit::Error) {
58
- FlatKit::Xsv::Record.new(data: nil, compare_fields: [])
59
- }
60
- end
61
-
62
- def test_to_s_from_csv_record
63
- record = FlatKit::Xsv::Record.new(data: @csv_row, compare_fields: @compare_fields)
64
- line = record.to_s
65
- expected = @one_row_dataset.records_as_csv_rows[0].to_csv
66
- assert_equal(expected, line)
67
- end
68
- end
69
- end
data/test/xsv/test_writer.rb DELETED
@@ -1,89 +0,0 @@
1
- require_relative '../test_helper'
2
-
3
- module TestXsv
4
- class TestWriter < ::Minitest::Test
5
- def setup
6
- @count = 20
7
- @dataset = DeviceDataset.new(count: @count)
8
- @compare_fields = @dataset.compare_fields
9
- @write_path = "tmp/test_writes_to_io.csv"
10
- @read_path = "tmp/test_read.csv"
11
-
12
- File.open(@read_path, "wb") do |f|
13
- f.write(@dataset.records_as_csv)
14
- end
15
-
16
- @reader = ::FlatKit::Xsv::Reader.new(source: @read_path, compare_fields: @compare_fields)
17
- @records = @reader.to_a
18
- end
19
-
20
- def teardown
21
- File.unlink(@write_path) if File.exist?(@write_path)
22
- File.unlink(@read_path) if File.exist?(@read_path)
23
- end
24
-
25
- def test_raises_error_on_invalid_destination
26
- assert_raises(::FlatKit::Error) {
27
- ::FlatKit::Xsv::Writer.new(destination: Object.new, fields: @reader.fields)
28
- }
29
- end
30
-
31
- def test_writes_to_pathname
32
- writer = ::FlatKit::Xsv::Writer.new(destination: @write_path, fields: @reader.fields)
33
- @records.each do |r|
34
- writer.write(r)
35
- end
36
- writer.close
37
- assert_equal(@count, writer.count)
38
-
39
- expected = @dataset.records_as_csv
40
- actual = IO.read(@write_path)
41
- assert_equal(expected, actual)
42
- end
43
-
44
- def test_position
45
- File.open(@write_path, "w+") do |f|
46
- writer = ::FlatKit::Xsv::Writer.new(destination: f,fields: :auto)
47
- records_bytes = 0
48
- header_bytes = nil
49
-
50
- @records.each_with_index do |r, idx|
51
- record_length = r.to_s.bytesize
52
-
53
- position = writer.write(r)
54
- # make sure write stores the last_position api and returns that value
55
- assert_equal(position, writer.last_position)
56
-
57
- header_bytes = writer.header_bytes if header_bytes == nil
58
- assert(header_bytes > 0)
59
-
60
- assert_equal(idx, position.index)
61
- assert_equal(header_bytes + records_bytes, position.offset)
62
- assert_equal(record_length, position.bytesize)
63
-
64
- records_bytes += record_length
65
-
66
- current_position = writer.current_position
67
- assert_equal(idx+1, current_position.index)
68
- assert_equal(header_bytes + records_bytes, current_position.offset)
69
- assert_equal(0, current_position.bytesize)
70
-
71
- end
72
- writer.close
73
-
74
- assert_equal(@count, writer.count)
75
-
76
- expected = @dataset.records_as_csv
77
- actual = IO.read(@write_path)
78
- assert_equal(expected, actual)
79
- end
80
- end
81
-
82
- def test_raises_error_on_io_error
83
- s = StringIO.new
84
- writer = ::FlatKit::Xsv::Writer.new(destination: s, fields: @reader.fields)
85
- s.close_write
86
- assert_raises(::FlatKit::Error) { writer.write(@records.first) }
87
- end
88
- end
89
- end