flat_kit 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +9 -0
  4. data/Manifest.txt +3 -42
  5. data/{bin → exe}/fk +2 -1
  6. data/flat_kit.gemspec +33 -0
  7. data/lib/flat_kit/cli.rb +46 -32
  8. data/lib/flat_kit/command/cat.rb +34 -32
  9. data/lib/flat_kit/command/merge.rb +37 -36
  10. data/lib/flat_kit/command/sort.rb +37 -37
  11. data/lib/flat_kit/command/stats.rb +41 -39
  12. data/lib/flat_kit/command.rb +10 -11
  13. data/lib/flat_kit/descendant_tracker.rb +9 -6
  14. data/lib/flat_kit/error.rb +4 -0
  15. data/lib/flat_kit/event_emitter.rb +5 -2
  16. data/lib/flat_kit/field_stats.rb +31 -26
  17. data/lib/flat_kit/field_type/boolean_type.rb +9 -5
  18. data/lib/flat_kit/field_type/date_type.rb +19 -17
  19. data/lib/flat_kit/field_type/float_type.rb +15 -9
  20. data/lib/flat_kit/field_type/guess_type.rb +9 -6
  21. data/lib/flat_kit/field_type/integer_type.rb +6 -4
  22. data/lib/flat_kit/field_type/null_type.rb +5 -1
  23. data/lib/flat_kit/field_type/string_type.rb +8 -6
  24. data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
  25. data/lib/flat_kit/field_type/unknown_type.rb +12 -8
  26. data/lib/flat_kit/field_type.rb +52 -44
  27. data/lib/flat_kit/format.rb +11 -5
  28. data/lib/flat_kit/input/file.rb +11 -9
  29. data/lib/flat_kit/input/io.rb +18 -21
  30. data/lib/flat_kit/input.rb +8 -7
  31. data/lib/flat_kit/internal_node.rb +22 -19
  32. data/lib/flat_kit/jsonl/format.rb +6 -2
  33. data/lib/flat_kit/jsonl/reader.rb +7 -4
  34. data/lib/flat_kit/jsonl/record.rb +15 -18
  35. data/lib/flat_kit/jsonl/writer.rb +8 -10
  36. data/lib/flat_kit/jsonl.rb +8 -4
  37. data/lib/flat_kit/leaf_node.rb +6 -5
  38. data/lib/flat_kit/log_formatter.rb +20 -0
  39. data/lib/flat_kit/logger.rb +12 -19
  40. data/lib/flat_kit/merge.rb +21 -18
  41. data/lib/flat_kit/merge_tree.rb +5 -6
  42. data/lib/flat_kit/output/file.rb +13 -9
  43. data/lib/flat_kit/output/io.rb +40 -35
  44. data/lib/flat_kit/output.rb +8 -7
  45. data/lib/flat_kit/position.rb +3 -4
  46. data/lib/flat_kit/reader.rb +8 -8
  47. data/lib/flat_kit/record.rb +12 -12
  48. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  49. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  50. data/lib/flat_kit/sort.rb +8 -9
  51. data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
  52. data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
  53. data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
  54. data/lib/flat_kit/stat_type.rb +18 -13
  55. data/lib/flat_kit/stats.rb +12 -14
  56. data/lib/flat_kit/writer.rb +5 -6
  57. data/lib/flat_kit/xsv/format.rb +6 -2
  58. data/lib/flat_kit/xsv/reader.rb +8 -6
  59. data/lib/flat_kit/xsv/record.rb +21 -15
  60. data/lib/flat_kit/xsv/writer.rb +13 -10
  61. data/lib/flat_kit/xsv.rb +7 -4
  62. data/lib/flat_kit.rb +31 -26
  63. metadata +20 -158
  64. data/Rakefile +0 -21
  65. data/examples/stream-active-record-to-csv.rb +0 -42
  66. data/tasks/default.rake +0 -242
  67. data/tasks/extension.rake +0 -38
  68. data/tasks/man.rake +0 -7
  69. data/tasks/this.rb +0 -208
  70. data/test/device_dataset.rb +0 -117
  71. data/test/field_type/test_boolean_type.rb +0 -65
  72. data/test/field_type/test_date_type.rb +0 -71
  73. data/test/field_type/test_float_type.rb +0 -56
  74. data/test/field_type/test_guess_type.rb +0 -14
  75. data/test/field_type/test_integer_type.rb +0 -52
  76. data/test/field_type/test_null_type.rb +0 -41
  77. data/test/field_type/test_string_type.rb +0 -18
  78. data/test/field_type/test_timestamp_type.rb +0 -108
  79. data/test/field_type/test_unknown_type.rb +0 -35
  80. data/test/input/test_file.rb +0 -73
  81. data/test/input/test_io.rb +0 -93
  82. data/test/jsonl/test_format.rb +0 -22
  83. data/test/jsonl/test_reader.rb +0 -49
  84. data/test/jsonl/test_record.rb +0 -61
  85. data/test/jsonl/test_writer.rb +0 -86
  86. data/test/output/test_file.rb +0 -60
  87. data/test/output/test_io.rb +0 -104
  88. data/test/run +0 -23
  89. data/test/stat_type/test_nominal_stats.rb +0 -69
  90. data/test/stat_type/test_numerical_stats.rb +0 -118
  91. data/test/stat_type/test_ordinal_stats.rb +0 -92
  92. data/test/test_conversions.rb +0 -45
  93. data/test/test_event_emitter.rb +0 -89
  94. data/test/test_field_stats.rb +0 -134
  95. data/test/test_field_type.rb +0 -34
  96. data/test/test_format.rb +0 -24
  97. data/test/test_helper.rb +0 -26
  98. data/test/test_merge.rb +0 -40
  99. data/test/test_merge_tree.rb +0 -64
  100. data/test/test_version.rb +0 -11
  101. data/test/xsv/test_format.rb +0 -22
  102. data/test/xsv/test_reader.rb +0 -61
  103. data/test/xsv/test_record.rb +0 -69
  104. data/test/xsv/test_writer.rb +0 -89
@@ -1,7 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class FieldType
5
+ # Internal: Unknown type, this is what we use for unknown values in the data
6
+ #
3
7
  class UnknownType < FieldType
4
-
5
8
  REGEX = %r{\A(na|n/a|unk|unknown)\Z}i
6
9
 
7
10
  def self.type_name
@@ -9,18 +12,19 @@ module FlatKit
9
12
  end
10
13
 
11
14
  def self.matches?(data)
12
- return false unless data.kind_of?(String)
13
- return true if data.length == 0
14
- return REGEX.match?(data)
15
+ return false unless data.is_a?(String)
16
+ return true if data.empty?
17
+
18
+ REGEX.match?(data)
15
19
  end
16
20
 
17
21
  def self.coerce(data)
18
22
  return data if REGEX.match?(data)
19
- return CoerceFailure
20
- rescue
21
- return CoerceFailure
22
- end
23
23
 
24
+ CoerceFailure
25
+ rescue StandardError
26
+ CoerceFailure
27
+ end
24
28
  end
25
29
  end
26
30
  end
@@ -1,75 +1,83 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: The base class for all field types
5
+ #
2
6
  class FieldType
3
-
4
7
  extend FlatKit::DescendantTracker
5
8
 
6
9
  CoerceFailure = Class.new(::Object).freeze
7
10
 
11
+ def self.weights
12
+ @weights ||= {
13
+ # Boolean has crossover with Integer so going to let it overrule Integer
14
+ BooleanType => 5,
15
+
16
+ # Integer could potentially overlap with Float, but it is more restrictive
17
+ # so let it override Float
18
+ IntegerType => 4,
19
+ FloatType => 3,
20
+
21
+ # Date and Timestamps string representation shouldn't intersect with anything so
22
+ # leaving it at the same level as Null and Unknown
23
+ DateType => 2,
24
+ TimestampType => 2,
25
+
26
+ # Null and Unknown shouldn't conflict since their string representations
27
+ # do not intersect
28
+ NullType => 2,
29
+ UnknownType => 2,
30
+
31
+ # StringType is the fallback for anything that has a string
32
+ # representation, so it should lose out on integers, floats, nulls,
33
+ # unknowns as strings
34
+ StringType => 1,
35
+
36
+ # at the bottom - since it should never match anywhere
37
+ GuessType => 0,
38
+ }
39
+ end
40
+
8
41
  def self.candidate_types(data)
9
42
  find_children(:matches?, data)
10
43
  end
11
44
 
45
+ # rubocop:disable Style/RedundantSort
46
+ # We need the stable sort, max_by(&:weight) returns the wrong one
12
47
  def self.best_guess(data)
13
- candidate_types(data).sort_by { |t| t.weight }.last
48
+ candidate_types(data).sort_by(&:weight).last
14
49
  end
50
+ # rubocop:enable Style/RedundantSort
15
51
 
16
52
  def self.type_name
17
- raise NotImplementedError, "must impleent #{self.type_name}"
53
+ raise NotImplementedError, "must impleent #{type_name}"
18
54
  end
19
55
 
20
56
  def self.matches?(data)
21
- raise NotImplementedError, "must implement #{self.name}.matches?(data)"
57
+ raise NotImplementedError, "must implement #{name}.matches?(data)"
22
58
  end
23
59
 
24
60
  def self.coerce(data)
25
- raise NotImplementedError, "must implement #{self.name}.coerce(data)"
61
+ raise NotImplementedError, "must implement #{name}.coerce(data)"
26
62
  end
27
63
 
28
64
  # Each type has a weight so if a value matches multiple types, then the list
29
65
  # can be compared to see where the tie breakers are
30
66
  #
31
- # All the weights are here so that
32
- #
67
+ # All the weights are here so that we can see the order of precedence
33
68
  #
34
69
  def self.weight
35
- # Boolean has crossover with Integer so going to let it overrule Integer
36
- return 5 if self == BooleanType
37
-
38
-
39
- # Integer could potentially overlap with Float, but it is more restrictive
40
- # so let it override Flaot
41
- return 4 if self == IntegerType
42
- return 3 if self == FloatType
43
-
44
- # Date and Timestamps string representation shouldn't intersect with anything so
45
- # leaving it at the same level as Null and Unkonwn
46
- return 2 if self == DateType
47
- return 2 if self == TimestampType
48
-
49
- # Null and Unknown shoulnd't conflict since their string representations
50
- # do not intersect
51
- return 2 if self == NullType
52
- return 2 if self == UnknownType
53
-
54
- # Stringtype is the fallback for anything that has a string
55
- # representation, so it should lose out on integers, floats, nulls,
56
- # unknowns as strings
57
- return 1 if self == StringType
58
-
59
- # at the bottom - since it should never match anywhere
60
- return 0 if self == GuessType
61
-
62
- raise NotImplementedError, "No weight assigned to type #{self} - fix immediately"
70
+ weights.fetch(self) { raise NotImplementedError, "No weight assigned to type #{self} - fix immediately" }
63
71
  end
64
72
  end
65
73
  end
66
74
 
67
- require 'flat_kit/field_type/guess_type'
68
- require 'flat_kit/field_type/boolean_type'
69
- require 'flat_kit/field_type/date_type'
70
- require 'flat_kit/field_type/timestamp_type'
71
- require 'flat_kit/field_type/integer_type'
72
- require 'flat_kit/field_type/float_type'
73
- require 'flat_kit/field_type/null_type'
74
- require 'flat_kit/field_type/string_type'
75
- require 'flat_kit/field_type/unknown_type'
75
+ require "flat_kit/field_type/guess_type"
76
+ require "flat_kit/field_type/boolean_type"
77
+ require "flat_kit/field_type/date_type"
78
+ require "flat_kit/field_type/timestamp_type"
79
+ require "flat_kit/field_type/integer_type"
80
+ require "flat_kit/field_type/float_type"
81
+ require "flat_kit/field_type/null_type"
82
+ require "flat_kit/field_type/string_type"
83
+ require "flat_kit/field_type/unknown_type"
@@ -1,9 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: The base class of all data file format classes
5
+ #
2
6
  class Format
3
7
  extend DescendantTracker
4
8
 
5
9
  def self.format_name
6
- raise NotImplementedError, "#{self.class} must implemente #{self.class}.format_name"
10
+ raise NotImplementedError, "#{self.class} must implement #{self.class}.format_name"
7
11
  end
8
12
 
9
13
  def format_name
@@ -20,15 +24,17 @@ module FlatKit
20
24
  return format unless format.nil?
21
25
 
22
26
  # now try the fallback
23
- format = ::FlatKit::Format.for(fallback)
24
- return format
27
+ ::FlatKit::Format.for(fallback)
25
28
  end
26
29
 
27
30
  def self.for_with_fallback!(path:, fallback: "auto")
28
31
  format = for_with_fallback(path: path, fallback: fallback)
29
- raise ::FlatKit::Error::UnknownFormat, "Unable to figure out format for '#{path}' with fallback '#{fallback}'" if format.nil?
32
+ if format.nil?
33
+ raise ::FlatKit::Error::UnknownFormat,
34
+ "Unable to figure out format for '#{path}' with fallback '#{fallback}'"
35
+ end
30
36
 
31
- return format
37
+ format
32
38
  end
33
39
  end
34
40
  end
@@ -1,25 +1,31 @@
1
- require 'zlib'
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+ require "pathname"
2
5
 
3
6
  module FlatKit
4
7
  class Input
8
+ # Internal: Handler for file based input
9
+ #
5
10
  class File < Input
6
- attr_reader :path
7
- attr_reader :count
11
+ attr_reader :path, :count, :io
8
12
 
9
13
  def self.handles?(obj)
10
14
  return true if obj.instance_of?(Pathname)
11
15
  return false unless obj.instance_of?(String)
12
16
 
13
17
  # in case these get loaded in different orders
14
- return false if ::FlatKit::Input::IO.is_stdin?(obj)
18
+ return false if ::FlatKit::Input::IO.stdin?(obj)
15
19
 
16
- return true
20
+ true
17
21
  end
18
22
 
19
23
  def initialize(obj)
24
+ super()
20
25
  @count = 0
21
26
  @path = Pathname.new(obj)
22
27
  raise FlatKit::Error, "Input #{obj} is not readable" unless @path.readable?
28
+
23
29
  @io = open_input(path)
24
30
  end
25
31
 
@@ -31,10 +37,6 @@ module FlatKit
31
37
  @io.close
32
38
  end
33
39
 
34
- def io
35
- @io
36
- end
37
-
38
40
  private
39
41
 
40
42
  # open the appropriate input type depending on the source file name
@@ -1,35 +1,38 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class Input
5
+ # Internal: Handler for non-filebased input. Generally this is just stdin
6
+ #
3
7
  class IO < Input
4
- STDINS = %w[ stdin STDIN - <stdin> ]
8
+ STDINS = %w[stdin STDIN - <stdin>].freeze
5
9
 
6
10
  def self.handles?(obj)
7
- return true if is_stdin?(obj)
8
- return true if [ ::File, ::StringIO, ::IO ].any? { |klass| obj.kind_of?(klass) }
9
- return false
11
+ return true if stdin?(obj)
12
+ return true if [::File, ::StringIO, ::IO].any? { |klass| obj.is_a?(klass) }
13
+
14
+ false
10
15
  end
11
16
 
12
- def self.is_stdin?(obj)
17
+ def self.stdin?(obj)
13
18
  case obj
14
19
  when String
15
20
  return true if STDINS.include?(obj)
16
21
  when ::IO
17
- return true if obj == ::STDIN
22
+ return true if obj == $stdin
18
23
  end
19
- return false
24
+ false
20
25
  end
21
26
 
22
27
  def initialize(obj)
23
- if self.class.is_stdin?(obj) then
28
+ super()
29
+ if self.class.stdin?(obj)
24
30
  @name = "<STDIN>"
25
31
  @io = $stdin
26
- elsif obj.kind_of?(::File) then
27
- @name = obj.path
28
- @io = obj
29
- elsif obj.kind_of?(::StringIO) then
30
- @name = obj.inspect
32
+ elsif obj.is_a?(::IO)
33
+ @name = (obj.respond_to?(:path) && obj.path) || obj.inspect
31
34
  @io = obj
32
- elsif obj.kind_of?(::IO) then
35
+ elsif obj.is_a?(::StringIO)
33
36
  @name = obj.inspect
34
37
  @io = obj
35
38
  else
@@ -37,18 +40,12 @@ module FlatKit
37
40
  end
38
41
  end
39
42
 
40
- def name
41
- @name
42
- end
43
+ attr_reader :name, :io
43
44
 
44
45
  # this goes to an io stream and we are not in charge of opening it
45
46
  def close
46
47
  @io.close
47
48
  end
48
-
49
- def io
50
- @io
51
- end
52
49
  end
53
50
  end
54
51
  end
@@ -1,14 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Base class of all input handlers
5
+ #
2
6
  class Input
3
7
  extend DescendantTracker
4
8
 
5
9
  def self.from(input)
6
- return input if input.kind_of?(::FlatKit::Input)
10
+ return input if input.is_a?(::FlatKit::Input)
7
11
 
8
12
  in_klass = find_child(:handles?, input)
9
- if in_klass then
10
- return in_klass.new(input)
11
- end
13
+ return in_klass.new(input) if in_klass
12
14
 
13
15
  raise FlatKit::Error, "Unable to create input from #{input.class} : #{input.inspect}"
14
16
  end
@@ -17,7 +19,6 @@ module FlatKit
17
19
  raise NotImplementedError, "#{self.class} must implement #name"
18
20
  end
19
21
 
20
- #
21
22
  def io
22
23
  raise NotImplementedError, "#{self.class} must implement #io"
23
24
  end
@@ -28,5 +29,5 @@ module FlatKit
28
29
  end
29
30
  end
30
31
 
31
- require 'flat_kit/input/io'
32
- require 'flat_kit/input/file'
32
+ require "flat_kit/input/io"
33
+ require "flat_kit/input/file"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: This is a class used internally by MergeTree and should not be used
3
5
  # outside of that context.
@@ -10,22 +12,24 @@ module FlatKit
10
12
  # here.
11
13
  #
12
14
  class InternalNode
13
-
14
15
  include Comparable
15
16
 
16
- attr_accessor :left # Internal Node
17
- attr_accessor :right # Internal Node
18
- attr_accessor :winner # Internal Node
19
- attr_accessor :next_level # Who to tell
20
- attr_accessor :leaf # winning leaf node
17
+ # Internal Nodes
18
+ attr_accessor :left, :right, :winner
19
+
20
+ # Who to tell
21
+ attr_accessor :next_level
22
+
23
+ # winning leaf node
24
+ attr_accessor :leaf
21
25
 
22
26
  def initialize(left:, right:)
23
- @left = left
27
+ @left = left
24
28
  @left.next_level = self
25
29
 
26
- @right = right
30
+ @right = right
27
31
  @right.next_level = self
28
- @next_level = nil
32
+ @next_level = nil
29
33
 
30
34
  play
31
35
  end
@@ -53,32 +57,31 @@ module FlatKit
53
57
  # from the tree.
54
58
  #
55
59
  def player_finished(node)
56
- if left.object_id == node.object_id then
60
+ if left.equal?(node)
57
61
  @left = SentinelInternalNode.new
58
62
  @left.next_level = self
59
- elsif right.object_id == node.object_id then
63
+ elsif right.equal?(node)
60
64
  @right = SentinelInternalNode.new
61
65
  @right.next_level = self
62
66
  else
63
67
  raise FlatKit::Error, "Unknown player #{node}"
64
68
  end
65
69
 
66
- if @right.sentinel? && @left.sentinel? then
67
- next_level.player_finished(self) if next_level
68
- end
70
+ return unless @right.sentinel? && @left.sentinel?
71
+
72
+ next_level.player_finished(self) if next_level
69
73
  end
70
74
 
71
75
  def play
72
- @winner = left <= right ? left : right
73
- if !@winner.sentinel? then
74
- @leaf = winner.leaf
75
- end
76
+ @winner = (left <= right) ? left : right
77
+ @leaf = winner.leaf unless @winner.sentinel?
76
78
  next_level.play if next_level
77
79
  end
78
80
 
79
81
  def <=>(other)
80
82
  return -1 if other.sentinel?
81
- value.<=>(other.value)
83
+
84
+ value <=> (other.value)
82
85
  end
83
86
  end
84
87
  end
@@ -1,5 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  module Jsonl
5
+ # Internal: JSONL format class holding the metadata about the JSONL format
6
+ #
3
7
  class Format < ::FlatKit::Format
4
8
  def self.format_name
5
9
  "jsonl"
@@ -7,10 +11,10 @@ module FlatKit
7
11
 
8
12
  def self.handles?(filename)
9
13
  parts = filename.split(".")
10
- %w[ json jsonl ndjson ].each do |ext|
14
+ %w[json jsonl ndjson].each do |ext|
11
15
  return true if parts.include?(ext)
12
16
  end
13
- return false
17
+ false
14
18
  end
15
19
 
16
20
  def self.reader
@@ -1,8 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  module Jsonl
5
+ # Internal: Reader class that parses and yields records from JSONL files
6
+ #
3
7
  class Reader < ::FlatKit::Reader
4
- attr_reader :input
5
- attr_reader :count
8
+ attr_reader :input, :count
6
9
 
7
10
  def self.format_name
8
11
  ::FlatKit::Jsonl::Format.format_name
@@ -15,13 +18,13 @@ module FlatKit
15
18
  end
16
19
 
17
20
  def each
18
- while line = input.io.gets do
21
+ while (line = input.io.gets)
19
22
  record = ::FlatKit::Jsonl::Record.new(data: line, compare_fields: compare_fields)
20
23
  @count += 1
21
24
  yield record
22
25
  end
23
26
  input.close
24
- rescue => e
27
+ rescue StandardError => e
25
28
  ::FlatKit.logger.error "Error reading jsonl records from #{input.name}: #{e}"
26
29
  raise ::FlatKit::Error, e
27
30
  end
@@ -1,8 +1,12 @@
1
- require 'oj'
2
- require 'flat_kit/record'
1
+ # frozen_string_literal: true
2
+
3
+ require "oj"
4
+ require "flat_kit/record"
3
5
 
4
6
  module FlatKit
5
7
  module Jsonl
8
+ # Internal: Class that exposes data from a JSONL format record to the flatkit api
9
+ #
6
10
  class Record < ::FlatKit::Record
7
11
  attr_reader :compare_data
8
12
 
@@ -11,7 +15,7 @@ module FlatKit
11
15
  end
12
16
 
13
17
  def self.from_record(record)
14
- if record.instance_of?(FlatKit::Jsonl::Record) then
18
+ if record.instance_of?(FlatKit::Jsonl::Record)
15
19
 
16
20
  structured = record.complete_structured_data? ? record.complete_structured_data : nil
17
21
 
@@ -25,22 +29,20 @@ module FlatKit
25
29
  end
26
30
 
27
31
  def initialize(data:, compare_fields: :none,
28
- compare_data: Hash.new,
32
+ compare_data: {},
29
33
  complete_structured_data: nil)
30
34
  super(data: data, compare_fields: compare_fields)
31
35
 
32
36
  @complete_structured_data = complete_structured_data
33
37
 
34
- if complete_structured_data? && (compare_data.nil? || compare_data.empty?) then
35
- @compare_data = complete_structured_data
36
- else
37
- @compare_data = compare_data
38
- end
38
+ @compare_data = if complete_structured_data? && (compare_data.nil? || compare_data.empty?)
39
+ complete_structured_data
40
+ else
41
+ compare_data
42
+ end
39
43
 
40
44
  # only load compare data if it doesn't exist
41
- if data && compare_data.empty? then
42
- quick_parse
43
- end
45
+ quick_parse if data && compare_data.empty?
44
46
  end
45
47
 
46
48
  def [](key)
@@ -59,9 +61,7 @@ module FlatKit
59
61
  # overriding parent accessor since we may be initialized without raw bytes
60
62
  # to parse
61
63
  def data
62
- if @data.nil? && complete_structured_data? then
63
- @data = Oj.dump(complete_structured_data, mode: :json)
64
- end
64
+ @data = Oj.dump(complete_structured_data, mode: :json) if @data.nil? && complete_structured_data?
65
65
  @data
66
66
  end
67
67
  alias to_s data
@@ -79,6 +79,3 @@ module FlatKit
79
79
  end
80
80
  end
81
81
  end
82
-
83
-
84
-
@@ -1,15 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  module Jsonl
5
+ # Internal: Class that writes flatkit records to JSONL files
6
+ #
3
7
  class Writer < ::FlatKit::Writer
4
-
5
8
  def self.format_name
6
9
  ::FlatKit::Jsonl::Format.format_name
7
10
  end
8
11
 
9
- def initialize(destination:)
10
- super
11
- end
12
-
13
12
  # write the record and return the Position the record was written
14
13
  #
15
14
  def write(record)
@@ -22,10 +21,10 @@ module FlatKit
22
21
  else
23
22
  raise FlatKit::Error, "Unable to write records of type #{record.class}"
24
23
  end
25
- rescue FlatKit::Error => fe
26
- raise fe
27
- rescue => e
28
- ::FlatKit.logger.error "Error reading jsonl records from #{output.name}: #{e}"
24
+ rescue FlatKit::Error => e
25
+ raise e
26
+ rescue StandardError => e
27
+ ::FlatKit.logger.error "Error writing jsonl records to #{output.name}: #{e}"
29
28
  raise ::FlatKit::Error, e
30
29
  end
31
30
 
@@ -47,7 +46,6 @@ module FlatKit
47
46
  @last_position = ::FlatKit::Position.new(index: record_index,
48
47
  offset: start_offset,
49
48
  bytesize: bytes_written)
50
-
51
49
  end
52
50
  end
53
51
  end
@@ -1,8 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Top level namespace for the newline-oriented JSON format
5
+ #
2
6
  module Jsonl
3
7
  end
4
8
  end
5
- require 'flat_kit/jsonl/record'
6
- require 'flat_kit/jsonl/reader'
7
- require 'flat_kit/jsonl/writer'
8
- require 'flat_kit/jsonl/format'
9
+ require "flat_kit/jsonl/record"
10
+ require "flat_kit/jsonl/reader"
11
+ require "flat_kit/jsonl/writer"
12
+ require "flat_kit/jsonl/format"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: The LeafNode is a wrapper around a Reader object to enable
3
5
  # a consistent api for use in the MergeTree
@@ -9,11 +11,9 @@ module FlatKit
9
11
  # If all the data is used up from the reader, it also notifies the next level
10
12
  # of that so the next level can remove it from the tree.
11
13
  class LeafNode
12
-
13
14
  include Comparable
14
15
 
15
- attr_reader :reader
16
- attr_reader :value
16
+ attr_reader :reader, :value
17
17
 
18
18
  attr_accessor :next_level
19
19
 
@@ -43,7 +43,7 @@ module FlatKit
43
43
 
44
44
  def update_and_replay
45
45
  self.next
46
- if finished? then
46
+ if finished?
47
47
  ::FlatKit.logger.debug "#{reader.source} has finished reading #{reader.count} records"
48
48
  next_level.player_finished(self)
49
49
  end
@@ -65,7 +65,8 @@ module FlatKit
65
65
 
66
66
  def <=>(other)
67
67
  return -1 if other.sentinel?
68
- self.value.<=>(other.value)
68
+
69
+ value <=> (other.value)
69
70
  end
70
71
  end
71
72
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "logger"
4
+
5
+ module FlatKit
6
+ # Internal: Log formatting class for FlatKit
7
+ #
8
+ class LogFormatter < ::Logger::Formatter
9
+ FORMAT = "%s %5d %05s : %s\n"
10
+ DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
11
+ def initialize
12
+ super
13
+ self.datetime_format = DATETIME_FORMAT
14
+ end
15
+
16
+ def call(severity, time, _progname, msg)
17
+ format(FORMAT, format_datetime(time.utc), Process.pid, severity, msg2str(msg))
18
+ end
19
+ end
20
+ end