flat_kit 0.3.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +9 -0
  4. data/Manifest.txt +3 -42
  5. data/{bin → exe}/fk +2 -1
  6. data/flat_kit.gemspec +33 -0
  7. data/lib/flat_kit/cli.rb +46 -32
  8. data/lib/flat_kit/command/cat.rb +34 -32
  9. data/lib/flat_kit/command/merge.rb +37 -36
  10. data/lib/flat_kit/command/sort.rb +37 -37
  11. data/lib/flat_kit/command/stats.rb +41 -39
  12. data/lib/flat_kit/command.rb +10 -11
  13. data/lib/flat_kit/descendant_tracker.rb +9 -6
  14. data/lib/flat_kit/error.rb +4 -0
  15. data/lib/flat_kit/event_emitter.rb +5 -2
  16. data/lib/flat_kit/field_stats.rb +31 -26
  17. data/lib/flat_kit/field_type/boolean_type.rb +9 -5
  18. data/lib/flat_kit/field_type/date_type.rb +19 -17
  19. data/lib/flat_kit/field_type/float_type.rb +15 -9
  20. data/lib/flat_kit/field_type/guess_type.rb +9 -6
  21. data/lib/flat_kit/field_type/integer_type.rb +6 -4
  22. data/lib/flat_kit/field_type/null_type.rb +5 -1
  23. data/lib/flat_kit/field_type/string_type.rb +8 -6
  24. data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
  25. data/lib/flat_kit/field_type/unknown_type.rb +12 -8
  26. data/lib/flat_kit/field_type.rb +52 -44
  27. data/lib/flat_kit/format.rb +11 -5
  28. data/lib/flat_kit/input/file.rb +11 -9
  29. data/lib/flat_kit/input/io.rb +18 -21
  30. data/lib/flat_kit/input.rb +8 -7
  31. data/lib/flat_kit/internal_node.rb +22 -19
  32. data/lib/flat_kit/jsonl/format.rb +6 -2
  33. data/lib/flat_kit/jsonl/reader.rb +7 -4
  34. data/lib/flat_kit/jsonl/record.rb +15 -18
  35. data/lib/flat_kit/jsonl/writer.rb +8 -10
  36. data/lib/flat_kit/jsonl.rb +8 -4
  37. data/lib/flat_kit/leaf_node.rb +6 -5
  38. data/lib/flat_kit/log_formatter.rb +20 -0
  39. data/lib/flat_kit/logger.rb +12 -19
  40. data/lib/flat_kit/merge.rb +21 -18
  41. data/lib/flat_kit/merge_tree.rb +5 -6
  42. data/lib/flat_kit/output/file.rb +13 -9
  43. data/lib/flat_kit/output/io.rb +40 -35
  44. data/lib/flat_kit/output.rb +8 -7
  45. data/lib/flat_kit/position.rb +3 -4
  46. data/lib/flat_kit/reader.rb +8 -8
  47. data/lib/flat_kit/record.rb +12 -12
  48. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  49. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  50. data/lib/flat_kit/sort.rb +8 -9
  51. data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
  52. data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
  53. data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
  54. data/lib/flat_kit/stat_type.rb +18 -13
  55. data/lib/flat_kit/stats.rb +12 -14
  56. data/lib/flat_kit/writer.rb +5 -6
  57. data/lib/flat_kit/xsv/format.rb +6 -2
  58. data/lib/flat_kit/xsv/reader.rb +8 -6
  59. data/lib/flat_kit/xsv/record.rb +21 -15
  60. data/lib/flat_kit/xsv/writer.rb +13 -10
  61. data/lib/flat_kit/xsv.rb +7 -4
  62. data/lib/flat_kit.rb +31 -26
  63. metadata +20 -158
  64. data/Rakefile +0 -21
  65. data/examples/stream-active-record-to-csv.rb +0 -42
  66. data/tasks/default.rake +0 -242
  67. data/tasks/extension.rake +0 -38
  68. data/tasks/man.rake +0 -7
  69. data/tasks/this.rb +0 -208
  70. data/test/device_dataset.rb +0 -117
  71. data/test/field_type/test_boolean_type.rb +0 -65
  72. data/test/field_type/test_date_type.rb +0 -71
  73. data/test/field_type/test_float_type.rb +0 -56
  74. data/test/field_type/test_guess_type.rb +0 -14
  75. data/test/field_type/test_integer_type.rb +0 -52
  76. data/test/field_type/test_null_type.rb +0 -41
  77. data/test/field_type/test_string_type.rb +0 -18
  78. data/test/field_type/test_timestamp_type.rb +0 -108
  79. data/test/field_type/test_unknown_type.rb +0 -35
  80. data/test/input/test_file.rb +0 -73
  81. data/test/input/test_io.rb +0 -93
  82. data/test/jsonl/test_format.rb +0 -22
  83. data/test/jsonl/test_reader.rb +0 -49
  84. data/test/jsonl/test_record.rb +0 -61
  85. data/test/jsonl/test_writer.rb +0 -86
  86. data/test/output/test_file.rb +0 -60
  87. data/test/output/test_io.rb +0 -104
  88. data/test/run +0 -23
  89. data/test/stat_type/test_nominal_stats.rb +0 -69
  90. data/test/stat_type/test_numerical_stats.rb +0 -118
  91. data/test/stat_type/test_ordinal_stats.rb +0 -92
  92. data/test/test_conversions.rb +0 -45
  93. data/test/test_event_emitter.rb +0 -89
  94. data/test/test_field_stats.rb +0 -134
  95. data/test/test_field_type.rb +0 -34
  96. data/test/test_format.rb +0 -24
  97. data/test/test_helper.rb +0 -26
  98. data/test/test_merge.rb +0 -40
  99. data/test/test_merge_tree.rb +0 -64
  100. data/test/test_version.rb +0 -11
  101. data/test/xsv/test_format.rb +0 -22
  102. data/test/xsv/test_reader.rb +0 -61
  103. data/test/xsv/test_record.rb +0 -69
  104. data/test/xsv/test_writer.rb +0 -89
@@ -1,7 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class FieldType
5
+ # Internal: Unknown type, this is what we use for unknown values in the data
6
+ #
3
7
  class UnknownType < FieldType
4
-
5
8
  REGEX = %r{\A(na|n/a|unk|unknown)\Z}i
6
9
 
7
10
  def self.type_name
@@ -9,18 +12,19 @@ module FlatKit
9
12
  end
10
13
 
11
14
  def self.matches?(data)
12
- return false unless data.kind_of?(String)
13
- return true if data.length == 0
14
- return REGEX.match?(data)
15
+ return false unless data.is_a?(String)
16
+ return true if data.empty?
17
+
18
+ REGEX.match?(data)
15
19
  end
16
20
 
17
21
  def self.coerce(data)
18
22
  return data if REGEX.match?(data)
19
- return CoerceFailure
20
- rescue
21
- return CoerceFailure
22
- end
23
23
 
24
+ CoerceFailure
25
+ rescue StandardError
26
+ CoerceFailure
27
+ end
24
28
  end
25
29
  end
26
30
  end
@@ -1,75 +1,83 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: The base class for all field types
5
+ #
2
6
  class FieldType
3
-
4
7
  extend FlatKit::DescendantTracker
5
8
 
6
9
  CoerceFailure = Class.new(::Object).freeze
7
10
 
11
+ def self.weights
12
+ @weights ||= {
13
+ # Boolean has crossover with Integer so going to let it overrule Integer
14
+ BooleanType => 5,
15
+
16
+ # Integer could potentially overlap with Float, but it is more restrictive
17
+ # so let it override Float
18
+ IntegerType => 4,
19
+ FloatType => 3,
20
+
21
+ # Date and Timestamps string representation shouldn't intersect with anything so
22
+ # leaving it at the same level as Null and Unknown
23
+ DateType => 2,
24
+ TimestampType => 2,
25
+
26
+ # Null and Unknown shouldn't conflict since their string representations
27
+ # do not intersect
28
+ NullType => 2,
29
+ UnknownType => 2,
30
+
31
+ # Stringtype is the fallback for anything that has a string
32
+ # representation, so it should lose out on integers, floats, nulls,
33
+ # unknowns as strings
34
+ StringType => 1,
35
+
36
+ # at the bottom - since it should never match anywhere
37
+ GuessType => 0,
38
+ }
39
+ end
40
+
8
41
  def self.candidate_types(data)
9
42
  find_children(:matches?, data)
10
43
  end
11
44
 
45
+ # rubocop:disable Style/RedundantSort
46
+ # We need the stable sort, max_by(&:weight) returns the wrong one
12
47
  def self.best_guess(data)
13
- candidate_types(data).sort_by { |t| t.weight }.last
48
+ candidate_types(data).sort_by(&:weight).last
14
49
  end
50
+ # rubocop:enable Style/RedundantSort
15
51
 
16
52
  def self.type_name
17
- raise NotImplementedError, "must impleent #{self.type_name}"
53
+ raise NotImplementedError, "must impleent #{type_name}"
18
54
  end
19
55
 
20
56
  def self.matches?(data)
21
- raise NotImplementedError, "must implement #{self.name}.matches?(data)"
57
+ raise NotImplementedError, "must implement #{name}.matches?(data)"
22
58
  end
23
59
 
24
60
  def self.coerce(data)
25
- raise NotImplementedError, "must implement #{self.name}.coerce(data)"
61
+ raise NotImplementedError, "must implement #{name}.coerce(data)"
26
62
  end
27
63
 
28
64
  # Each type has a weight so if a value matches multiple types, then the list
29
65
  # can be compared to see where the tie breakers are
30
66
  #
31
- # All the weights are here so that
32
- #
67
+ # All the weights are here so that we can see the order of precedence
33
68
  #
34
69
  def self.weight
35
- # Boolean has crossover with Integer so going to let it overrule Integer
36
- return 5 if self == BooleanType
37
-
38
-
39
- # Integer could potentially overlap with Float, but it is more restrictive
40
- # so let it override Flaot
41
- return 4 if self == IntegerType
42
- return 3 if self == FloatType
43
-
44
- # Date and Timestamps string representation shouldn't intersect with anything so
45
- # leaving it at the same level as Null and Unkonwn
46
- return 2 if self == DateType
47
- return 2 if self == TimestampType
48
-
49
- # Null and Unknown shoulnd't conflict since their string representations
50
- # do not intersect
51
- return 2 if self == NullType
52
- return 2 if self == UnknownType
53
-
54
- # Stringtype is the fallback for anything that has a string
55
- # representation, so it should lose out on integers, floats, nulls,
56
- # unknowns as strings
57
- return 1 if self == StringType
58
-
59
- # at the bottom - since it should never match anywhere
60
- return 0 if self == GuessType
61
-
62
- raise NotImplementedError, "No weight assigned to type #{self} - fix immediately"
70
+ weights.fetch(self) { raise NotImplementedError, "No weight assigned to type #{self} - fix immediately" }
63
71
  end
64
72
  end
65
73
  end
66
74
 
67
- require 'flat_kit/field_type/guess_type'
68
- require 'flat_kit/field_type/boolean_type'
69
- require 'flat_kit/field_type/date_type'
70
- require 'flat_kit/field_type/timestamp_type'
71
- require 'flat_kit/field_type/integer_type'
72
- require 'flat_kit/field_type/float_type'
73
- require 'flat_kit/field_type/null_type'
74
- require 'flat_kit/field_type/string_type'
75
- require 'flat_kit/field_type/unknown_type'
75
+ require "flat_kit/field_type/guess_type"
76
+ require "flat_kit/field_type/boolean_type"
77
+ require "flat_kit/field_type/date_type"
78
+ require "flat_kit/field_type/timestamp_type"
79
+ require "flat_kit/field_type/integer_type"
80
+ require "flat_kit/field_type/float_type"
81
+ require "flat_kit/field_type/null_type"
82
+ require "flat_kit/field_type/string_type"
83
+ require "flat_kit/field_type/unknown_type"
@@ -1,9 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: The base class of all data file format classes
5
+ #
2
6
  class Format
3
7
  extend DescendantTracker
4
8
 
5
9
  def self.format_name
6
- raise NotImplementedError, "#{self.class} must implemente #{self.class}.format_name"
10
+ raise NotImplementedError, "#{self.class} must implement #{self.class}.format_name"
7
11
  end
8
12
 
9
13
  def format_name
@@ -20,15 +24,17 @@ module FlatKit
20
24
  return format unless format.nil?
21
25
 
22
26
  # now try the fallback
23
- format = ::FlatKit::Format.for(fallback)
24
- return format
27
+ ::FlatKit::Format.for(fallback)
25
28
  end
26
29
 
27
30
  def self.for_with_fallback!(path:, fallback: "auto")
28
31
  format = for_with_fallback(path: path, fallback: fallback)
29
- raise ::FlatKit::Error::UnknownFormat, "Unable to figure out format for '#{path}' with fallback '#{fallback}'" if format.nil?
32
+ if format.nil?
33
+ raise ::FlatKit::Error::UnknownFormat,
34
+ "Unable to figure out format for '#{path}' with fallback '#{fallback}'"
35
+ end
30
36
 
31
- return format
37
+ format
32
38
  end
33
39
  end
34
40
  end
@@ -1,25 +1,31 @@
1
- require 'zlib'
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+ require "pathname"
2
5
 
3
6
  module FlatKit
4
7
  class Input
8
+ # Internal: Handler for file based input
9
+ #
5
10
  class File < Input
6
- attr_reader :path
7
- attr_reader :count
11
+ attr_reader :path, :count, :io
8
12
 
9
13
  def self.handles?(obj)
10
14
  return true if obj.instance_of?(Pathname)
11
15
  return false unless obj.instance_of?(String)
12
16
 
13
17
  # in case these get loaded in different orders
14
- return false if ::FlatKit::Input::IO.is_stdin?(obj)
18
+ return false if ::FlatKit::Input::IO.stdin?(obj)
15
19
 
16
- return true
20
+ true
17
21
  end
18
22
 
19
23
  def initialize(obj)
24
+ super()
20
25
  @count = 0
21
26
  @path = Pathname.new(obj)
22
27
  raise FlatKit::Error, "Input #{obj} is not readable" unless @path.readable?
28
+
23
29
  @io = open_input(path)
24
30
  end
25
31
 
@@ -31,10 +37,6 @@ module FlatKit
31
37
  @io.close
32
38
  end
33
39
 
34
- def io
35
- @io
36
- end
37
-
38
40
  private
39
41
 
40
42
  # open the appropriate input type depending on the source file name
@@ -1,35 +1,38 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class Input
5
+ # Internal: Handler for non-filebased input. Generally this is just stdin
6
+ #
3
7
  class IO < Input
4
- STDINS = %w[ stdin STDIN - <stdin> ]
8
+ STDINS = %w[stdin STDIN - <stdin>].freeze
5
9
 
6
10
  def self.handles?(obj)
7
- return true if is_stdin?(obj)
8
- return true if [ ::File, ::StringIO, ::IO ].any? { |klass| obj.kind_of?(klass) }
9
- return false
11
+ return true if stdin?(obj)
12
+ return true if [::File, ::StringIO, ::IO].any? { |klass| obj.is_a?(klass) }
13
+
14
+ false
10
15
  end
11
16
 
12
- def self.is_stdin?(obj)
17
+ def self.stdin?(obj)
13
18
  case obj
14
19
  when String
15
20
  return true if STDINS.include?(obj)
16
21
  when ::IO
17
- return true if obj == ::STDIN
22
+ return true if obj == $stdin
18
23
  end
19
- return false
24
+ false
20
25
  end
21
26
 
22
27
  def initialize(obj)
23
- if self.class.is_stdin?(obj) then
28
+ super()
29
+ if self.class.stdin?(obj)
24
30
  @name = "<STDIN>"
25
31
  @io = $stdin
26
- elsif obj.kind_of?(::File) then
27
- @name = obj.path
28
- @io = obj
29
- elsif obj.kind_of?(::StringIO) then
30
- @name = obj.inspect
32
+ elsif obj.is_a?(::IO)
33
+ @name = (obj.respond_to?(:path) && obj.path) || obj.inspect
31
34
  @io = obj
32
- elsif obj.kind_of?(::IO) then
35
+ elsif obj.is_a?(::StringIO)
33
36
  @name = obj.inspect
34
37
  @io = obj
35
38
  else
@@ -37,18 +40,12 @@ module FlatKit
37
40
  end
38
41
  end
39
42
 
40
- def name
41
- @name
42
- end
43
+ attr_reader :name, :io
43
44
 
44
45
  # this goes to an io stream and we are not in charge of opening it
45
46
  def close
46
47
  @io.close
47
48
  end
48
-
49
- def io
50
- @io
51
- end
52
49
  end
53
50
  end
54
51
  end
@@ -1,14 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Base class of all input handlers
5
+ #
2
6
  class Input
3
7
  extend DescendantTracker
4
8
 
5
9
  def self.from(input)
6
- return input if input.kind_of?(::FlatKit::Input)
10
+ return input if input.is_a?(::FlatKit::Input)
7
11
 
8
12
  in_klass = find_child(:handles?, input)
9
- if in_klass then
10
- return in_klass.new(input)
11
- end
13
+ return in_klass.new(input) if in_klass
12
14
 
13
15
  raise FlatKit::Error, "Unable to create input from #{input.class} : #{input.inspect}"
14
16
  end
@@ -17,7 +19,6 @@ module FlatKit
17
19
  raise NotImplementedError, "#{self.class} must implement #name"
18
20
  end
19
21
 
20
- #
21
22
  def io
22
23
  raise NotImplementedError, "#{self.class} must implement #io"
23
24
  end
@@ -28,5 +29,5 @@ module FlatKit
28
29
  end
29
30
  end
30
31
 
31
- require 'flat_kit/input/io'
32
- require 'flat_kit/input/file'
32
+ require "flat_kit/input/io"
33
+ require "flat_kit/input/file"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: This is a class used internally by MergeTree and should not be used
3
5
  # outside of that context.
@@ -10,22 +12,24 @@ module FlatKit
10
12
  # here.
11
13
  #
12
14
  class InternalNode
13
-
14
15
  include Comparable
15
16
 
16
- attr_accessor :left # Internal Node
17
- attr_accessor :right # Internal Node
18
- attr_accessor :winner # Internal Node
19
- attr_accessor :next_level # Who to tell
20
- attr_accessor :leaf # winning leaf node
17
+ # Internal Nodes
18
+ attr_accessor :left, :right, :winner
19
+
20
+ # Who to tell
21
+ attr_accessor :next_level
22
+
23
+ # winning leaf node
24
+ attr_accessor :leaf
21
25
 
22
26
  def initialize(left:, right:)
23
- @left = left
27
+ @left = left
24
28
  @left.next_level = self
25
29
 
26
- @right = right
30
+ @right = right
27
31
  @right.next_level = self
28
- @next_level = nil
32
+ @next_level = nil
29
33
 
30
34
  play
31
35
  end
@@ -53,32 +57,31 @@ module FlatKit
53
57
  # from the tree.
54
58
  #
55
59
  def player_finished(node)
56
- if left.object_id == node.object_id then
60
+ if left.equal?(node)
57
61
  @left = SentinelInternalNode.new
58
62
  @left.next_level = self
59
- elsif right.object_id == node.object_id then
63
+ elsif right.equal?(node)
60
64
  @right = SentinelInternalNode.new
61
65
  @right.next_level = self
62
66
  else
63
67
  raise FlatKit::Error, "Unknown player #{node}"
64
68
  end
65
69
 
66
- if @right.sentinel? && @left.sentinel? then
67
- next_level.player_finished(self) if next_level
68
- end
70
+ return unless @right.sentinel? && @left.sentinel?
71
+
72
+ next_level.player_finished(self) if next_level
69
73
  end
70
74
 
71
75
  def play
72
- @winner = left <= right ? left : right
73
- if !@winner.sentinel? then
74
- @leaf = winner.leaf
75
- end
76
+ @winner = (left <= right) ? left : right
77
+ @leaf = winner.leaf unless @winner.sentinel?
76
78
  next_level.play if next_level
77
79
  end
78
80
 
79
81
  def <=>(other)
80
82
  return -1 if other.sentinel?
81
- value.<=>(other.value)
83
+
84
+ value <=> (other.value)
82
85
  end
83
86
  end
84
87
  end
@@ -1,5 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  module Jsonl
5
+ # Internal: JSONL format class holding the metadata about the JSONL format
6
+ #
3
7
  class Format < ::FlatKit::Format
4
8
  def self.format_name
5
9
  "jsonl"
@@ -7,10 +11,10 @@ module FlatKit
7
11
 
8
12
  def self.handles?(filename)
9
13
  parts = filename.split(".")
10
- %w[ json jsonl ndjson ].each do |ext|
14
+ %w[json jsonl ndjson].each do |ext|
11
15
  return true if parts.include?(ext)
12
16
  end
13
- return false
17
+ false
14
18
  end
15
19
 
16
20
  def self.reader
@@ -1,8 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  module Jsonl
5
+ # Internal: Reader class that parses and yields records from JSONL files
6
+ #
3
7
  class Reader < ::FlatKit::Reader
4
- attr_reader :input
5
- attr_reader :count
8
+ attr_reader :input, :count
6
9
 
7
10
  def self.format_name
8
11
  ::FlatKit::Jsonl::Format.format_name
@@ -15,13 +18,13 @@ module FlatKit
15
18
  end
16
19
 
17
20
  def each
18
- while line = input.io.gets do
21
+ while (line = input.io.gets)
19
22
  record = ::FlatKit::Jsonl::Record.new(data: line, compare_fields: compare_fields)
20
23
  @count += 1
21
24
  yield record
22
25
  end
23
26
  input.close
24
- rescue => e
27
+ rescue StandardError => e
25
28
  ::FlatKit.logger.error "Error reading jsonl records from #{input.name}: #{e}"
26
29
  raise ::FlatKit::Error, e
27
30
  end
@@ -1,8 +1,12 @@
1
- require 'oj'
2
- require 'flat_kit/record'
1
+ # frozen_string_literal: true
2
+
3
+ require "oj"
4
+ require "flat_kit/record"
3
5
 
4
6
  module FlatKit
5
7
  module Jsonl
8
+ # Internal: Class that exposes data from a JSONL format record to the flatkit api
9
+ #
6
10
  class Record < ::FlatKit::Record
7
11
  attr_reader :compare_data
8
12
 
@@ -11,7 +15,7 @@ module FlatKit
11
15
  end
12
16
 
13
17
  def self.from_record(record)
14
- if record.instance_of?(FlatKit::Jsonl::Record) then
18
+ if record.instance_of?(FlatKit::Jsonl::Record)
15
19
 
16
20
  structured = record.complete_structured_data? ? record.complete_structured_data : nil
17
21
 
@@ -25,22 +29,20 @@ module FlatKit
25
29
  end
26
30
 
27
31
  def initialize(data:, compare_fields: :none,
28
- compare_data: Hash.new,
32
+ compare_data: {},
29
33
  complete_structured_data: nil)
30
34
  super(data: data, compare_fields: compare_fields)
31
35
 
32
36
  @complete_structured_data = complete_structured_data
33
37
 
34
- if complete_structured_data? && (compare_data.nil? || compare_data.empty?) then
35
- @compare_data = complete_structured_data
36
- else
37
- @compare_data = compare_data
38
- end
38
+ @compare_data = if complete_structured_data? && (compare_data.nil? || compare_data.empty?)
39
+ complete_structured_data
40
+ else
41
+ compare_data
42
+ end
39
43
 
40
44
  # only load compare data if it doesn't exist
41
- if data && compare_data.empty? then
42
- quick_parse
43
- end
45
+ quick_parse if data && compare_data.empty?
44
46
  end
45
47
 
46
48
  def [](key)
@@ -59,9 +61,7 @@ module FlatKit
59
61
  # overriding parent accessor since we may be initialized without raw bytes
60
62
  # to parse
61
63
  def data
62
- if @data.nil? && complete_structured_data? then
63
- @data = Oj.dump(complete_structured_data, mode: :json)
64
- end
64
+ @data = Oj.dump(complete_structured_data, mode: :json) if @data.nil? && complete_structured_data?
65
65
  @data
66
66
  end
67
67
  alias to_s data
@@ -79,6 +79,3 @@ module FlatKit
79
79
  end
80
80
  end
81
81
  end
82
-
83
-
84
-
@@ -1,15 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  module Jsonl
5
+ # Internal: Class that writes flatkit records to JSONL files
6
+ #
3
7
  class Writer < ::FlatKit::Writer
4
-
5
8
  def self.format_name
6
9
  ::FlatKit::Jsonl::Format.format_name
7
10
  end
8
11
 
9
- def initialize(destination:)
10
- super
11
- end
12
-
13
12
  # write the record and return the Position the record was written
14
13
  #
15
14
  def write(record)
@@ -22,10 +21,10 @@ module FlatKit
22
21
  else
23
22
  raise FlatKit::Error, "Unable to write records of type #{record.class}"
24
23
  end
25
- rescue FlatKit::Error => fe
26
- raise fe
27
- rescue => e
28
- ::FlatKit.logger.error "Error reading jsonl records from #{output.name}: #{e}"
24
+ rescue FlatKit::Error => e
25
+ raise e
26
+ rescue StandardError => e
27
+ ::FlatKit.logger.error "Error writing jsonl records to #{output.name}: #{e}"
29
28
  raise ::FlatKit::Error, e
30
29
  end
31
30
 
@@ -47,7 +46,6 @@ module FlatKit
47
46
  @last_position = ::FlatKit::Position.new(index: record_index,
48
47
  offset: start_offset,
49
48
  bytesize: bytes_written)
50
-
51
49
  end
52
50
  end
53
51
  end
@@ -1,8 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Top level namespace for the newline-oriented JSON format
5
+ #
2
6
  module Jsonl
3
7
  end
4
8
  end
5
- require 'flat_kit/jsonl/record'
6
- require 'flat_kit/jsonl/reader'
7
- require 'flat_kit/jsonl/writer'
8
- require 'flat_kit/jsonl/format'
9
+ require "flat_kit/jsonl/record"
10
+ require "flat_kit/jsonl/reader"
11
+ require "flat_kit/jsonl/writer"
12
+ require "flat_kit/jsonl/format"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: The LeafNode is a wrapper around a Reader object to enable
3
5
  # a consistent api for use in the MergeTree
@@ -9,11 +11,9 @@ module FlatKit
9
11
  # If all the data is used up from the reader, it also notifies the next level
10
12
  # of that so the next level can remove it from the tree.
11
13
  class LeafNode
12
-
13
14
  include Comparable
14
15
 
15
- attr_reader :reader
16
- attr_reader :value
16
+ attr_reader :reader, :value
17
17
 
18
18
  attr_accessor :next_level
19
19
 
@@ -43,7 +43,7 @@ module FlatKit
43
43
 
44
44
  def update_and_replay
45
45
  self.next
46
- if finished? then
46
+ if finished?
47
47
  ::FlatKit.logger.debug "#{reader.source} has finished reading #{reader.count} records"
48
48
  next_level.player_finished(self)
49
49
  end
@@ -65,7 +65,8 @@ module FlatKit
65
65
 
66
66
  def <=>(other)
67
67
  return -1 if other.sentinel?
68
- self.value.<=>(other.value)
68
+
69
+ value <=> (other.value)
69
70
  end
70
71
  end
71
72
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "logger"
4
+
5
+ module FlatKit
6
+ # Internal: Log formatting class for FlatKit
7
+ #
8
+ class LogFormatter < ::Logger::Formatter
9
+ FORMAT = "%s %5d %05s : %s\n"
10
+ DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
11
+ def initialize
12
+ super
13
+ self.datetime_format = DATETIME_FORMAT
14
+ end
15
+
16
+ def call(severity, time, _progname, msg)
17
+ format(FORMAT, format_datetime(time.utc), Process.pid, severity, msg2str(msg))
18
+ end
19
+ end
20
+ end