flat_kit 0.3.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +13 -0
  4. data/Manifest.txt +3 -42
  5. data/README.md +2 -0
  6. data/{bin → exe}/fk +2 -1
  7. data/flat_kit.gemspec +33 -0
  8. data/lib/flat_kit/cli.rb +46 -32
  9. data/lib/flat_kit/command/cat.rb +34 -32
  10. data/lib/flat_kit/command/merge.rb +37 -36
  11. data/lib/flat_kit/command/sort.rb +37 -37
  12. data/lib/flat_kit/command/stats.rb +41 -39
  13. data/lib/flat_kit/command.rb +10 -11
  14. data/lib/flat_kit/descendant_tracker.rb +9 -6
  15. data/lib/flat_kit/error.rb +4 -0
  16. data/lib/flat_kit/event_emitter.rb +6 -3
  17. data/lib/flat_kit/field_stats.rb +31 -26
  18. data/lib/flat_kit/field_type/boolean_type.rb +9 -5
  19. data/lib/flat_kit/field_type/date_type.rb +19 -17
  20. data/lib/flat_kit/field_type/float_type.rb +15 -9
  21. data/lib/flat_kit/field_type/guess_type.rb +9 -6
  22. data/lib/flat_kit/field_type/integer_type.rb +6 -4
  23. data/lib/flat_kit/field_type/null_type.rb +5 -1
  24. data/lib/flat_kit/field_type/string_type.rb +8 -6
  25. data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
  26. data/lib/flat_kit/field_type/unknown_type.rb +12 -8
  27. data/lib/flat_kit/field_type.rb +52 -44
  28. data/lib/flat_kit/format.rb +11 -5
  29. data/lib/flat_kit/input/file.rb +11 -9
  30. data/lib/flat_kit/input/io.rb +18 -21
  31. data/lib/flat_kit/input.rb +8 -7
  32. data/lib/flat_kit/internal_node.rb +22 -19
  33. data/lib/flat_kit/jsonl/format.rb +6 -2
  34. data/lib/flat_kit/jsonl/reader.rb +7 -4
  35. data/lib/flat_kit/jsonl/record.rb +15 -18
  36. data/lib/flat_kit/jsonl/writer.rb +8 -10
  37. data/lib/flat_kit/jsonl.rb +8 -4
  38. data/lib/flat_kit/leaf_node.rb +6 -5
  39. data/lib/flat_kit/log_formatter.rb +20 -0
  40. data/lib/flat_kit/logger.rb +13 -21
  41. data/lib/flat_kit/merge.rb +21 -18
  42. data/lib/flat_kit/merge_tree.rb +5 -6
  43. data/lib/flat_kit/output/file.rb +13 -9
  44. data/lib/flat_kit/output/io.rb +40 -35
  45. data/lib/flat_kit/output.rb +8 -7
  46. data/lib/flat_kit/position.rb +3 -4
  47. data/lib/flat_kit/reader.rb +8 -8
  48. data/lib/flat_kit/record.rb +12 -12
  49. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  50. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  51. data/lib/flat_kit/sort.rb +8 -13
  52. data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
  53. data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
  54. data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
  55. data/lib/flat_kit/stat_type.rb +18 -13
  56. data/lib/flat_kit/stats.rb +12 -15
  57. data/lib/flat_kit/writer.rb +5 -6
  58. data/lib/flat_kit/xsv/format.rb +6 -2
  59. data/lib/flat_kit/xsv/reader.rb +8 -6
  60. data/lib/flat_kit/xsv/record.rb +22 -18
  61. data/lib/flat_kit/xsv/writer.rb +13 -10
  62. data/lib/flat_kit/xsv.rb +7 -4
  63. data/lib/flat_kit.rb +31 -26
  64. metadata +20 -161
  65. data/Rakefile +0 -21
  66. data/examples/stream-active-record-to-csv.rb +0 -42
  67. data/tasks/default.rake +0 -242
  68. data/tasks/extension.rake +0 -38
  69. data/tasks/man.rake +0 -7
  70. data/tasks/this.rb +0 -208
  71. data/test/device_dataset.rb +0 -117
  72. data/test/field_type/test_boolean_type.rb +0 -65
  73. data/test/field_type/test_date_type.rb +0 -71
  74. data/test/field_type/test_float_type.rb +0 -56
  75. data/test/field_type/test_guess_type.rb +0 -14
  76. data/test/field_type/test_integer_type.rb +0 -52
  77. data/test/field_type/test_null_type.rb +0 -41
  78. data/test/field_type/test_string_type.rb +0 -18
  79. data/test/field_type/test_timestamp_type.rb +0 -108
  80. data/test/field_type/test_unknown_type.rb +0 -35
  81. data/test/input/test_file.rb +0 -73
  82. data/test/input/test_io.rb +0 -93
  83. data/test/jsonl/test_format.rb +0 -22
  84. data/test/jsonl/test_reader.rb +0 -49
  85. data/test/jsonl/test_record.rb +0 -61
  86. data/test/jsonl/test_writer.rb +0 -86
  87. data/test/output/test_file.rb +0 -60
  88. data/test/output/test_io.rb +0 -104
  89. data/test/run +0 -23
  90. data/test/stat_type/test_nominal_stats.rb +0 -69
  91. data/test/stat_type/test_numerical_stats.rb +0 -118
  92. data/test/stat_type/test_ordinal_stats.rb +0 -92
  93. data/test/test_conversions.rb +0 -45
  94. data/test/test_event_emitter.rb +0 -89
  95. data/test/test_field_stats.rb +0 -134
  96. data/test/test_field_type.rb +0 -34
  97. data/test/test_format.rb +0 -24
  98. data/test/test_helper.rb +0 -26
  99. data/test/test_merge.rb +0 -40
  100. data/test/test_merge_tree.rb +0 -64
  101. data/test/test_version.rb +0 -11
  102. data/test/xsv/test_format.rb +0 -22
  103. data/test/xsv/test_reader.rb +0 -61
  104. data/test/xsv/test_record.rb +0 -69
  105. data/test/xsv/test_writer.rb +0 -89
@@ -1,36 +1,28 @@
1
- require 'logger'
1
+ # frozen_string_literal: true
2
2
 
3
- module FlatKit
4
- class LogFormatter < ::Logger::Formatter
5
- FORMAT = "%s %5d %05s : %s\n".freeze
6
- DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ".freeze
7
- def initialize
8
- super
9
- self.datetime_format = DATETIME_FORMAT
10
- end
11
-
12
- def call(severity, time, progname, msg)
13
- FORMAT % [format_datetime(time.utc), Process.pid, severity, msg2str(msg)]
14
- end
15
- end
3
+ require "logger"
16
4
 
5
+ # Public: Top level namespace for the gem
6
+ #
7
+ module FlatKit
8
+ # Internal: Logger class
9
+ #
17
10
  class Logger
18
11
  def self.for_io(io)
19
12
  ::Logger.new(io, formatter: LogFormatter.new)
20
13
  end
21
14
 
22
15
  def self.for_path(path)
23
- io = File.open(path.to_s, "a")
24
- for_io(io)
16
+ for_io(File.open(path.to_s, "a"))
25
17
  end
26
18
  end
27
19
 
28
20
  def self.log_to(destination = $stderr)
29
- if destination.kind_of?(::IO) then
30
- @logger = ::FlatKit::Logger.for_io(destination)
31
- else
32
- @logger = ::FlatKit::Logger.for_path(destination)
33
- end
21
+ @logger = if destination.is_a?(::IO)
22
+ ::FlatKit::Logger.for_io(destination)
23
+ else
24
+ ::FlatKit::Logger.for_path(destination)
25
+ end
34
26
  end
35
27
 
36
28
  def self.logger
@@ -1,15 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Class implementing merging from N inputs and output to 1 output.
5
+ #
2
6
  class Merge
3
-
4
7
  include ::FlatKit::EventEmitter
5
8
 
6
- attr_reader :readers
7
- attr_reader :writer
8
- attr_reader :compare_fields
9
+ attr_reader :readers, :writer, :compare_fields
9
10
 
10
- def initialize(inputs:, input_fallback: "auto",
11
- output:, output_fallback: "auto",
12
- compare_fields:)
11
+ def initialize(inputs:, output:, compare_fields:, input_fallback: "auto", output_fallback: "auto")
13
12
  @compare_fields = compare_fields
14
13
  @readers = ::FlatKit::Reader.create_readers_from_paths(paths: inputs, compare_fields: @compare_fields,
15
14
  fallback: input_fallback)
@@ -19,21 +18,12 @@ module FlatKit
19
18
 
20
19
  def call
21
20
  ::FlatKit.logger.debug "Merging the following files into #{writer.destination}"
22
- ::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(", ")}"
21
+ ::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(', ')}"
23
22
  readers.each do |r|
24
23
  ::FlatKit.logger.debug " #{r.source}"
25
24
  end
26
25
 
27
- merge_tree = ::FlatKit::MergeTree.new(readers)
28
-
29
- notify_listeners(name: :start, data: :start)
30
- merge_tree.each do |record|
31
-
32
- position = writer.write(record)
33
- meta = { position: position }
34
- notify_listeners(name: :record, data: record, meta: meta)
35
- end
36
- notify_listeners(name: :stop, data: :stop)
26
+ run_merge(readers)
37
27
 
38
28
  readers.each do |r|
39
29
  ::FlatKit.logger.debug " #{r.source} produced #{r.count} records"
@@ -42,5 +32,18 @@ module FlatKit
42
32
  writer.close
43
33
  ::FlatKit.logger.debug "Wrote #{writer.count} records to #{writer.destination}"
44
34
  end
35
+
36
+ private
37
+
38
+ def run_merge(readers)
39
+ tree = ::FlatKit::MergeTree.new(readers)
40
+ notify_listeners(name: :start, data: :start)
41
+ tree.each do |record|
42
+ position = writer.write(record)
43
+ meta = { position: position }
44
+ notify_listeners(name: :record, data: record, meta: meta)
45
+ end
46
+ notify_listeners(name: :stop, data: :stop)
47
+ end
45
48
  end
46
49
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: Merge a list of sorted records from Readers into a single output Writer
3
5
  #
@@ -29,9 +31,7 @@ module FlatKit
29
31
  class MergeTree
30
32
  include Enumerable
31
33
 
32
- attr_reader :leaves
33
- attr_reader :levels
34
- attr_reader :readers
34
+ attr_reader :leaves, :levels, :readers
35
35
 
36
36
  def initialize(readers)
37
37
  @readers = readers
@@ -44,9 +44,7 @@ module FlatKit
44
44
 
45
45
  # Need to pad the leaves to an even number so that the slicing by 2 for
46
46
  # the tournament will work
47
- if @leaves.size.odd? then
48
- @leaves << SentinelLeafNode.new
49
- end
47
+ @leaves << SentinelLeafNode.new if @leaves.size.odd?
50
48
 
51
49
  init_tree
52
50
  end
@@ -94,6 +92,7 @@ module FlatKit
94
92
  def each
95
93
  loop do
96
94
  break if root.leaf.finished?
95
+
97
96
  yield root.value
98
97
  # consume the yielded value and have the tournament tree replay those
99
98
  # brackets affected
@@ -1,22 +1,31 @@
1
- require 'zlib'
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+ require "pathname"
2
5
 
3
6
  module FlatKit
4
7
  class Output
8
+ # Internal: File output implementation
9
+ #
5
10
  class File < Output
6
11
  attr_reader :path
7
12
 
13
+ # internal api method for testing purposes
14
+ attr_reader :io
15
+
8
16
  def self.handles?(obj)
9
17
  return true if obj.instance_of?(Pathname)
10
18
  return false unless obj.instance_of?(String)
11
19
 
12
20
  # in case these get loaded in different orders
13
- return false if ::FlatKit::Output::IO.is_stdout?(obj)
14
- return false if ::FlatKit::Output::IO.is_stderr?(obj)
21
+ return false if ::FlatKit::Output::IO.stdout?(obj)
22
+ return false if ::FlatKit::Output::IO.stderr?(obj)
15
23
 
16
- return true
24
+ true
17
25
  end
18
26
 
19
27
  def initialize(obj)
28
+ super()
20
29
  @path = Pathname.new(obj)
21
30
  path.dirname.mkpath
22
31
  @io = open_output(path)
@@ -30,11 +39,6 @@ module FlatKit
30
39
  @io.close
31
40
  end
32
41
 
33
- # internal api method for testing purposes
34
- def io
35
- @io
36
- end
37
-
38
42
  private
39
43
 
40
44
  # open the appropriate output type depending on the destination file name
@@ -1,73 +1,78 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class Output
5
+ # Internal: Non-file Output implementation - this is generally to stdout or stderr
6
+ #
3
7
  class IO < Output
4
- attr_reader :count
8
+ attr_reader :count, :name
9
+
10
+ # internal api method for testing
11
+ attr_reader :io
5
12
 
6
- STDOUTS = %w[ stdout STDOUT - <stdout> ]
7
- STDERRS = %w[ stderr STDERR <stderr> ]
13
+ STDOUTS = %w[stdout STDOUT - <stdout>].freeze
14
+ STDERRS = %w[stderr STDERR <stderr>].freeze
8
15
 
9
16
  def self.handles?(obj)
10
- return true if is_stderr?(obj)
11
- return true if is_stdout?(obj)
12
- return true if [ ::File, ::StringIO, ::IO ].any? { |klass| obj.kind_of?(klass) }
13
- return false
17
+ return true if stderr?(obj)
18
+ return true if stdout?(obj)
19
+ return true if [::File, ::StringIO, ::IO].any? { |klass| obj.is_a?(klass) }
20
+
21
+ false
14
22
  end
15
23
 
16
- def self.is_stderr?(obj)
24
+ def self.stderr?(obj)
17
25
  case obj
18
26
  when String
19
27
  return true if STDERRS.include?(obj)
20
28
  when ::IO
21
- return true if obj == ::STDERR
29
+ return true if obj == $stderr
22
30
  end
23
- return false
31
+ false
24
32
  end
25
33
 
26
- def self.is_stdout?(obj)
34
+ def self.stdout?(obj)
27
35
  case obj
28
36
  when String
29
37
  return true if STDOUTS.include?(obj)
30
38
  when ::IO
31
- return true if obj == ::STDOUT
39
+ return true if obj == $stdout
32
40
  end
33
- return false
41
+ false
34
42
  end
35
43
 
36
44
  def initialize(obj)
45
+ super()
37
46
  @count = 0
38
- if self.class.is_stdout?(obj) then
47
+ @name = nil
48
+ @io = nil
49
+ init_name_and_io(obj)
50
+ end
51
+
52
+ # this goes to an io stream and we are not in charge of opening it
53
+ def close
54
+ @io.close
55
+ end
56
+
57
+ private
58
+
59
+ def init_name_and_io(obj)
60
+ if self.class.stdout?(obj)
39
61
  @name = "<STDOUT>"
40
62
  @io = $stdout
41
- elsif self.class.is_stderr?(obj) then
63
+ elsif self.class.stderr?(obj)
42
64
  @name = "<STDERR>"
43
65
  @io = $stderr
44
- elsif obj.kind_of?(::File) then
45
- @name = obj.path
66
+ elsif obj.is_a?(::IO)
67
+ @name = (obj.respond_to?(:path) && obj.path) || obj.inspect
46
68
  @io = obj
47
- elsif obj.kind_of?(::StringIO) then
48
- @name = obj.inspect
49
- @io = obj
50
- elsif obj.kind_of?(::IO) then
69
+ elsif obj.is_a?(::StringIO)
51
70
  @name = obj.inspect
52
71
  @io = obj
53
72
  else
54
73
  raise ::FlatKit::Error, "Unable to create #{self.class} from #{obj.class} : #{obj.inspect}"
55
74
  end
56
75
  end
57
-
58
- def name
59
- @name
60
- end
61
-
62
- # this goes to an io stream and we are not in charge of opening it
63
- def close
64
- @io.close
65
- end
66
-
67
- # internal api method for testing
68
- def io
69
- @io
70
- end
71
76
  end
72
77
  end
73
78
  end
@@ -1,14 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Base class for all output handlers
5
+ #
2
6
  class Output
3
7
  extend DescendantTracker
4
8
 
5
9
  def self.from(out)
6
- return out if out.kind_of?(::FlatKit::Output)
10
+ return out if out.is_a?(::FlatKit::Output)
7
11
 
8
12
  out_klass = find_child(:handles?, out)
9
- if out_klass then
10
- return out_klass.new(out)
11
- end
13
+ return out_klass.new(out) if out_klass
12
14
 
13
15
  raise FlatKit::Error, "Unable to create output from #{out.class} : #{out.inspect}"
14
16
  end
@@ -17,7 +19,6 @@ module FlatKit
17
19
  raise NotImplementedError, "#{self.class} must implement #name"
18
20
  end
19
21
 
20
- #
21
22
  def io
22
23
  raise NotImplementedError, "#{self.class} must implement #io"
23
24
  end
@@ -32,5 +33,5 @@ module FlatKit
32
33
  end
33
34
  end
34
35
 
35
- require 'flat_kit/output/io'
36
- require 'flat_kit/output/file'
36
+ require "flat_kit/output/io"
37
+ require "flat_kit/output/file"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # The information about the position of a record in an IO stream
3
5
  #
@@ -5,10 +7,7 @@ module FlatKit
5
7
  # information about the record that was just written
6
8
  #
7
9
  class Position
8
-
9
- attr_reader :index # zero based
10
- attr_reader :offset # byte offset in the IO stream
11
- attr_reader :bytesize # byte length of the record
10
+ attr_reader :index, :offset, :bytesize # zero based # byte offset in the IO stream # byte length of the record
12
11
 
13
12
  def initialize(index: nil, offset: nil, bytesize: nil)
14
13
  @index = index
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: the base class for all format readers.
3
5
  #
@@ -14,24 +16,21 @@ module FlatKit
14
16
  # API:
15
17
  #
16
18
  # initialize(source:, compare_fields:)
17
- # each -> Yields / returns
19
+ # each -> Yields / returns
18
20
  #
19
21
  class Reader
20
22
  include Enumerable
21
23
 
22
- attr_reader :source
23
- attr_reader :compare_fields
24
+ attr_reader :source, :compare_fields
24
25
 
25
26
  def self.create_reader_from_path(path: "-", fallback: "auto", compare_fields: :none)
26
27
  format = ::FlatKit::Format.for_with_fallback!(path: path, fallback: fallback)
27
- return format.reader.new(source: path, compare_fields: compare_fields)
28
+ format.reader.new(source: path, compare_fields: compare_fields)
28
29
  end
29
30
 
30
31
  def self.create_readers_from_paths(paths:, fallback: "auto", compare_fields: :none)
31
32
  # default to stdin if there are no paths
32
- if paths.empty? then
33
- paths << "-"
34
- end
33
+ paths << "-" if paths.empty?
35
34
 
36
35
  paths.map do |path|
37
36
  create_reader_from_path(path: path, fallback: fallback, compare_fields: compare_fields)
@@ -55,7 +54,8 @@ module FlatKit
55
54
 
56
55
  def resolve_compare_fields(value)
57
56
  return [] if value == :none
58
- return value
57
+
58
+ value
59
59
  end
60
60
  end
61
61
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: The base class that all record classes should inherit from.
3
5
  #
@@ -35,11 +37,9 @@ module FlatKit
35
37
  # # the initialize method must call super(data:, compare_fields:) to
36
38
  # initialize the root data structures
37
39
  class Record
38
-
39
40
  include Comparable
40
41
 
41
- attr_reader :data
42
- attr_reader :compare_fields
42
+ attr_reader :data, :compare_fields
43
43
 
44
44
  def initialize(data:, compare_fields:)
45
45
  @data = data
@@ -57,15 +57,15 @@ module FlatKit
57
57
  my_val = self[field]
58
58
  other_val = other[field]
59
59
 
60
- if my_val.nil? && other_val.nil? then
61
- compare_result = 0
62
- elsif my_val.nil?
63
- compare_result = -1
64
- elsif other_val.nil?
65
- compare_result = 1
66
- else
67
- compare_result = my_val.<=>(other_val)
68
- end
60
+ compare_result = if my_val.nil? && other_val.nil?
61
+ 0
62
+ elsif my_val.nil?
63
+ -1
64
+ elsif other_val.nil?
65
+ 1
66
+ else
67
+ my_val <=> other_val
68
+ end
69
69
 
70
70
  return compare_result unless compare_result.zero?
71
71
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: The Sentinel Internal Node is a private class used by the MergeTree
3
5
  # class.
@@ -8,12 +10,10 @@ module FlatKit
8
10
  class SentinelInternalNode
9
11
  include Comparable
10
12
 
11
- attr_reader :left
12
- attr_reader :right
13
- attr_reader :winner
13
+ attr_reader :left, :right, :winner
14
14
  attr_accessor :next_level
15
15
 
16
- def initialize(left: nil, right: nil)
16
+ def initialize(*)
17
17
  @left = nil
18
18
  @right = nil
19
19
  @winner = nil
@@ -31,7 +31,8 @@ module FlatKit
31
31
  # A sentinel node is always greater than any other node
32
32
  def <=>(other)
33
33
  return 0 if other.sentinel?
34
- return 1
34
+
35
+ 1
35
36
  end
36
37
  end
37
38
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: The Sentinel Leaf Node is used internally by the MergeTree
3
5
  #
@@ -31,7 +33,8 @@ module FlatKit
31
33
  # A sentinel node is always greater than any other node
32
34
  def <=>(other)
33
35
  return 0 if other.sentinel?
34
- return 1
36
+
37
+ 1
35
38
  end
36
39
  end
37
40
  end
data/lib/flat_kit/sort.rb CHANGED
@@ -1,13 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Sorts an Input and sends the sorted records to an Output
5
+ #
2
6
  class Sort
3
- attr_reader :reader
4
- attr_reader :writer
5
- attr_reader :compare_fields
6
-
7
- def initialize(input:, input_fallback: "auto",
8
- output:, output_fallback: "auto",
9
- compare_fields:)
7
+ attr_reader :reader, :writer, :compare_fields
10
8
 
9
+ def initialize(input:, output:, compare_fields:, input_fallback: "auto", output_fallback: "auto")
11
10
  @compare_fields = compare_fields
12
11
  @reader = ::FlatKit::Reader.create_reader_from_path(path: input, compare_fields: @compare_fields,
13
12
  fallback: input_fallback)
@@ -16,12 +15,8 @@ module FlatKit
16
15
  end
17
16
 
18
17
  def call
19
- ::FlatKit.logger.info "Sorting #{reader.source} into #{writer.destination} using key #{compare_fields.join(", ")}"
20
- records = Array.new.tap do |a|
21
- reader.each do |r|
22
- a << r
23
- end
24
- end
18
+ ::FlatKit.logger.info "Sorting #{reader.source} into #{writer.destination} using key #{compare_fields.join(', ')}"
19
+ records = reader.map { |r| r }
25
20
  ::FlatKit.logger.info "Read #{reader.count} records into #{records.size} element array"
26
21
  records.sort!
27
22
  ::FlatKit.logger.info "Sorted #{records.size} records"
@@ -1,21 +1,22 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class StatType
3
-
4
- # Status object to keep track of the count and frequency of values
5
+ # Internal: Status object to keep track of the count and frequency of values.
5
6
  #
6
7
  class NominalStats < StatType
7
-
8
8
  attr_reader :count
9
9
 
10
10
  def self.default_stats
11
- @default_stats ||= %w[ count ]
11
+ @default_stats ||= %w[count]
12
12
  end
13
13
 
14
14
  def self.all_stats
15
- @all_stats ||= %w[ count unique_count unique_values mode ]
15
+ @all_stats ||= %w[count unique_count unique_values mode]
16
16
  end
17
17
 
18
18
  def initialize(collecting_frequencies: false)
19
+ super()
19
20
  @mutex = Mutex.new
20
21
  @count = 0
21
22
  @collecting_frequencies = collecting_frequencies
@@ -24,26 +25,31 @@ module FlatKit
24
25
 
25
26
  def collected_stats
26
27
  return self.class.default_stats unless @collecting_frequencies
27
- return self.class.all_stats
28
+
29
+ self.class.all_stats
28
30
  end
29
31
 
30
32
  def mode
31
33
  return nil unless @collecting_frequencies
32
- @frequencies.max_by{ |item, item_count| item_count }.first
34
+
35
+ @frequencies.max_by { |_item, item_count| item_count }.first
33
36
  end
34
37
 
35
38
  def unique_count
36
39
  return nil unless @collecting_frequencies
40
+
37
41
  @frequencies.size
38
42
  end
39
43
 
40
44
  def unique_values
41
45
  return nil unless @collecting_frequencies
46
+
42
47
  @frequencies.keys
43
48
  end
44
49
 
45
50
  def frequencies
46
51
  return nil unless @collecting_frequencies
52
+
47
53
  @frequencies
48
54
  end
49
55