flat_kit 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +9 -0
  4. data/Manifest.txt +3 -42
  5. data/{bin → exe}/fk +2 -1
  6. data/flat_kit.gemspec +33 -0
  7. data/lib/flat_kit/cli.rb +46 -32
  8. data/lib/flat_kit/command/cat.rb +34 -32
  9. data/lib/flat_kit/command/merge.rb +37 -36
  10. data/lib/flat_kit/command/sort.rb +37 -37
  11. data/lib/flat_kit/command/stats.rb +41 -39
  12. data/lib/flat_kit/command.rb +10 -11
  13. data/lib/flat_kit/descendant_tracker.rb +9 -6
  14. data/lib/flat_kit/error.rb +4 -0
  15. data/lib/flat_kit/event_emitter.rb +5 -2
  16. data/lib/flat_kit/field_stats.rb +31 -26
  17. data/lib/flat_kit/field_type/boolean_type.rb +9 -5
  18. data/lib/flat_kit/field_type/date_type.rb +19 -17
  19. data/lib/flat_kit/field_type/float_type.rb +15 -9
  20. data/lib/flat_kit/field_type/guess_type.rb +9 -6
  21. data/lib/flat_kit/field_type/integer_type.rb +6 -4
  22. data/lib/flat_kit/field_type/null_type.rb +5 -1
  23. data/lib/flat_kit/field_type/string_type.rb +8 -6
  24. data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
  25. data/lib/flat_kit/field_type/unknown_type.rb +12 -8
  26. data/lib/flat_kit/field_type.rb +52 -44
  27. data/lib/flat_kit/format.rb +11 -5
  28. data/lib/flat_kit/input/file.rb +11 -9
  29. data/lib/flat_kit/input/io.rb +18 -21
  30. data/lib/flat_kit/input.rb +8 -7
  31. data/lib/flat_kit/internal_node.rb +22 -19
  32. data/lib/flat_kit/jsonl/format.rb +6 -2
  33. data/lib/flat_kit/jsonl/reader.rb +7 -4
  34. data/lib/flat_kit/jsonl/record.rb +15 -18
  35. data/lib/flat_kit/jsonl/writer.rb +8 -10
  36. data/lib/flat_kit/jsonl.rb +8 -4
  37. data/lib/flat_kit/leaf_node.rb +6 -5
  38. data/lib/flat_kit/log_formatter.rb +20 -0
  39. data/lib/flat_kit/logger.rb +12 -19
  40. data/lib/flat_kit/merge.rb +21 -18
  41. data/lib/flat_kit/merge_tree.rb +5 -6
  42. data/lib/flat_kit/output/file.rb +13 -9
  43. data/lib/flat_kit/output/io.rb +40 -35
  44. data/lib/flat_kit/output.rb +8 -7
  45. data/lib/flat_kit/position.rb +3 -4
  46. data/lib/flat_kit/reader.rb +8 -8
  47. data/lib/flat_kit/record.rb +12 -12
  48. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  49. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  50. data/lib/flat_kit/sort.rb +8 -9
  51. data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
  52. data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
  53. data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
  54. data/lib/flat_kit/stat_type.rb +18 -13
  55. data/lib/flat_kit/stats.rb +12 -14
  56. data/lib/flat_kit/writer.rb +5 -6
  57. data/lib/flat_kit/xsv/format.rb +6 -2
  58. data/lib/flat_kit/xsv/reader.rb +8 -6
  59. data/lib/flat_kit/xsv/record.rb +21 -15
  60. data/lib/flat_kit/xsv/writer.rb +13 -10
  61. data/lib/flat_kit/xsv.rb +7 -4
  62. data/lib/flat_kit.rb +31 -26
  63. metadata +20 -158
  64. data/Rakefile +0 -21
  65. data/examples/stream-active-record-to-csv.rb +0 -42
  66. data/tasks/default.rake +0 -242
  67. data/tasks/extension.rake +0 -38
  68. data/tasks/man.rake +0 -7
  69. data/tasks/this.rb +0 -208
  70. data/test/device_dataset.rb +0 -117
  71. data/test/field_type/test_boolean_type.rb +0 -65
  72. data/test/field_type/test_date_type.rb +0 -71
  73. data/test/field_type/test_float_type.rb +0 -56
  74. data/test/field_type/test_guess_type.rb +0 -14
  75. data/test/field_type/test_integer_type.rb +0 -52
  76. data/test/field_type/test_null_type.rb +0 -41
  77. data/test/field_type/test_string_type.rb +0 -18
  78. data/test/field_type/test_timestamp_type.rb +0 -108
  79. data/test/field_type/test_unknown_type.rb +0 -35
  80. data/test/input/test_file.rb +0 -73
  81. data/test/input/test_io.rb +0 -93
  82. data/test/jsonl/test_format.rb +0 -22
  83. data/test/jsonl/test_reader.rb +0 -49
  84. data/test/jsonl/test_record.rb +0 -61
  85. data/test/jsonl/test_writer.rb +0 -86
  86. data/test/output/test_file.rb +0 -60
  87. data/test/output/test_io.rb +0 -104
  88. data/test/run +0 -23
  89. data/test/stat_type/test_nominal_stats.rb +0 -69
  90. data/test/stat_type/test_numerical_stats.rb +0 -118
  91. data/test/stat_type/test_ordinal_stats.rb +0 -92
  92. data/test/test_conversions.rb +0 -45
  93. data/test/test_event_emitter.rb +0 -89
  94. data/test/test_field_stats.rb +0 -134
  95. data/test/test_field_type.rb +0 -34
  96. data/test/test_format.rb +0 -24
  97. data/test/test_helper.rb +0 -26
  98. data/test/test_merge.rb +0 -40
  99. data/test/test_merge_tree.rb +0 -64
  100. data/test/test_version.rb +0 -11
  101. data/test/xsv/test_format.rb +0 -22
  102. data/test/xsv/test_reader.rb +0 -61
  103. data/test/xsv/test_record.rb +0 -69
  104. data/test/xsv/test_writer.rb +0 -89
@@ -1,19 +1,12 @@
1
- require 'logger'
1
+ # frozen_string_literal: true
2
2
 
3
- module FlatKit
4
- class LogFormatter < ::Logger::Formatter
5
- FORMAT = "%s %5d %05s : %s\n".freeze
6
- DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ".freeze
7
- def initialize
8
- super
9
- self.datetime_format = DATETIME_FORMAT
10
- end
11
-
12
- def call(severity, time, progname, msg)
13
- FORMAT % [format_datetime(time.utc), Process.pid, severity, msg2str(msg)]
14
- end
15
- end
3
+ require "logger"
16
4
 
5
+ # Public: Top level namespace for the gem
6
+ #
7
+ module FlatKit
8
+ # Internal: Logger class
9
+ #
17
10
  class Logger
18
11
  def self.for_io(io)
19
12
  ::Logger.new(io, formatter: LogFormatter.new)
@@ -26,11 +19,11 @@ module FlatKit
26
19
  end
27
20
 
28
21
  def self.log_to(destination = $stderr)
29
- if destination.kind_of?(::IO) then
30
- @logger = ::FlatKit::Logger.for_io(destination)
31
- else
32
- @logger = ::FlatKit::Logger.for_path(destination)
33
- end
22
+ @logger = if destination.is_a?(::IO)
23
+ ::FlatKit::Logger.for_io(destination)
24
+ else
25
+ ::FlatKit::Logger.for_path(destination)
26
+ end
34
27
  end
35
28
 
36
29
  def self.logger
@@ -1,15 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Class implementing merging from N inputs and output to 1 output.
5
+ #
2
6
  class Merge
3
-
4
7
  include ::FlatKit::EventEmitter
5
8
 
6
- attr_reader :readers
7
- attr_reader :writer
8
- attr_reader :compare_fields
9
+ attr_reader :readers, :writer, :compare_fields
9
10
 
10
- def initialize(inputs:, input_fallback: "auto",
11
- output:, output_fallback: "auto",
12
- compare_fields:)
11
+ def initialize(inputs:, output:, compare_fields:, input_fallback: "auto", output_fallback: "auto")
13
12
  @compare_fields = compare_fields
14
13
  @readers = ::FlatKit::Reader.create_readers_from_paths(paths: inputs, compare_fields: @compare_fields,
15
14
  fallback: input_fallback)
@@ -19,21 +18,12 @@ module FlatKit
19
18
 
20
19
  def call
21
20
  ::FlatKit.logger.debug "Merging the following files into #{writer.destination}"
22
- ::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(", ")}"
21
+ ::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(', ')}"
23
22
  readers.each do |r|
24
23
  ::FlatKit.logger.debug " #{r.source}"
25
24
  end
26
25
 
27
- merge_tree = ::FlatKit::MergeTree.new(readers)
28
-
29
- notify_listeners(name: :start, data: :start)
30
- merge_tree.each do |record|
31
-
32
- position = writer.write(record)
33
- meta = { position: position }
34
- notify_listeners(name: :record, data: record, meta: meta)
35
- end
36
- notify_listeners(name: :stop, data: :stop)
26
+ run_merge(readers)
37
27
 
38
28
  readers.each do |r|
39
29
  ::FlatKit.logger.debug " #{r.source} produced #{r.count} records"
@@ -42,5 +32,18 @@ module FlatKit
42
32
  writer.close
43
33
  ::FlatKit.logger.debug "Wrote #{writer.count} records to #{writer.destination}"
44
34
  end
35
+
36
+ private
37
+
38
+ def run_merge(readers)
39
+ tree = ::FlatKit::MergeTree.new(readers)
40
+ notify_listeners(name: :start, data: :start)
41
+ tree.each do |record|
42
+ position = writer.write(record)
43
+ meta = { position: position }
44
+ notify_listeners(name: :record, data: record, meta: meta)
45
+ end
46
+ notify_listeners(name: :stop, data: :stop)
47
+ end
45
48
  end
46
49
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: Merge a list of sorted records from Readers into a single output Writer
3
5
  #
@@ -29,9 +31,7 @@ module FlatKit
29
31
  class MergeTree
30
32
  include Enumerable
31
33
 
32
- attr_reader :leaves
33
- attr_reader :levels
34
- attr_reader :readers
34
+ attr_reader :leaves, :levels, :readers
35
35
 
36
36
  def initialize(readers)
37
37
  @readers = readers
@@ -44,9 +44,7 @@ module FlatKit
44
44
 
45
45
  # Need to pad the leaves to an even number so that the slicing by 2 for
46
46
  # the tournament will work
47
- if @leaves.size.odd? then
48
- @leaves << SentinelLeafNode.new
49
- end
47
+ @leaves << SentinelLeafNode.new if @leaves.size.odd?
50
48
 
51
49
  init_tree
52
50
  end
@@ -94,6 +92,7 @@ module FlatKit
94
92
  def each
95
93
  loop do
96
94
  break if root.leaf.finished?
95
+
97
96
  yield root.value
98
97
  # consume the yielded value and have the tournament tree replay those
99
98
  # brackets affected
@@ -1,22 +1,31 @@
1
- require 'zlib'
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+ require "pathname"
2
5
 
3
6
  module FlatKit
4
7
  class Output
8
+ # Internal: File output implementation
9
+ #
5
10
  class File < Output
6
11
  attr_reader :path
7
12
 
13
+ # internal api method for testing purposes
14
+ attr_reader :io
15
+
8
16
  def self.handles?(obj)
9
17
  return true if obj.instance_of?(Pathname)
10
18
  return false unless obj.instance_of?(String)
11
19
 
12
20
  # in case these get loaded in different orders
13
- return false if ::FlatKit::Output::IO.is_stdout?(obj)
14
- return false if ::FlatKit::Output::IO.is_stderr?(obj)
21
+ return false if ::FlatKit::Output::IO.stdout?(obj)
22
+ return false if ::FlatKit::Output::IO.stderr?(obj)
15
23
 
16
- return true
24
+ true
17
25
  end
18
26
 
19
27
  def initialize(obj)
28
+ super()
20
29
  @path = Pathname.new(obj)
21
30
  path.dirname.mkpath
22
31
  @io = open_output(path)
@@ -30,11 +39,6 @@ module FlatKit
30
39
  @io.close
31
40
  end
32
41
 
33
- # internal api method for testing purposes
34
- def io
35
- @io
36
- end
37
-
38
42
  private
39
43
 
40
44
  # open the appropriate output type depending on the destination file name
@@ -1,73 +1,78 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class Output
5
+ # Internal: Non-file Output implementation - this is generally to stdout or stderr
6
+ #
3
7
  class IO < Output
4
- attr_reader :count
8
+ attr_reader :count, :name
9
+
10
+ # internal api method for testing
11
+ attr_reader :io
5
12
 
6
- STDOUTS = %w[ stdout STDOUT - <stdout> ]
7
- STDERRS = %w[ stderr STDERR <stderr> ]
13
+ STDOUTS = %w[stdout STDOUT - <stdout>].freeze
14
+ STDERRS = %w[stderr STDERR <stderr>].freeze
8
15
 
9
16
  def self.handles?(obj)
10
- return true if is_stderr?(obj)
11
- return true if is_stdout?(obj)
12
- return true if [ ::File, ::StringIO, ::IO ].any? { |klass| obj.kind_of?(klass) }
13
- return false
17
+ return true if stderr?(obj)
18
+ return true if stdout?(obj)
19
+ return true if [::File, ::StringIO, ::IO].any? { |klass| obj.is_a?(klass) }
20
+
21
+ false
14
22
  end
15
23
 
16
- def self.is_stderr?(obj)
24
+ def self.stderr?(obj)
17
25
  case obj
18
26
  when String
19
27
  return true if STDERRS.include?(obj)
20
28
  when ::IO
21
- return true if obj == ::STDERR
29
+ return true if obj == $stderr
22
30
  end
23
- return false
31
+ false
24
32
  end
25
33
 
26
- def self.is_stdout?(obj)
34
+ def self.stdout?(obj)
27
35
  case obj
28
36
  when String
29
37
  return true if STDOUTS.include?(obj)
30
38
  when ::IO
31
- return true if obj == ::STDOUT
39
+ return true if obj == $stdout
32
40
  end
33
- return false
41
+ false
34
42
  end
35
43
 
36
44
  def initialize(obj)
45
+ super()
37
46
  @count = 0
38
- if self.class.is_stdout?(obj) then
47
+ @name = nil
48
+ @io = nil
49
+ init_name_and_io(obj)
50
+ end
51
+
52
+ # this goes to an io stream and we are not in charge of opening it
53
+ def close
54
+ @io.close
55
+ end
56
+
57
+ private
58
+
59
+ def init_name_and_io(obj)
60
+ if self.class.stdout?(obj)
39
61
  @name = "<STDOUT>"
40
62
  @io = $stdout
41
- elsif self.class.is_stderr?(obj) then
63
+ elsif self.class.stderr?(obj)
42
64
  @name = "<STDERR>"
43
65
  @io = $stderr
44
- elsif obj.kind_of?(::File) then
45
- @name = obj.path
66
+ elsif obj.is_a?(::IO)
67
+ @name = (obj.respond_to?(:path) && obj.path) || obj.inspect
46
68
  @io = obj
47
- elsif obj.kind_of?(::StringIO) then
48
- @name = obj.inspect
49
- @io = obj
50
- elsif obj.kind_of?(::IO) then
69
+ elsif obj.is_a?(::StringIO)
51
70
  @name = obj.inspect
52
71
  @io = obj
53
72
  else
54
73
  raise ::FlatKit::Error, "Unable to create #{self.class} from #{obj.class} : #{obj.inspect}"
55
74
  end
56
75
  end
57
-
58
- def name
59
- @name
60
- end
61
-
62
- # this goes to an io stream and we are not in charge of opening it
63
- def close
64
- @io.close
65
- end
66
-
67
- # internal api method for testing
68
- def io
69
- @io
70
- end
71
76
  end
72
77
  end
73
78
  end
@@ -1,14 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Base class for all output handlers
5
+ #
2
6
  class Output
3
7
  extend DescendantTracker
4
8
 
5
9
  def self.from(out)
6
- return out if out.kind_of?(::FlatKit::Output)
10
+ return out if out.is_a?(::FlatKit::Output)
7
11
 
8
12
  out_klass = find_child(:handles?, out)
9
- if out_klass then
10
- return out_klass.new(out)
11
- end
13
+ return out_klass.new(out) if out_klass
12
14
 
13
15
  raise FlatKit::Error, "Unable to create output from #{out.class} : #{out.inspect}"
14
16
  end
@@ -17,7 +19,6 @@ module FlatKit
17
19
  raise NotImplementedError, "#{self.class} must implement #name"
18
20
  end
19
21
 
20
- #
21
22
  def io
22
23
  raise NotImplementedError, "#{self.class} must implement #io"
23
24
  end
@@ -32,5 +33,5 @@ module FlatKit
32
33
  end
33
34
  end
34
35
 
35
- require 'flat_kit/output/io'
36
- require 'flat_kit/output/file'
36
+ require "flat_kit/output/io"
37
+ require "flat_kit/output/file"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # The information about the position of a record in an IO stream
3
5
  #
@@ -5,10 +7,7 @@ module FlatKit
5
7
  # information about the record that was just written
6
8
  #
7
9
  class Position
8
-
9
- attr_reader :index # zero based
10
- attr_reader :offset # byte offset in the IO stream
11
- attr_reader :bytesize # byte length of the record
10
+ attr_reader :index, :offset, :bytesize # index: zero based; offset: byte offset in the IO stream; bytesize: byte length of the record
12
11
 
13
12
  def initialize(index: nil, offset: nil, bytesize: nil)
14
13
  @index = index
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: the base class for all format readers.
3
5
  #
@@ -14,24 +16,21 @@ module FlatKit
14
16
  # API:
15
17
  #
16
18
  # initialize(source:, compare_fields:)
17
- # each -> Yields / returns
19
+ # each -> Yields / returns
18
20
  #
19
21
  class Reader
20
22
  include Enumerable
21
23
 
22
- attr_reader :source
23
- attr_reader :compare_fields
24
+ attr_reader :source, :compare_fields
24
25
 
25
26
  def self.create_reader_from_path(path: "-", fallback: "auto", compare_fields: :none)
26
27
  format = ::FlatKit::Format.for_with_fallback!(path: path, fallback: fallback)
27
- return format.reader.new(source: path, compare_fields: compare_fields)
28
+ format.reader.new(source: path, compare_fields: compare_fields)
28
29
  end
29
30
 
30
31
  def self.create_readers_from_paths(paths:, fallback: "auto", compare_fields: :none)
31
32
  # default to stdin if there are no paths
32
- if paths.empty? then
33
- paths << "-"
34
- end
33
+ paths << "-" if paths.empty?
35
34
 
36
35
  paths.map do |path|
37
36
  create_reader_from_path(path: path, fallback: fallback, compare_fields: compare_fields)
@@ -55,7 +54,8 @@ module FlatKit
55
54
 
56
55
  def resolve_compare_fields(value)
57
56
  return [] if value == :none
58
- return value
57
+
58
+ value
59
59
  end
60
60
  end
61
61
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: The base class that all record classes should inherit from.
3
5
  #
@@ -35,11 +37,9 @@ module FlatKit
35
37
  # # the initialize method must call super(data:, compare_fields:) to
36
38
  # initialize the root data structures
37
39
  class Record
38
-
39
40
  include Comparable
40
41
 
41
- attr_reader :data
42
- attr_reader :compare_fields
42
+ attr_reader :data, :compare_fields
43
43
 
44
44
  def initialize(data:, compare_fields:)
45
45
  @data = data
@@ -57,15 +57,15 @@ module FlatKit
57
57
  my_val = self[field]
58
58
  other_val = other[field]
59
59
 
60
- if my_val.nil? && other_val.nil? then
61
- compare_result = 0
62
- elsif my_val.nil?
63
- compare_result = -1
64
- elsif other_val.nil?
65
- compare_result = 1
66
- else
67
- compare_result = my_val.<=>(other_val)
68
- end
60
+ compare_result = if my_val.nil? && other_val.nil?
61
+ 0
62
+ elsif my_val.nil?
63
+ -1
64
+ elsif other_val.nil?
65
+ 1
66
+ else
67
+ my_val <=> (other_val)
68
+ end
69
69
 
70
70
  return compare_result unless compare_result.zero?
71
71
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: The Sentinel Internal Node is a private class used by the MergeTree
3
5
  # class.
@@ -8,12 +10,10 @@ module FlatKit
8
10
  class SentinelInternalNode
9
11
  include Comparable
10
12
 
11
- attr_reader :left
12
- attr_reader :right
13
- attr_reader :winner
13
+ attr_reader :left, :right, :winner
14
14
  attr_accessor :next_level
15
15
 
16
- def initialize(left: nil, right: nil)
16
+ def initialize(*)
17
17
  @left = nil
18
18
  @right = nil
19
19
  @winner = nil
@@ -31,7 +31,8 @@ module FlatKit
31
31
  # A sentinel node is always greater than any other node
32
32
  def <=>(other)
33
33
  return 0 if other.sentinel?
34
- return 1
34
+
35
+ 1
35
36
  end
36
37
  end
37
38
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: The Sentinel Leaf Node is used internally by the MergeTree
3
5
  #
@@ -31,7 +33,8 @@ module FlatKit
31
33
  # A sentinel node is always greater than any other node
32
34
  def <=>(other)
33
35
  return 0 if other.sentinel?
34
- return 1
36
+
37
+ 1
35
38
  end
36
39
  end
37
40
  end
data/lib/flat_kit/sort.rb CHANGED
@@ -1,13 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Sorts an Input and sends the sorted records to an Output
5
+ #
2
6
  class Sort
3
- attr_reader :reader
4
- attr_reader :writer
5
- attr_reader :compare_fields
6
-
7
- def initialize(input:, input_fallback: "auto",
8
- output:, output_fallback: "auto",
9
- compare_fields:)
7
+ attr_reader :reader, :writer, :compare_fields
10
8
 
9
+ def initialize(input:, output:, compare_fields:, input_fallback: "auto", output_fallback: "auto")
11
10
  @compare_fields = compare_fields
12
11
  @reader = ::FlatKit::Reader.create_reader_from_path(path: input, compare_fields: @compare_fields,
13
12
  fallback: input_fallback)
@@ -16,8 +15,8 @@ module FlatKit
16
15
  end
17
16
 
18
17
  def call
19
- ::FlatKit.logger.info "Sorting #{reader.source} into #{writer.destination} using key #{compare_fields.join(", ")}"
20
- records = Array.new.tap do |a|
18
+ ::FlatKit.logger.info "Sorting #{reader.source} into #{writer.destination} using key #{compare_fields.join(', ')}"
19
+ records = [].tap do |a|
21
20
  reader.each do |r|
22
21
  a << r
23
22
  end
@@ -1,21 +1,22 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class StatType
3
-
4
- # Status object to keep track of the count and frequency of values
5
+ # Internal: Status object to keep track of the count and frequency of values.
5
6
  #
6
7
  class NominalStats < StatType
7
-
8
8
  attr_reader :count
9
9
 
10
10
  def self.default_stats
11
- @default_stats ||= %w[ count ]
11
+ @default_stats ||= %w[count]
12
12
  end
13
13
 
14
14
  def self.all_stats
15
- @all_stats ||= %w[ count unique_count unique_values mode ]
15
+ @all_stats ||= %w[count unique_count unique_values mode]
16
16
  end
17
17
 
18
18
  def initialize(collecting_frequencies: false)
19
+ super()
19
20
  @mutex = Mutex.new
20
21
  @count = 0
21
22
  @collecting_frequencies = collecting_frequencies
@@ -24,26 +25,31 @@ module FlatKit
24
25
 
25
26
  def collected_stats
26
27
  return self.class.default_stats unless @collecting_frequencies
27
- return self.class.all_stats
28
+
29
+ self.class.all_stats
28
30
  end
29
31
 
30
32
  def mode
31
33
  return nil unless @collecting_frequencies
32
- @frequencies.max_by{ |item, item_count| item_count }.first
34
+
35
+ @frequencies.max_by { |_item, item_count| item_count }.first
33
36
  end
34
37
 
35
38
  def unique_count
36
39
  return nil unless @collecting_frequencies
40
+
37
41
  @frequencies.size
38
42
  end
39
43
 
40
44
  def unique_values
41
45
  return nil unless @collecting_frequencies
46
+
42
47
  @frequencies.keys
43
48
  end
44
49
 
45
50
  def frequencies
46
51
  return nil unless @collecting_frequencies
52
+
47
53
  @frequencies
48
54
  end
49
55
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  #--
2
4
  # Copyright (c) 2008, 2009 Jeremy Hinegardner
3
5
  # All rights reserved. See LICENSE and/or COPYING for details.
@@ -5,16 +7,14 @@
5
7
  # Pulled from Hitimes, which I also wrote
6
8
  #++
7
9
 
8
- require 'thread'
9
- require 'oj'
10
+ require "oj"
10
11
 
11
12
  module FlatKit
12
13
  class StatType
13
- #
14
- # Stats object will keep track of the _min_, _max_, _count_, _sum_ and _sumsq_
14
+ # Internal: Stats object to keep track of the _min_, _max_, _count_, _sum_ and _sumsq_
15
15
  # and when you want you may also retrieve the _mean_, _stddev_ and _rate_.
16
16
  #
17
- # this contrived example shows getting a list of all the files in a directory
17
+ # This contrived example shows getting a list of all the files in a directory
18
18
  # and running stats on file sizes.
19
19
  #
20
20
  # s = FlatKit::Stats.new
@@ -33,17 +33,14 @@ module FlatKit
33
33
  class NumericalStats < NominalStats
34
34
  # A list of the available stats
35
35
 
36
- attr_reader :min
37
- attr_reader :max
38
- attr_reader :sum
39
- attr_reader :sumsq
36
+ attr_reader :min, :max, :sum, :sumsq
40
37
 
41
38
  def self.default_stats
42
- @default_stats ||= %w[ count max mean min rate stddev sum sumsq ]
39
+ @default_stats ||= %w[count max mean min rate stddev sum sumsq]
43
40
  end
44
41
 
45
42
  def self.all_stats
46
- @all_stats ||= %w[ count max mean min mode rate stddev sum sumsq unique_count unique_values ]
43
+ @all_stats ||= %w[count max mean min mode rate stddev sum sumsq unique_count unique_values]
47
44
  end
48
45
 
49
46
  def initialize(collecting_frequencies: false)
@@ -61,8 +58,8 @@ module FlatKit
61
58
  # Return the input value.
62
59
  def update(value)
63
60
  @mutex.synchronize do
64
- @min = (value < @min) ? value : @min
65
- @max = (value > @max) ? value : @max
61
+ @min = [value, @min].min
62
+ @max = [value, @max].max
66
63
 
67
64
  @count += 1
68
65
  @sum += value
@@ -72,17 +69,18 @@ module FlatKit
72
69
  @frequencies[value] += 1 if @collecting_frequencies
73
70
  end
74
71
 
75
- return value
72
+ value
76
73
  end
77
74
 
78
75
  # call-seq:
79
76
  # stat.mean -> Float
80
- #
77
+ #
81
78
  # Return the arithmetic mean of the values put into the Stats object. If no
82
79
  # values have passed through the stats object then 0.0 is returned;
83
80
  def mean
84
81
  return 0.0 if @count.zero?
85
- return @sum / @count
82
+
83
+ @sum / @count
86
84
  end
87
85
 
88
86
  # call-seq:
@@ -100,7 +98,8 @@ module FlatKit
100
98
  #
101
99
  def rate
102
100
  return 0.0 if @sum.zero?
103
- return @count / @sum
101
+
102
+ @count / @sum
104
103
  end
105
104
 
106
105
  #
@@ -113,7 +112,8 @@ module FlatKit
113
112
  #
114
113
  def stddev
115
114
  return 0.0 unless @count > 1
116
- Math.sqrt((@sumsq - ((@sum * @sum)/@count)) / (@count - 1))
115
+
116
+ Math.sqrt((@sumsq - ((@sum * @sum) / @count)) / (@count - 1))
117
117
  end
118
118
  end
119
119
  end