flat_kit 0.3.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +1 -2
  3. data/HISTORY.md +9 -0
  4. data/Manifest.txt +3 -42
  5. data/{bin → exe}/fk +2 -1
  6. data/flat_kit.gemspec +33 -0
  7. data/lib/flat_kit/cli.rb +46 -32
  8. data/lib/flat_kit/command/cat.rb +34 -32
  9. data/lib/flat_kit/command/merge.rb +37 -36
  10. data/lib/flat_kit/command/sort.rb +37 -37
  11. data/lib/flat_kit/command/stats.rb +41 -39
  12. data/lib/flat_kit/command.rb +10 -11
  13. data/lib/flat_kit/descendant_tracker.rb +9 -6
  14. data/lib/flat_kit/error.rb +4 -0
  15. data/lib/flat_kit/event_emitter.rb +5 -2
  16. data/lib/flat_kit/field_stats.rb +31 -26
  17. data/lib/flat_kit/field_type/boolean_type.rb +9 -5
  18. data/lib/flat_kit/field_type/date_type.rb +19 -17
  19. data/lib/flat_kit/field_type/float_type.rb +15 -9
  20. data/lib/flat_kit/field_type/guess_type.rb +9 -6
  21. data/lib/flat_kit/field_type/integer_type.rb +6 -4
  22. data/lib/flat_kit/field_type/null_type.rb +5 -1
  23. data/lib/flat_kit/field_type/string_type.rb +8 -6
  24. data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
  25. data/lib/flat_kit/field_type/unknown_type.rb +12 -8
  26. data/lib/flat_kit/field_type.rb +52 -44
  27. data/lib/flat_kit/format.rb +11 -5
  28. data/lib/flat_kit/input/file.rb +11 -9
  29. data/lib/flat_kit/input/io.rb +18 -21
  30. data/lib/flat_kit/input.rb +8 -7
  31. data/lib/flat_kit/internal_node.rb +22 -19
  32. data/lib/flat_kit/jsonl/format.rb +6 -2
  33. data/lib/flat_kit/jsonl/reader.rb +7 -4
  34. data/lib/flat_kit/jsonl/record.rb +15 -18
  35. data/lib/flat_kit/jsonl/writer.rb +8 -10
  36. data/lib/flat_kit/jsonl.rb +8 -4
  37. data/lib/flat_kit/leaf_node.rb +6 -5
  38. data/lib/flat_kit/log_formatter.rb +20 -0
  39. data/lib/flat_kit/logger.rb +12 -19
  40. data/lib/flat_kit/merge.rb +21 -18
  41. data/lib/flat_kit/merge_tree.rb +5 -6
  42. data/lib/flat_kit/output/file.rb +13 -9
  43. data/lib/flat_kit/output/io.rb +40 -35
  44. data/lib/flat_kit/output.rb +8 -7
  45. data/lib/flat_kit/position.rb +3 -4
  46. data/lib/flat_kit/reader.rb +8 -8
  47. data/lib/flat_kit/record.rb +12 -12
  48. data/lib/flat_kit/sentinel_internal_node.rb +6 -5
  49. data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
  50. data/lib/flat_kit/sort.rb +8 -9
  51. data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
  52. data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
  53. data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
  54. data/lib/flat_kit/stat_type.rb +18 -13
  55. data/lib/flat_kit/stats.rb +12 -14
  56. data/lib/flat_kit/writer.rb +5 -6
  57. data/lib/flat_kit/xsv/format.rb +6 -2
  58. data/lib/flat_kit/xsv/reader.rb +8 -6
  59. data/lib/flat_kit/xsv/record.rb +21 -15
  60. data/lib/flat_kit/xsv/writer.rb +13 -10
  61. data/lib/flat_kit/xsv.rb +7 -4
  62. data/lib/flat_kit.rb +31 -26
  63. metadata +20 -158
  64. data/Rakefile +0 -21
  65. data/examples/stream-active-record-to-csv.rb +0 -42
  66. data/tasks/default.rake +0 -242
  67. data/tasks/extension.rake +0 -38
  68. data/tasks/man.rake +0 -7
  69. data/tasks/this.rb +0 -208
  70. data/test/device_dataset.rb +0 -117
  71. data/test/field_type/test_boolean_type.rb +0 -65
  72. data/test/field_type/test_date_type.rb +0 -71
  73. data/test/field_type/test_float_type.rb +0 -56
  74. data/test/field_type/test_guess_type.rb +0 -14
  75. data/test/field_type/test_integer_type.rb +0 -52
  76. data/test/field_type/test_null_type.rb +0 -41
  77. data/test/field_type/test_string_type.rb +0 -18
  78. data/test/field_type/test_timestamp_type.rb +0 -108
  79. data/test/field_type/test_unknown_type.rb +0 -35
  80. data/test/input/test_file.rb +0 -73
  81. data/test/input/test_io.rb +0 -93
  82. data/test/jsonl/test_format.rb +0 -22
  83. data/test/jsonl/test_reader.rb +0 -49
  84. data/test/jsonl/test_record.rb +0 -61
  85. data/test/jsonl/test_writer.rb +0 -86
  86. data/test/output/test_file.rb +0 -60
  87. data/test/output/test_io.rb +0 -104
  88. data/test/run +0 -23
  89. data/test/stat_type/test_nominal_stats.rb +0 -69
  90. data/test/stat_type/test_numerical_stats.rb +0 -118
  91. data/test/stat_type/test_ordinal_stats.rb +0 -92
  92. data/test/test_conversions.rb +0 -45
  93. data/test/test_event_emitter.rb +0 -89
  94. data/test/test_field_stats.rb +0 -134
  95. data/test/test_field_type.rb +0 -34
  96. data/test/test_format.rb +0 -24
  97. data/test/test_helper.rb +0 -26
  98. data/test/test_merge.rb +0 -40
  99. data/test/test_merge_tree.rb +0 -64
  100. data/test/test_version.rb +0 -11
  101. data/test/xsv/test_format.rb +0 -22
  102. data/test/xsv/test_reader.rb +0 -61
  103. data/test/xsv/test_record.rb +0 -69
  104. data/test/xsv/test_writer.rb +0 -89
@@ -1,19 +1,12 @@
1
- require 'logger'
1
+ # frozen_string_literal: true
2
2
 
3
- module FlatKit
4
- class LogFormatter < ::Logger::Formatter
5
- FORMAT = "%s %5d %05s : %s\n".freeze
6
- DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ".freeze
7
- def initialize
8
- super
9
- self.datetime_format = DATETIME_FORMAT
10
- end
11
-
12
- def call(severity, time, progname, msg)
13
- FORMAT % [format_datetime(time.utc), Process.pid, severity, msg2str(msg)]
14
- end
15
- end
3
+ require "logger"
16
4
 
5
+ # Public: Top level namespace for the gem
6
+ #
7
+ module FlatKit
8
+ # Internal: Logger class
9
+ #
17
10
  class Logger
18
11
  def self.for_io(io)
19
12
  ::Logger.new(io, formatter: LogFormatter.new)
@@ -26,11 +19,11 @@ module FlatKit
26
19
  end
27
20
 
28
21
  def self.log_to(destination = $stderr)
29
- if destination.kind_of?(::IO) then
30
- @logger = ::FlatKit::Logger.for_io(destination)
31
- else
32
- @logger = ::FlatKit::Logger.for_path(destination)
33
- end
22
+ @logger = if destination.is_a?(::IO)
23
+ ::FlatKit::Logger.for_io(destination)
24
+ else
25
+ ::FlatKit::Logger.for_path(destination)
26
+ end
34
27
  end
35
28
 
36
29
  def self.logger
@@ -1,15 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Class implementing merging from N inputs and output to 1 output.
5
+ #
2
6
  class Merge
3
-
4
7
  include ::FlatKit::EventEmitter
5
8
 
6
- attr_reader :readers
7
- attr_reader :writer
8
- attr_reader :compare_fields
9
+ attr_reader :readers, :writer, :compare_fields
9
10
 
10
- def initialize(inputs:, input_fallback: "auto",
11
- output:, output_fallback: "auto",
12
- compare_fields:)
11
+ def initialize(inputs:, output:, compare_fields:, input_fallback: "auto", output_fallback: "auto")
13
12
  @compare_fields = compare_fields
14
13
  @readers = ::FlatKit::Reader.create_readers_from_paths(paths: inputs, compare_fields: @compare_fields,
15
14
  fallback: input_fallback)
@@ -19,21 +18,12 @@ module FlatKit
19
18
 
20
19
  def call
21
20
  ::FlatKit.logger.debug "Merging the following files into #{writer.destination}"
22
- ::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(", ")}"
21
+ ::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(', ')}"
23
22
  readers.each do |r|
24
23
  ::FlatKit.logger.debug " #{r.source}"
25
24
  end
26
25
 
27
- merge_tree = ::FlatKit::MergeTree.new(readers)
28
-
29
- notify_listeners(name: :start, data: :start)
30
- merge_tree.each do |record|
31
-
32
- position = writer.write(record)
33
- meta = { position: position }
34
- notify_listeners(name: :record, data: record, meta: meta)
35
- end
36
- notify_listeners(name: :stop, data: :stop)
26
+ run_merge(readers)
37
27
 
38
28
  readers.each do |r|
39
29
  ::FlatKit.logger.debug " #{r.source} produced #{r.count} records"
@@ -42,5 +32,18 @@ module FlatKit
42
32
  writer.close
43
33
  ::FlatKit.logger.debug "Wrote #{writer.count} records to #{writer.destination}"
44
34
  end
35
+
36
+ private
37
+
38
+ def run_merge(readers)
39
+ tree = ::FlatKit::MergeTree.new(readers)
40
+ notify_listeners(name: :start, data: :start)
41
+ tree.each do |record|
42
+ position = writer.write(record)
43
+ meta = { position: position }
44
+ notify_listeners(name: :record, data: record, meta: meta)
45
+ end
46
+ notify_listeners(name: :stop, data: :stop)
47
+ end
45
48
  end
46
49
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: Merge a list of sorted records from Readers into a single output Writer
3
5
  #
@@ -29,9 +31,7 @@ module FlatKit
29
31
  class MergeTree
30
32
  include Enumerable
31
33
 
32
- attr_reader :leaves
33
- attr_reader :levels
34
- attr_reader :readers
34
+ attr_reader :leaves, :levels, :readers
35
35
 
36
36
  def initialize(readers)
37
37
  @readers = readers
@@ -44,9 +44,7 @@ module FlatKit
44
44
 
45
45
  # Need to pad the leaves to an even number so that the slicing by 2 for
46
46
  # the tournament will work
47
- if @leaves.size.odd? then
48
- @leaves << SentinelLeafNode.new
49
- end
47
+ @leaves << SentinelLeafNode.new if @leaves.size.odd?
50
48
 
51
49
  init_tree
52
50
  end
@@ -94,6 +92,7 @@ module FlatKit
94
92
  def each
95
93
  loop do
96
94
  break if root.leaf.finished?
95
+
97
96
  yield root.value
98
97
  # consume the yielded value and have the tournament tree replay those
99
98
  # brackets affected
@@ -1,22 +1,31 @@
1
- require 'zlib'
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+ require "pathname"
2
5
 
3
6
  module FlatKit
4
7
  class Output
8
+ # Internal: File output implementation
9
+ #
5
10
  class File < Output
6
11
  attr_reader :path
7
12
 
13
+ # internal api method for testing purposes
14
+ attr_reader :io
15
+
8
16
  def self.handles?(obj)
9
17
  return true if obj.instance_of?(Pathname)
10
18
  return false unless obj.instance_of?(String)
11
19
 
12
20
  # in case these get loaded in different orders
13
- return false if ::FlatKit::Output::IO.is_stdout?(obj)
14
- return false if ::FlatKit::Output::IO.is_stderr?(obj)
21
+ return false if ::FlatKit::Output::IO.stdout?(obj)
22
+ return false if ::FlatKit::Output::IO.stderr?(obj)
15
23
 
16
- return true
24
+ true
17
25
  end
18
26
 
19
27
  def initialize(obj)
28
+ super()
20
29
  @path = Pathname.new(obj)
21
30
  path.dirname.mkpath
22
31
  @io = open_output(path)
@@ -30,11 +39,6 @@ module FlatKit
30
39
  @io.close
31
40
  end
32
41
 
33
- # internal api method for testing purposes
34
- def io
35
- @io
36
- end
37
-
38
42
  private
39
43
 
40
44
  # open the appropriate output type depending on the destination file name
@@ -1,73 +1,78 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class Output
5
+ # Internal: Non-file Output implementation - this is generally to stdout or stderr
6
+ #
3
7
  class IO < Output
4
- attr_reader :count
8
+ attr_reader :count, :name
9
+
10
+ # internal api method for testing
11
+ attr_reader :io
5
12
 
6
- STDOUTS = %w[ stdout STDOUT - <stdout> ]
7
- STDERRS = %w[ stderr STDERR <stderr> ]
13
+ STDOUTS = %w[stdout STDOUT - <stdout>].freeze
14
+ STDERRS = %w[stderr STDERR <stderr>].freeze
8
15
 
9
16
  def self.handles?(obj)
10
- return true if is_stderr?(obj)
11
- return true if is_stdout?(obj)
12
- return true if [ ::File, ::StringIO, ::IO ].any? { |klass| obj.kind_of?(klass) }
13
- return false
17
+ return true if stderr?(obj)
18
+ return true if stdout?(obj)
19
+ return true if [::File, ::StringIO, ::IO].any? { |klass| obj.is_a?(klass) }
20
+
21
+ false
14
22
  end
15
23
 
16
- def self.is_stderr?(obj)
24
+ def self.stderr?(obj)
17
25
  case obj
18
26
  when String
19
27
  return true if STDERRS.include?(obj)
20
28
  when ::IO
21
- return true if obj == ::STDERR
29
+ return true if obj == $stderr
22
30
  end
23
- return false
31
+ false
24
32
  end
25
33
 
26
- def self.is_stdout?(obj)
34
+ def self.stdout?(obj)
27
35
  case obj
28
36
  when String
29
37
  return true if STDOUTS.include?(obj)
30
38
  when ::IO
31
- return true if obj == ::STDOUT
39
+ return true if obj == $stdout
32
40
  end
33
- return false
41
+ false
34
42
  end
35
43
 
36
44
  def initialize(obj)
45
+ super()
37
46
  @count = 0
38
- if self.class.is_stdout?(obj) then
47
+ @name = nil
48
+ @io = nil
49
+ init_name_and_io(obj)
50
+ end
51
+
52
+ # this goes to an io stream and we are not in charge of opening it
53
+ def close
54
+ @io.close
55
+ end
56
+
57
+ private
58
+
59
+ def init_name_and_io(obj)
60
+ if self.class.stdout?(obj)
39
61
  @name = "<STDOUT>"
40
62
  @io = $stdout
41
- elsif self.class.is_stderr?(obj) then
63
+ elsif self.class.stderr?(obj)
42
64
  @name = "<STDERR>"
43
65
  @io = $stderr
44
- elsif obj.kind_of?(::File) then
45
- @name = obj.path
66
+ elsif obj.is_a?(::IO)
67
+ @name = (obj.respond_to?(:path) && obj.path) || obj.inspect
46
68
  @io = obj
47
- elsif obj.kind_of?(::StringIO) then
48
- @name = obj.inspect
49
- @io = obj
50
- elsif obj.kind_of?(::IO) then
69
+ elsif obj.is_a?(::StringIO)
51
70
  @name = obj.inspect
52
71
  @io = obj
53
72
  else
54
73
  raise ::FlatKit::Error, "Unable to create #{self.class} from #{obj.class} : #{obj.inspect}"
55
74
  end
56
75
  end
57
-
58
- def name
59
- @name
60
- end
61
-
62
- # this goes to an io stream and we are not in charge of opening it
63
- def close
64
- @io.close
65
- end
66
-
67
- # internal api method for testing
68
- def io
69
- @io
70
- end
71
76
  end
72
77
  end
73
78
  end
@@ -1,14 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Base classes for all output handlers
5
+ #
2
6
  class Output
3
7
  extend DescendantTracker
4
8
 
5
9
  def self.from(out)
6
- return out if out.kind_of?(::FlatKit::Output)
10
+ return out if out.is_a?(::FlatKit::Output)
7
11
 
8
12
  out_klass = find_child(:handles?, out)
9
- if out_klass then
10
- return out_klass.new(out)
11
- end
13
+ return out_klass.new(out) if out_klass
12
14
 
13
15
  raise FlatKit::Error, "Unable to create output from #{out.class} : #{out.inspect}"
14
16
  end
@@ -17,7 +19,6 @@ module FlatKit
17
19
  raise NotImplementedError, "#{self.class} must implement #name"
18
20
  end
19
21
 
20
- #
21
22
  def io
22
23
  raise NotImplementedError, "#{self.class} must implement #io"
23
24
  end
@@ -32,5 +33,5 @@ module FlatKit
32
33
  end
33
34
  end
34
35
 
35
- require 'flat_kit/output/io'
36
- require 'flat_kit/output/file'
36
+ require "flat_kit/output/io"
37
+ require "flat_kit/output/file"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # The information about the position of a record in an IO stream
3
5
  #
@@ -5,10 +7,7 @@ module FlatKit
5
7
  # information about the record that was just written
6
8
  #
7
9
  class Position
8
-
9
- attr_reader :index # zero based
10
- attr_reader :offset # byte offset in the IO stream
11
- attr_reader :bytesize # byte length of the record
10
+ attr_reader :index, :offset, :bytesize # index: zero based; offset: byte offset in the IO stream; bytesize: byte length of the record
12
11
 
13
12
  def initialize(index: nil, offset: nil, bytesize: nil)
14
13
  @index = index
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: the base class for all format readers.
3
5
  #
@@ -14,24 +16,21 @@ module FlatKit
14
16
  # API:
15
17
  #
16
18
  # initialize(source:, compare_fields:)
17
- # each -> Yields / returns
19
+ # each -> Yields / returns
18
20
  #
19
21
  class Reader
20
22
  include Enumerable
21
23
 
22
- attr_reader :source
23
- attr_reader :compare_fields
24
+ attr_reader :source, :compare_fields
24
25
 
25
26
  def self.create_reader_from_path(path: "-", fallback: "auto", compare_fields: :none)
26
27
  format = ::FlatKit::Format.for_with_fallback!(path: path, fallback: fallback)
27
- return format.reader.new(source: path, compare_fields: compare_fields)
28
+ format.reader.new(source: path, compare_fields: compare_fields)
28
29
  end
29
30
 
30
31
  def self.create_readers_from_paths(paths:, fallback: "auto", compare_fields: :none)
31
32
  # default to stdin if there are no paths
32
- if paths.empty? then
33
- paths << "-"
34
- end
33
+ paths << "-" if paths.empty?
35
34
 
36
35
  paths.map do |path|
37
36
  create_reader_from_path(path: path, fallback: fallback, compare_fields: compare_fields)
@@ -55,7 +54,8 @@ module FlatKit
55
54
 
56
55
  def resolve_compare_fields(value)
57
56
  return [] if value == :none
58
- return value
57
+
58
+ value
59
59
  end
60
60
  end
61
61
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Public: The base class that all record classes should inherit from.
3
5
  #
@@ -35,11 +37,9 @@ module FlatKit
35
37
  # # the initialize method must call super(data:, compare_fields:) to
36
38
  # initializa the root data structures
37
39
  class Record
38
-
39
40
  include Comparable
40
41
 
41
- attr_reader :data
42
- attr_reader :compare_fields
42
+ attr_reader :data, :compare_fields
43
43
 
44
44
  def initialize(data:, compare_fields:)
45
45
  @data = data
@@ -57,15 +57,15 @@ module FlatKit
57
57
  my_val = self[field]
58
58
  other_val = other[field]
59
59
 
60
- if my_val.nil? && other_val.nil? then
61
- compare_result = 0
62
- elsif my_val.nil?
63
- compare_result = -1
64
- elsif other_val.nil?
65
- compare_result = 1
66
- else
67
- compare_result = my_val.<=>(other_val)
68
- end
60
+ compare_result = if my_val.nil? && other_val.nil?
61
+ 0
62
+ elsif my_val.nil?
63
+ -1
64
+ elsif other_val.nil?
65
+ 1
66
+ else
67
+ my_val <=> (other_val)
68
+ end
69
69
 
70
70
  return compare_result unless compare_result.zero?
71
71
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: The Sentinel Internal Node is a private class used by the MergeTree
3
5
  # class.
@@ -8,12 +10,10 @@ module FlatKit
8
10
  class SentinelInternalNode
9
11
  include Comparable
10
12
 
11
- attr_reader :left
12
- attr_reader :right
13
- attr_reader :winner
13
+ attr_reader :left, :right, :winner
14
14
  attr_accessor :next_level
15
15
 
16
- def initialize(left: nil, right: nil)
16
+ def initialize(*)
17
17
  @left = nil
18
18
  @right = nil
19
19
  @winner = nil
@@ -31,7 +31,8 @@ module FlatKit
31
31
  # A sentinel node is always greater than any other node
32
32
  def <=>(other)
33
33
  return 0 if other.sentinel?
34
- return 1
34
+
35
+ 1
35
36
  end
36
37
  end
37
38
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  # Private: The Sentinel Leaf Node is used internally by the MergeTree
3
5
  #
@@ -31,7 +33,8 @@ module FlatKit
31
33
  # A sentinel node is always greater than any other node
32
34
  def <=>(other)
33
35
  return 0 if other.sentinel?
34
- return 1
36
+
37
+ 1
35
38
  end
36
39
  end
37
40
  end
data/lib/flat_kit/sort.rb CHANGED
@@ -1,13 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
4
+ # Internal: Sorts an Input and sends the sorted records to an Output
5
+ #
2
6
  class Sort
3
- attr_reader :reader
4
- attr_reader :writer
5
- attr_reader :compare_fields
6
-
7
- def initialize(input:, input_fallback: "auto",
8
- output:, output_fallback: "auto",
9
- compare_fields:)
7
+ attr_reader :reader, :writer, :compare_fields
10
8
 
9
+ def initialize(input:, output:, compare_fields:, input_fallback: "auto", output_fallback: "auto")
11
10
  @compare_fields = compare_fields
12
11
  @reader = ::FlatKit::Reader.create_reader_from_path(path: input, compare_fields: @compare_fields,
13
12
  fallback: input_fallback)
@@ -16,8 +15,8 @@ module FlatKit
16
15
  end
17
16
 
18
17
  def call
19
- ::FlatKit.logger.info "Sorting #{reader.source} into #{writer.destination} using key #{compare_fields.join(", ")}"
20
- records = Array.new.tap do |a|
18
+ ::FlatKit.logger.info "Sorting #{reader.source} into #{writer.destination} using key #{compare_fields.join(', ')}"
19
+ records = [].tap do |a|
21
20
  reader.each do |r|
22
21
  a << r
23
22
  end
@@ -1,21 +1,22 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module FlatKit
2
4
  class StatType
3
-
4
- # Status object to keep track of the count and frequency of values
5
+ # Internal: Status object to keep track of the count and frequency of values.
5
6
  #
6
7
  class NominalStats < StatType
7
-
8
8
  attr_reader :count
9
9
 
10
10
  def self.default_stats
11
- @default_stats ||= %w[ count ]
11
+ @default_stats ||= %w[count]
12
12
  end
13
13
 
14
14
  def self.all_stats
15
- @all_stats ||= %w[ count unique_count unique_values mode ]
15
+ @all_stats ||= %w[count unique_count unique_values mode]
16
16
  end
17
17
 
18
18
  def initialize(collecting_frequencies: false)
19
+ super()
19
20
  @mutex = Mutex.new
20
21
  @count = 0
21
22
  @collecting_frequencies = collecting_frequencies
@@ -24,26 +25,31 @@ module FlatKit
24
25
 
25
26
  def collected_stats
26
27
  return self.class.default_stats unless @collecting_frequencies
27
- return self.class.all_stats
28
+
29
+ self.class.all_stats
28
30
  end
29
31
 
30
32
  def mode
31
33
  return nil unless @collecting_frequencies
32
- @frequencies.max_by{ |item, item_count| item_count }.first
34
+
35
+ @frequencies.max_by { |_item, item_count| item_count }.first
33
36
  end
34
37
 
35
38
  def unique_count
36
39
  return nil unless @collecting_frequencies
40
+
37
41
  @frequencies.size
38
42
  end
39
43
 
40
44
  def unique_values
41
45
  return nil unless @collecting_frequencies
46
+
42
47
  @frequencies.keys
43
48
  end
44
49
 
45
50
  def frequencies
46
51
  return nil unless @collecting_frequencies
52
+
47
53
  @frequencies
48
54
  end
49
55
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  #--
2
4
  # Copyright (c) 2008, 2009 Jeremy Hinegardner
3
5
  # All rights reserved. See LICENSE and/or COPYING for details.
@@ -5,16 +7,14 @@
5
7
  # Pulled from Hitimes, which I also wrote
6
8
  #++
7
9
 
8
- require 'thread'
9
- require 'oj'
10
+ require "oj"
10
11
 
11
12
  module FlatKit
12
13
  class StatType
13
- #
14
- # Stats object will keep track of the _min_, _max_, _count_, _sum_ and _sumsq_
14
+ # Internal: Stats object to keep track of the _min_, _max_, _count_, _sum_ and _sumsq_
15
15
  # and when you want you may also retrieve the _mean_, _stddev_ and _rate_.
16
16
  #
17
- # this contrived example shows getting a list of all the files in a directory
17
+ # This contrived example shows getting a list of all the files in a directory
18
18
  # and running stats on file sizes.
19
19
  #
20
20
  # s = FlatKit::Stats.new
@@ -33,17 +33,14 @@ module FlatKit
33
33
  class NumericalStats < NominalStats
34
34
  # A list of the available stats
35
35
 
36
- attr_reader :min
37
- attr_reader :max
38
- attr_reader :sum
39
- attr_reader :sumsq
36
+ attr_reader :min, :max, :sum, :sumsq
40
37
 
41
38
  def self.default_stats
42
- @default_stats ||= %w[ count max mean min rate stddev sum sumsq ]
39
+ @default_stats ||= %w[count max mean min rate stddev sum sumsq]
43
40
  end
44
41
 
45
42
  def self.all_stats
46
- @all_stats ||= %w[ count max mean min mode rate stddev sum sumsq unique_count unique_values ]
43
+ @all_stats ||= %w[count max mean min mode rate stddev sum sumsq unique_count unique_values]
47
44
  end
48
45
 
49
46
  def initialize(collecting_frequencies: false)
@@ -61,8 +58,8 @@ module FlatKit
61
58
  # Return the input value.
62
59
  def update(value)
63
60
  @mutex.synchronize do
64
- @min = (value < @min) ? value : @min
65
- @max = (value > @max) ? value : @max
61
+ @min = [value, @min].min
62
+ @max = [value, @max].max
66
63
 
67
64
  @count += 1
68
65
  @sum += value
@@ -72,17 +69,18 @@ module FlatKit
72
69
  @frequencies[value] += 1 if @collecting_frequencies
73
70
  end
74
71
 
75
- return value
72
+ value
76
73
  end
77
74
 
78
75
  # call-seq:
79
76
  # stat.mean -> Float
80
- #
77
+ #
81
78
  # Return the arithmetic mean of the values put into the Stats object. If no
82
79
  # values have passed through the stats object then 0.0 is returned;
83
80
  def mean
84
81
  return 0.0 if @count.zero?
85
- return @sum / @count
82
+
83
+ @sum / @count
86
84
  end
87
85
 
88
86
  # call-seq:
@@ -100,7 +98,8 @@ module FlatKit
100
98
  #
101
99
  def rate
102
100
  return 0.0 if @sum.zero?
103
- return @count / @sum
101
+
102
+ @count / @sum
104
103
  end
105
104
 
106
105
  #
@@ -113,7 +112,8 @@ module FlatKit
113
112
  #
114
113
  def stddev
115
114
  return 0.0 unless @count > 1
116
- Math.sqrt((@sumsq - ((@sum * @sum)/@count)) / (@count - 1))
115
+
116
+ Math.sqrt((@sumsq - ((@sum * @sum) / @count)) / (@count - 1))
117
117
  end
118
118
  end
119
119
  end