flat_kit 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.md +6 -0
  3. data/Manifest.txt +34 -0
  4. data/Rakefile +1 -0
  5. data/examples/stream-active-record-to-csv.rb +42 -0
  6. data/lib/flat_kit.rb +9 -2
  7. data/lib/flat_kit/cli.rb +11 -0
  8. data/lib/flat_kit/command.rb +1 -0
  9. data/lib/flat_kit/command/stats.rb +94 -0
  10. data/lib/flat_kit/descendant_tracker.rb +9 -0
  11. data/lib/flat_kit/event_emitter.rb +2 -2
  12. data/lib/flat_kit/field_stats.rb +241 -0
  13. data/lib/flat_kit/field_type.rb +75 -0
  14. data/lib/flat_kit/field_type/boolean_type.rb +48 -0
  15. data/lib/flat_kit/field_type/date_type.rb +179 -0
  16. data/lib/flat_kit/field_type/float_type.rb +37 -0
  17. data/lib/flat_kit/field_type/guess_type.rb +20 -0
  18. data/lib/flat_kit/field_type/integer_type.rb +34 -0
  19. data/lib/flat_kit/field_type/null_type.rb +35 -0
  20. data/lib/flat_kit/field_type/string_type.rb +22 -0
  21. data/lib/flat_kit/field_type/timestamp_type.rb +47 -0
  22. data/lib/flat_kit/field_type/unknown_type.rb +26 -0
  23. data/lib/flat_kit/jsonl/record.rb +2 -2
  24. data/lib/flat_kit/jsonl/writer.rb +18 -9
  25. data/lib/flat_kit/merge.rb +4 -2
  26. data/lib/flat_kit/output.rb +4 -0
  27. data/lib/flat_kit/position.rb +19 -0
  28. data/lib/flat_kit/stat_type.rb +65 -0
  29. data/lib/flat_kit/stat_type/nominal_stats.rb +58 -0
  30. data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
  31. data/lib/flat_kit/stat_type/ordinal_stats.rb +42 -0
  32. data/lib/flat_kit/stats.rb +66 -0
  33. data/lib/flat_kit/writer.rb +17 -2
  34. data/lib/flat_kit/xsv/writer.rb +24 -9
  35. data/test/field_type/test_boolean_type.rb +65 -0
  36. data/test/field_type/test_date_type.rb +71 -0
  37. data/test/field_type/test_float_type.rb +56 -0
  38. data/test/field_type/test_guess_type.rb +14 -0
  39. data/test/field_type/test_integer_type.rb +52 -0
  40. data/test/field_type/test_null_type.rb +41 -0
  41. data/test/field_type/test_string_type.rb +18 -0
  42. data/test/field_type/test_timestamp_type.rb +108 -0
  43. data/test/field_type/test_unknown_type.rb +35 -0
  44. data/test/jsonl/test_writer.rb +21 -3
  45. data/test/run +23 -0
  46. data/test/stat_type/test_nominal_stats.rb +69 -0
  47. data/test/stat_type/test_numerical_stats.rb +118 -0
  48. data/test/stat_type/test_ordinal_stats.rb +92 -0
  49. data/test/test_event_emitter.rb +19 -2
  50. data/test/test_field_stats.rb +134 -0
  51. data/test/test_field_type.rb +34 -0
  52. data/test/xsv/test_writer.rb +25 -4
  53. metadata +65 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 90aee146f0054a6afc302716d7bcbac4e013e0ec67bd09d390de326d9d090a27
4
- data.tar.gz: b34138e0095b751ff2e29322c3899f7c4df54b5bd9649e2c2c9d550966225acb
3
+ metadata.gz: 8ee02f6b5e9ed51f565da86c04a0d4600cc0af271e3c8b90f67e22f00ff450fd
4
+ data.tar.gz: ee58c4ee864c91dc2e11c429891a5c4a7455541f3747f745aa8de0203b6c2142
5
5
  SHA512:
6
- metadata.gz: 27f23b37c07b0702152904ffe4062269a091005dd112a570f374b902cedb9c489d5802e3d7cac366a275da435a5f1d38b25f3d20e34b8e12f3185330bdd65b0e
7
- data.tar.gz: 9f11eda352f16db26ec54793836ff0e5153fb624659989d710789b7afb07a46be284ccbe235e649fb0bcb676327a040fc35f91a63c20c9772cccb3e50517e150
6
+ metadata.gz: 1cbcb7d15633b06818d0647935475f1e87f9944baa2c0c8450fb9bb54bcad3c4f82a178c5ef3eb43529f05ef61091958f1eee751291dcf45f3632a18c2b0bfeb
7
+ data.tar.gz: ff78cd8f3e0795da93f50fc796b209a996a10b4fda7524ebf0878895ca467bbb178e2f4bf6da9547d08ba7c5fff1c8791a22c5f4341a59f569af0b22e8d8858c
data/HISTORY.md CHANGED
@@ -1,4 +1,10 @@
1
1
  # FlatKit Changelog
2
+ ## Version 0.3.0
3
+
4
+ * Changing the event listening api to include meta data about the event
5
+ * Add field type detection
6
+ * Add a 'stats' command to generate stats about the data file
7
+
2
8
  ## Version 0.2.0
3
9
 
4
10
  * add in event listening to allow for additional integrations
data/Manifest.txt CHANGED
@@ -5,15 +5,28 @@ Manifest.txt
5
5
  README.md
6
6
  Rakefile
7
7
  bin/fk
8
+ examples/stream-active-record-to-csv.rb
8
9
  lib/flat_kit.rb
9
10
  lib/flat_kit/cli.rb
10
11
  lib/flat_kit/command.rb
11
12
  lib/flat_kit/command/cat.rb
12
13
  lib/flat_kit/command/merge.rb
13
14
  lib/flat_kit/command/sort.rb
15
+ lib/flat_kit/command/stats.rb
14
16
  lib/flat_kit/descendant_tracker.rb
15
17
  lib/flat_kit/error.rb
16
18
  lib/flat_kit/event_emitter.rb
19
+ lib/flat_kit/field_stats.rb
20
+ lib/flat_kit/field_type.rb
21
+ lib/flat_kit/field_type/boolean_type.rb
22
+ lib/flat_kit/field_type/date_type.rb
23
+ lib/flat_kit/field_type/float_type.rb
24
+ lib/flat_kit/field_type/guess_type.rb
25
+ lib/flat_kit/field_type/integer_type.rb
26
+ lib/flat_kit/field_type/null_type.rb
27
+ lib/flat_kit/field_type/string_type.rb
28
+ lib/flat_kit/field_type/timestamp_type.rb
29
+ lib/flat_kit/field_type/unknown_type.rb
17
30
  lib/flat_kit/format.rb
18
31
  lib/flat_kit/input.rb
19
32
  lib/flat_kit/input/file.rb
@@ -31,11 +44,17 @@ lib/flat_kit/merge_tree.rb
31
44
  lib/flat_kit/output.rb
32
45
  lib/flat_kit/output/file.rb
33
46
  lib/flat_kit/output/io.rb
47
+ lib/flat_kit/position.rb
34
48
  lib/flat_kit/reader.rb
35
49
  lib/flat_kit/record.rb
36
50
  lib/flat_kit/sentinel_internal_node.rb
37
51
  lib/flat_kit/sentinel_leaf_node.rb
38
52
  lib/flat_kit/sort.rb
53
+ lib/flat_kit/stat_type.rb
54
+ lib/flat_kit/stat_type/nominal_stats.rb
55
+ lib/flat_kit/stat_type/numerical_stats.rb
56
+ lib/flat_kit/stat_type/ordinal_stats.rb
57
+ lib/flat_kit/stats.rb
39
58
  lib/flat_kit/writer.rb
40
59
  lib/flat_kit/xsv.rb
41
60
  lib/flat_kit/xsv/format.rb
@@ -47,6 +66,15 @@ tasks/extension.rake
47
66
  tasks/man.rake
48
67
  tasks/this.rb
49
68
  test/device_dataset.rb
69
+ test/field_type/test_boolean_type.rb
70
+ test/field_type/test_date_type.rb
71
+ test/field_type/test_float_type.rb
72
+ test/field_type/test_guess_type.rb
73
+ test/field_type/test_integer_type.rb
74
+ test/field_type/test_null_type.rb
75
+ test/field_type/test_string_type.rb
76
+ test/field_type/test_timestamp_type.rb
77
+ test/field_type/test_unknown_type.rb
50
78
  test/input/test_file.rb
51
79
  test/input/test_io.rb
52
80
  test/jsonl/test_format.rb
@@ -55,8 +83,14 @@ test/jsonl/test_record.rb
55
83
  test/jsonl/test_writer.rb
56
84
  test/output/test_file.rb
57
85
  test/output/test_io.rb
86
+ test/run
87
+ test/stat_type/test_nominal_stats.rb
88
+ test/stat_type/test_numerical_stats.rb
89
+ test/stat_type/test_ordinal_stats.rb
58
90
  test/test_conversions.rb
59
91
  test/test_event_emitter.rb
92
+ test/test_field_stats.rb
93
+ test/test_field_type.rb
60
94
  test/test_format.rb
61
95
  test/test_helper.rb
62
96
  test/test_merge.rb
data/Rakefile CHANGED
@@ -10,6 +10,7 @@ This.ruby_gemspec do |spec|
10
10
  spec.add_dependency('oj', '~> 3.0')
11
11
  spec.add_dependency('optimist', '~> 3.0')
12
12
 
13
+ spec.add_development_dependency( 'faker' , '~> 2.16')
13
14
  spec.add_development_dependency( 'rake' , '~> 13.0')
14
15
  spec.add_development_dependency( 'minitest' , '~> 5.11' )
15
16
  spec.add_development_dependency( 'minitest-focus' , '~> 1.2' )
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #------------------------------------------------------------------------------
4
+ # This is an example to show how to stream an active record scope to a CSV file
5
+ # using FlatKit.
6
+ #------------------------------------------------------------------------------
7
+
8
+ require 'flat_kit' # gem 'flat_kit'
9
+ require 'progress_bar' # gem 'progress-bar'
10
+
11
+ # get an appropriate scope from one of your models - or any scope for that
12
+ # matter
13
+ scope = MyActiveRecordModel.all
14
+
15
+ # Output to a file that is csv, and automatically gzipped
16
+ #
17
+ output_csv = ::FlatKit::Xsv::Writer.new(destination: "export.csv.gz")
18
+
19
+ # handy progress bar
20
+ bar = ProgressBar.new(scope.count)
21
+
22
+ # using active record in batches to not pull all the records from the database at
23
+ # once
24
+ #
25
+ # https://api.rubyonrails.org/classes/ActiveRecord/Batches.html#method-i-find_each
26
+ scope.find_each do |record|
27
+
28
+ # generate an XSV Record by pulling the attributes out of the active record
29
+ # model. You may also want to generate a hash from a query or something
30
+ # along those lines. In any case pass in a Hash to complete_structured_data:
31
+ # and nil to data.
32
+ xsv_record = ::FlatKit::Xsv::Record.new(data: nil, complete_structured_data: record.attributes)
33
+
34
+ # FlatKit will automatically handle writing out the header line based upon
35
+ # the fields in the first record.
36
+ output_csv.write(xsv_record)
37
+
38
+ bar.increment!
39
+ end
40
+
41
+ # close the output file explicitly
42
+ output_csv.close
data/lib/flat_kit.rb CHANGED
@@ -1,11 +1,14 @@
1
1
  module FlatKit
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
4
4
  require 'flat_kit/error'
5
+ require 'flat_kit/descendant_tracker'
5
6
  require 'flat_kit/logger'
6
7
  require 'flat_kit/event_emitter'
7
- require 'flat_kit/descendant_tracker'
8
+
9
+ require 'flat_kit/field_type'
8
10
  require 'flat_kit/format'
11
+ require 'flat_kit/position'
9
12
  require 'flat_kit/record'
10
13
  require 'flat_kit/reader'
11
14
  require 'flat_kit/writer'
@@ -16,6 +19,10 @@ require 'flat_kit/xsv'
16
19
  require 'flat_kit/jsonl'
17
20
  require 'flat_kit/merge'
18
21
  require 'flat_kit/sort'
22
+ require 'flat_kit/stats'
23
+
24
+ require 'flat_kit/stat_type'
25
+ require 'flat_kit/field_stats'
19
26
 
20
27
  require 'flat_kit/merge_tree'
21
28
  require 'flat_kit/internal_node'
data/lib/flat_kit/cli.rb CHANGED
@@ -72,7 +72,18 @@ module FlatKit
72
72
  ::FlatKit.logger.debug argv
73
73
 
74
74
  command_name = argv.shift
75
+ if command_name.downcase == "help" then
76
+ parser.educate
77
+ exit 0
78
+ end
79
+
75
80
  command_klass = FlatKit::Command.for(command_name)
81
+ if command_klass.nil? then
82
+ $stdout.puts "ERROR: Unknown command '#{command_name}'"
83
+ parser.educate
84
+ exit 0
85
+ end
86
+
76
87
  command = command_klass.new(argv: argv, logger: ::FlatKit.logger, env: env)
77
88
  command.call
78
89
  end
@@ -51,3 +51,4 @@ end
51
51
  require 'flat_kit/command/cat'
52
52
  require 'flat_kit/command/merge'
53
53
  require 'flat_kit/command/sort'
54
+ require 'flat_kit/command/stats'
@@ -0,0 +1,94 @@
1
+ module FlatKit
2
+ class Command
3
+ class Stats < ::FlatKit::Command
4
+
5
+ def self.name
6
+ "stats"
7
+ end
8
+
9
+ def self.description
10
+ "Collect and report stats on the inputfile"
11
+ end
12
+
13
+ def self.parser
14
+ ::Optimist::Parser.new do
15
+ banner "#{Sort.description}"
16
+ banner ""
17
+
18
+ banner <<~BANNER
19
+ Given an input file collect basic statistics.
20
+
21
+ The statistics can vary based upon the datatype of the field.
22
+
23
+ Numeric fields will report the basic count, min, max, mean, standard deviation and sum.
24
+ Non-numeric fields that are comparable, like dates, will report count, min and max.
25
+ Other non-numeric fields will only report the count.
26
+
27
+ Adding --cardinality will report the count, and frequency of distinct values in the result.
28
+ This will allow for reporting the median value.
29
+
30
+ The fields upon which stats are collected may be selected with the --fields parameter.
31
+ By default statistics are collected on all fields.
32
+
33
+ The flatfile type(s) will be automatically determined by the file name.
34
+
35
+ The output can be dumped as a CSV, JSON or a a formated ascii table.
36
+
37
+ BANNER
38
+
39
+ banner <<~USAGE
40
+
41
+ Usage:
42
+ fk stats --everything file.json
43
+ fk stats --select surname,given_name file.csv
44
+ fk stats --select surname,given_name --output-format json file.csv > stats.json
45
+ fk stats --select field1,field2 --output-format json input.csv
46
+ fk stats --select field1 file.json.gz -o stats.csv
47
+ gunzip -c file.json.gz | fk stats --input-format json --output-format text
48
+
49
+ USAGE
50
+
51
+ banner <<~OPTIONS
52
+
53
+ Options:
54
+
55
+ OPTIONS
56
+
57
+ opt :output, "Send the output to the given path instead of standard out.", default: "<stdout>"
58
+ opt :input_format, "Input format, csv or json", default: "auto", short: :none
59
+ opt :output_format, "Output format, csv or json", default: "auto", short: :none
60
+ opt :select, "The comma separted list of field(s) to report stats on", required: false, type: :string
61
+ opt :everything, "Show all statistics that are possible", default: false
62
+ opt :cardinality, "Show the cardinality of the fields, this requires additional memory", default: false
63
+ end
64
+ end
65
+
66
+ def parse
67
+ parser = self.class.parser
68
+ ::Optimist::with_standard_exception_handling(parser) do
69
+ begin
70
+ opts = parser.parse(argv)
71
+ fields = ::FlatKit::Stats::AllFields
72
+ fields = CSV.parse_line(opts[:select]) if opts[:select]
73
+
74
+ stats = [FieldStats::CORE_STATS]
75
+ stats << FieldStats::CARDINALITY_STATS if opts[:cardinality] || opts[:everything]
76
+
77
+ paths = parser.leftovers
78
+ raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed" if paths.size > 1
79
+ path = paths.first || "-" # default to stdin
80
+ @stats = ::FlatKit::Stats.new(input: path, input_fallback: opts[:input_format],
81
+ output: opts[:output], output_fallback: opts[:output_format],
82
+ fields_to_stat: fields, stats_to_collect: stats)
83
+ rescue ::FlatKit::Error => e
84
+ raise ::Optimist::CommandlineError, e.message
85
+ end
86
+ end
87
+ end
88
+
89
+ def call
90
+ @stats.call
91
+ end
92
+ end
93
+ end
94
+ end
@@ -23,5 +23,14 @@ module FlatKit
23
23
  child_klass.send(method, *args)
24
24
  end
25
25
  end
26
+
27
+ #
28
+ # Find all the children that return truthy from the given method with args
29
+ #
30
+ def find_children(method, *args)
31
+ children.select do |child_klass|
32
+ child_klass.send(method, *args)
33
+ end
34
+ end
26
35
  end
27
36
  end
@@ -20,9 +20,9 @@ module FlatKit
20
20
  _listeners.clear
21
21
  end
22
22
 
23
- def notify_listeners(name:, data:)
23
+ def notify_listeners(name:, data:, meta: nil)
24
24
  _listeners.each do |l|
25
- l.on_event(name: name, data: data)
25
+ l.on_event(name: name, data: data, meta: meta)
26
26
  end
27
27
  end
28
28
 
@@ -0,0 +1,241 @@
1
+ module FlatKit
2
+ # Collect stats on a single field. We may not know what the field data type is
3
+ # to start with, so collect a bunch of values until we have the threshold, and
4
+ # then calculate stats based upon the data types determined by the guess
5
+ #
6
+ class FieldStats
7
+ DEFAULT_GUESS_THRESHOLD = 1000
8
+
9
+ CORE_STATS = :core
10
+ CARDINALITY_STATS = :cardinality
11
+
12
+ ALL_STATS = [ CORE_STATS, CARDINALITY_STATS ]
13
+
14
+ EXPORT_FIELDS = %w[
15
+ name
16
+ type
17
+ count
18
+ max
19
+ mean
20
+ min
21
+ stddev
22
+ sum
23
+ mode
24
+ unique_count
25
+
26
+ max_length
27
+ mean_length
28
+ min_length
29
+ stddev_length
30
+ mode_length
31
+ unique_count_lengths
32
+
33
+ null_count
34
+ unknown_count
35
+ out_of_type_count
36
+ total_count
37
+ null_percent
38
+ unknown_percent
39
+ ]
40
+
41
+
42
+ attr_reader :type_counts
43
+ attr_reader :field_type
44
+ attr_reader :name
45
+
46
+ def initialize(name:, stats_to_collect: CORE_STATS, type: ::FlatKit::FieldType::GuessType, guess_threshold: DEFAULT_GUESS_THRESHOLD)
47
+ @name = name
48
+ @field_type = type
49
+ @guess_threshold = guess_threshold
50
+ @type_counts = Hash.new(0)
51
+ @out_of_type_count = 0
52
+ @values = []
53
+ @stats = nil
54
+ @length_stats = nil
55
+ @stats_to_collect = [stats_to_collect].flatten
56
+
57
+ @stats_to_collect.each do |collection_set|
58
+ next if ALL_STATS.include?(collection_set)
59
+ raise ArgumentError, "#{collection_set} is not a valid stats collection set, must be one of #{ALL_STATS.map { |s| s.to_s }.join(", ") }"
60
+ end
61
+ raise ArgumentError, "type: must be FieldType subclasses - not #{type}" unless type.kind_of?(Class) && (type.superclass == ::FlatKit::FieldType)
62
+ end
63
+
64
+ def field_type_determined?
65
+ @field_type != ::FlatKit::FieldType::GuessType
66
+ end
67
+
68
+ def update(value)
69
+ update_type_count(value)
70
+
71
+ if field_type_determined? then
72
+ update_stats(value)
73
+ else
74
+ @values << value
75
+
76
+ if @values.size >= @guess_threshold then
77
+ resolve_guess
78
+ end
79
+ end
80
+ end
81
+
82
+ def collecting_frequencies?
83
+ @stats_to_collect.include?(CARDINALITY_STATS)
84
+ end
85
+
86
+ def type
87
+ @field_type.type_name
88
+ end
89
+
90
+ def count
91
+ stats.count
92
+ end
93
+
94
+ def max
95
+ stats.max if stats.respond_to?(:max)
96
+ end
97
+
98
+ def mean
99
+ stats.mean if stats.respond_to?(:mean)
100
+ end
101
+
102
+ def min
103
+ stats.min if stats.respond_to?(:min)
104
+ end
105
+
106
+ def stddev
107
+ stats.stddev if stats.respond_to?(:stddev)
108
+ end
109
+
110
+ def sum
111
+ stats.sum if stats.respond_to?(:sum)
112
+ end
113
+
114
+ def mode
115
+ stats.mode if collecting_frequencies?
116
+ end
117
+
118
+ def unique_count
119
+ stats.unique_count if collecting_frequencies?
120
+ end
121
+
122
+ def unique_values
123
+ stats.unique_values if collecting_frequencies?
124
+ end
125
+
126
+ def frequencies
127
+ stats.frequencies if collecting_frequencies?
128
+ end
129
+
130
+ def min_length
131
+ length_stats.min if @length_stats
132
+ end
133
+
134
+ def max_length
135
+ length_stats.max if @length_stats
136
+ end
137
+
138
+ def mean_length
139
+ length_stats.mean if @length_stats
140
+ end
141
+
142
+ def stddev_length
143
+ length_stats.stddev if @length_stats
144
+ end
145
+
146
+ def mode_length
147
+ length_stats.mode if @length_stats && collecting_frequencies?
148
+ end
149
+
150
+ def unique_count_lengths
151
+ length_stats.unique_count if @length_stats && collecting_frequencies?
152
+ end
153
+
154
+ def unique_values_lengths
155
+ length_stats.unique_values if @length_stats && collecting_frequencies?
156
+ end
157
+
158
+ def length_frequencies
159
+ length_stats.frequencies if @length_stats && collecting_frequencies?
160
+ end
161
+
162
+ def null_count
163
+ type_counts[FieldType::NullType]
164
+ end
165
+
166
+ def total_count
167
+ stats.count + @out_of_type_count
168
+ end
169
+
170
+ def out_of_type_count
171
+ @out_of_type_count
172
+ end
173
+
174
+ def null_percent
175
+ return 0 if total_count.zero?
176
+ ((null_count.to_f / total_count) * 100.0).truncate(2)
177
+ end
178
+
179
+ def unknown_count
180
+ type_counts[FieldType::UnknownType]
181
+ end
182
+
183
+ def unknown_percent
184
+ return 0 if total_count.zero?
185
+ ((unknown_count.to_f / total_count) * 100.0).truncate(2)
186
+ end
187
+
188
+ def to_hash
189
+ resolve_guess
190
+
191
+ Hash.new.tap do |h|
192
+ EXPORT_FIELDS.each do |n|
193
+ h[n] = self.send(n)
194
+ end
195
+ end
196
+ end
197
+
198
+ private
199
+
200
+ def stats
201
+ resolve_guess
202
+ @stats
203
+ end
204
+
205
+ def length_stats
206
+ resolve_guess
207
+ @length_stats
208
+ end
209
+
210
+ def update_stats(value)
211
+ coerced_value = @field_type.coerce(value)
212
+ if coerced_value == FieldType::CoerceFailure then
213
+ @out_of_type_count += 1
214
+ return
215
+ end
216
+
217
+ @stats.update(coerced_value)
218
+ @length_stats.update(coerced_value.to_s.length) if @length_stats
219
+ end
220
+
221
+ def update_type_count(value)
222
+ guess = FieldType.best_guess(value)
223
+ type_counts[guess] += 1
224
+ return guess
225
+ end
226
+
227
+ def resolve_guess
228
+ return if field_type_determined?
229
+ best_guess_type, _best_guess_count = type_counts.max_by { |k, v| v }
230
+ @field_type = best_guess_type
231
+ @stats = StatType.for(@field_type).new(collecting_frequencies: collecting_frequencies?)
232
+ if @field_type == ::FlatKit::FieldType::StringType then
233
+ @length_stats = ::FlatKit::StatType::NumericalStats.new(collecting_frequencies: collecting_frequencies?)
234
+ end
235
+ @values.each do |v|
236
+ update_stats(v)
237
+ end
238
+ @values.clear
239
+ end
240
+ end
241
+ end