flat_kit 0.3.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +13 -0
- data/Manifest.txt +3 -42
- data/README.md +2 -0
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +46 -32
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +41 -39
- data/lib/flat_kit/command.rb +10 -11
- data/lib/flat_kit/descendant_tracker.rb +9 -6
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +6 -3
- data/lib/flat_kit/field_stats.rb +31 -26
- data/lib/flat_kit/field_type/boolean_type.rb +9 -5
- data/lib/flat_kit/field_type/date_type.rb +19 -17
- data/lib/flat_kit/field_type/float_type.rb +15 -9
- data/lib/flat_kit/field_type/guess_type.rb +9 -6
- data/lib/flat_kit/field_type/integer_type.rb +6 -4
- data/lib/flat_kit/field_type/null_type.rb +5 -1
- data/lib/flat_kit/field_type/string_type.rb +8 -6
- data/lib/flat_kit/field_type/timestamp_type.rb +11 -10
- data/lib/flat_kit/field_type/unknown_type.rb +12 -8
- data/lib/flat_kit/field_type.rb +52 -44
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +15 -18
- data/lib/flat_kit/jsonl/writer.rb +8 -10
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +13 -21
- data/lib/flat_kit/merge.rb +21 -18
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +8 -7
- data/lib/flat_kit/position.rb +3 -4
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -13
- data/lib/flat_kit/stat_type/nominal_stats.rb +13 -7
- data/lib/flat_kit/stat_type/numerical_stats.rb +18 -18
- data/lib/flat_kit/stat_type/ordinal_stats.rb +8 -13
- data/lib/flat_kit/stat_type.rb +18 -13
- data/lib/flat_kit/stats.rb +12 -15
- data/lib/flat_kit/writer.rb +5 -6
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +22 -18
- data/lib/flat_kit/xsv/writer.rb +13 -10
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +31 -26
- metadata +20 -161
- data/Rakefile +0 -21
- data/examples/stream-active-record-to-csv.rb +0 -42
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/field_type/test_boolean_type.rb +0 -65
- data/test/field_type/test_date_type.rb +0 -71
- data/test/field_type/test_float_type.rb +0 -56
- data/test/field_type/test_guess_type.rb +0 -14
- data/test/field_type/test_integer_type.rb +0 -52
- data/test/field_type/test_null_type.rb +0 -41
- data/test/field_type/test_string_type.rb +0 -18
- data/test/field_type/test_timestamp_type.rb +0 -108
- data/test/field_type/test_unknown_type.rb +0 -35
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -86
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/run +0 -23
- data/test/stat_type/test_nominal_stats.rb +0 -69
- data/test/stat_type/test_numerical_stats.rb +0 -118
- data/test/stat_type/test_ordinal_stats.rb +0 -92
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -89
- data/test/test_field_stats.rb +0 -134
- data/test/test_field_type.rb +0 -34
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -89
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
#--
|
|
2
4
|
# Copyright (c) 2008, 2009 Jeremy Hinegardner
|
|
3
5
|
# All rights reserved. See LICENSE and/or COPYING for details.
|
|
@@ -5,16 +7,14 @@
|
|
|
5
7
|
# Pulled from Hitimes, which I also wrote
|
|
6
8
|
#++
|
|
7
9
|
|
|
8
|
-
require
|
|
9
|
-
require 'oj'
|
|
10
|
+
require "oj"
|
|
10
11
|
|
|
11
12
|
module FlatKit
|
|
12
13
|
class StatType
|
|
13
|
-
#
|
|
14
|
-
# Stats object will keep track of the _min_, _max_, _count_, _sum_ and _sumsq_
|
|
14
|
+
# Internal: Stats object to keep track of the _min_, _max_, _count_, _sum_ and _sumsq_
|
|
15
15
|
# and when you want you may also retrieve the _mean_, _stddev_ and _rate_.
|
|
16
16
|
#
|
|
17
|
-
#
|
|
17
|
+
# This contrived example shows getting a list of all the files in a directory
|
|
18
18
|
# and running stats on file sizes.
|
|
19
19
|
#
|
|
20
20
|
# s = FlatKit::Stats.new
|
|
@@ -33,17 +33,14 @@ module FlatKit
|
|
|
33
33
|
class NumericalStats < NominalStats
|
|
34
34
|
# A list of the available stats
|
|
35
35
|
|
|
36
|
-
attr_reader :min
|
|
37
|
-
attr_reader :max
|
|
38
|
-
attr_reader :sum
|
|
39
|
-
attr_reader :sumsq
|
|
36
|
+
attr_reader :min, :max, :sum, :sumsq
|
|
40
37
|
|
|
41
38
|
def self.default_stats
|
|
42
|
-
@default_stats ||= %w[
|
|
39
|
+
@default_stats ||= %w[count max mean min rate stddev sum sumsq]
|
|
43
40
|
end
|
|
44
41
|
|
|
45
42
|
def self.all_stats
|
|
46
|
-
@all_stats ||= %w[
|
|
43
|
+
@all_stats ||= %w[count max mean min mode rate stddev sum sumsq unique_count unique_values]
|
|
47
44
|
end
|
|
48
45
|
|
|
49
46
|
def initialize(collecting_frequencies: false)
|
|
@@ -61,8 +58,8 @@ module FlatKit
|
|
|
61
58
|
# Return the input value.
|
|
62
59
|
def update(value)
|
|
63
60
|
@mutex.synchronize do
|
|
64
|
-
@min =
|
|
65
|
-
@max =
|
|
61
|
+
@min = [value, @min].min
|
|
62
|
+
@max = [value, @max].max
|
|
66
63
|
|
|
67
64
|
@count += 1
|
|
68
65
|
@sum += value
|
|
@@ -72,17 +69,18 @@ module FlatKit
|
|
|
72
69
|
@frequencies[value] += 1 if @collecting_frequencies
|
|
73
70
|
end
|
|
74
71
|
|
|
75
|
-
|
|
72
|
+
value
|
|
76
73
|
end
|
|
77
74
|
|
|
78
75
|
# call-seq:
|
|
79
76
|
# stat.mean -> Float
|
|
80
|
-
#
|
|
77
|
+
#
|
|
81
78
|
# Return the arithmetic mean of the values put into the Stats object. If no
|
|
82
79
|
# values have passed through the stats object then 0.0 is returned;
|
|
83
80
|
def mean
|
|
84
81
|
return 0.0 if @count.zero?
|
|
85
|
-
|
|
82
|
+
|
|
83
|
+
@sum / @count
|
|
86
84
|
end
|
|
87
85
|
|
|
88
86
|
# call-seq:
|
|
@@ -100,7 +98,8 @@ module FlatKit
|
|
|
100
98
|
#
|
|
101
99
|
def rate
|
|
102
100
|
return 0.0 if @sum.zero?
|
|
103
|
-
|
|
101
|
+
|
|
102
|
+
@count / @sum
|
|
104
103
|
end
|
|
105
104
|
|
|
106
105
|
#
|
|
@@ -113,7 +112,8 @@ module FlatKit
|
|
|
113
112
|
#
|
|
114
113
|
def stddev
|
|
115
114
|
return 0.0 unless @count > 1
|
|
116
|
-
|
|
115
|
+
|
|
116
|
+
Math.sqrt((@sumsq - ((@sum * @sum) / @count)) / (@count - 1))
|
|
117
117
|
end
|
|
118
118
|
end
|
|
119
119
|
end
|
|
@@ -1,18 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module FlatKit
|
|
2
4
|
class StatType
|
|
3
|
-
# Same as NominalStats and also collects min and max
|
|
5
|
+
# Internal: Same as NominalStats and also collects min and max
|
|
4
6
|
#
|
|
5
7
|
class OrdinalStats < NominalStats
|
|
6
|
-
|
|
7
|
-
attr_reader :min
|
|
8
|
-
attr_reader :max
|
|
8
|
+
attr_reader :min, :max
|
|
9
9
|
|
|
10
10
|
def self.default_stats
|
|
11
|
-
@default_stats ||= %w[
|
|
11
|
+
@default_stats ||= %w[count max min]
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
def self.all_stats
|
|
15
|
-
@all_stats ||= %w[
|
|
15
|
+
@all_stats ||= %w[count max min unique_count unique_values mode]
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def initialize(collecting_frequencies: false)
|
|
@@ -23,14 +23,9 @@ module FlatKit
|
|
|
23
23
|
|
|
24
24
|
def update(value)
|
|
25
25
|
@mutex.synchronize do
|
|
26
|
+
@min = value if @min.nil? || (value < @min)
|
|
26
27
|
|
|
27
|
-
if @
|
|
28
|
-
@min = value
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
if @max.nil? || (value > @max) then
|
|
32
|
-
@max = value
|
|
33
|
-
end
|
|
28
|
+
@max = value if @max.nil? || (value > @max)
|
|
34
29
|
|
|
35
30
|
@count += 1
|
|
36
31
|
|
data/lib/flat_kit/stat_type.rb
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module FlatKit
|
|
4
|
+
# Internal: Base class of all the statistic types
|
|
5
|
+
#
|
|
2
6
|
class StatType
|
|
3
7
|
def self.nominal_types
|
|
4
|
-
[FieldType::BooleanType, FieldType::StringType, FieldType::NullType
|
|
8
|
+
[FieldType::BooleanType, FieldType::StringType, FieldType::NullType]
|
|
5
9
|
end
|
|
6
10
|
|
|
7
11
|
def self.ordinal_types
|
|
@@ -16,6 +20,7 @@ module FlatKit
|
|
|
16
20
|
return OrdinalStats if ordinal_types.include?(type)
|
|
17
21
|
return NominalStats if nominal_types.include?(type)
|
|
18
22
|
return NumericalStats if numerical_types.include?(type)
|
|
23
|
+
|
|
19
24
|
raise ArgumentError, "Unknown stat type for #{type}"
|
|
20
25
|
end
|
|
21
26
|
|
|
@@ -23,26 +28,26 @@ module FlatKit
|
|
|
23
28
|
raise NotImplementedError, "#{self.class.name} must implement #collected_stats"
|
|
24
29
|
end
|
|
25
30
|
|
|
26
|
-
#
|
|
31
|
+
#
|
|
27
32
|
# call-seq:
|
|
28
33
|
# stat.to_hash -> Hash
|
|
29
34
|
# stat.to_hash( %w[ count max mean ]) -> Hash
|
|
30
35
|
#
|
|
31
36
|
# return a hash of the stats. By default this returns a hash of all stats
|
|
32
37
|
# but passing in an array of items will limit the stats returned to only
|
|
33
|
-
# those in the Array.
|
|
38
|
+
# those in the Array.
|
|
34
39
|
#
|
|
35
40
|
# If passed in an empty array or nil to to_hash then STATS is assumed to be
|
|
36
41
|
# the list of stats to return in the hash.
|
|
37
42
|
#
|
|
38
|
-
def to_hash(
|
|
43
|
+
def to_hash(*args)
|
|
39
44
|
h = {}
|
|
40
|
-
args = [
|
|
41
|
-
args =
|
|
45
|
+
args = [args].flatten
|
|
46
|
+
args = collected_stats if args.empty?
|
|
42
47
|
args.each do |meth|
|
|
43
|
-
h[meth] =
|
|
48
|
+
h[meth] = send(meth)
|
|
44
49
|
end
|
|
45
|
-
|
|
50
|
+
h
|
|
46
51
|
end
|
|
47
52
|
|
|
48
53
|
#
|
|
@@ -54,12 +59,12 @@ module FlatKit
|
|
|
54
59
|
# of all the stats. If an array of items is passed in, those that match the
|
|
55
60
|
# known stats will be all that is included in the json output.
|
|
56
61
|
#
|
|
57
|
-
def to_json(
|
|
58
|
-
h = to_hash(
|
|
62
|
+
def to_json(*args)
|
|
63
|
+
h = to_hash(*args)
|
|
59
64
|
Oj.dump(h)
|
|
60
65
|
end
|
|
61
66
|
end
|
|
62
67
|
end
|
|
63
|
-
require
|
|
64
|
-
require
|
|
65
|
-
require
|
|
68
|
+
require "flat_kit/stat_type/nominal_stats"
|
|
69
|
+
require "flat_kit/stat_type/ordinal_stats"
|
|
70
|
+
require "flat_kit/stat_type/numerical_stats"
|
data/lib/flat_kit/stats.rb
CHANGED
|
@@ -1,22 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module FlatKit
|
|
4
|
+
# Internal: Collects stats from an Input and sends those stats to an Output
|
|
5
|
+
#
|
|
2
6
|
class Stats
|
|
3
7
|
include ::FlatKit::EventEmitter
|
|
4
8
|
|
|
5
9
|
AllFields = Class.new.freeze
|
|
6
10
|
|
|
7
|
-
attr_reader :reader
|
|
8
|
-
attr_reader :writer
|
|
9
|
-
attr_reader :fields_to_stat
|
|
10
|
-
attr_reader :stats_to_collect
|
|
11
|
-
attr_reader :stats_by_field
|
|
11
|
+
attr_reader :reader, :writer, :fields_to_stat, :stats_to_collect, :stats_by_field
|
|
12
12
|
|
|
13
|
-
def initialize(input:, input_fallback: "auto",
|
|
14
|
-
output:, output_fallback: "auto",
|
|
13
|
+
def initialize(input:, output:, input_fallback: "auto", output_fallback: "auto",
|
|
15
14
|
fields_to_stat: AllFields, stats_to_collect: FieldStats::CORE_STATS)
|
|
16
|
-
|
|
17
15
|
@fields_to_stat = fields_to_stat
|
|
18
16
|
@stats_to_collect = stats_to_collect
|
|
19
|
-
@stats_by_field =
|
|
17
|
+
@stats_by_field = {}
|
|
20
18
|
@record_count = 0
|
|
21
19
|
|
|
22
20
|
@reader = ::FlatKit::Reader.create_reader_from_path(path: input, fallback: input_fallback)
|
|
@@ -32,7 +30,8 @@ module FlatKit
|
|
|
32
30
|
|
|
33
31
|
def collecting_stats_on_field?(name)
|
|
34
32
|
return true if @fields_to_stat == AllFields
|
|
35
|
-
|
|
33
|
+
|
|
34
|
+
@fields_to_stat.include?(name)
|
|
36
35
|
end
|
|
37
36
|
|
|
38
37
|
private
|
|
@@ -41,9 +40,7 @@ module FlatKit
|
|
|
41
40
|
::FlatKit.logger.debug "Calculating statistics on #{reader.source}"
|
|
42
41
|
reader.each do |record|
|
|
43
42
|
record.to_hash.each do |field_name, field_value|
|
|
44
|
-
if collecting_stats_on_field?(field_name)
|
|
45
|
-
update_stats_for_field(name: field_name, value: field_value)
|
|
46
|
-
end
|
|
43
|
+
update_stats_for_field(name: field_name, value: field_value) if collecting_stats_on_field?(field_name)
|
|
47
44
|
end
|
|
48
45
|
@record_count += 1
|
|
49
46
|
end
|
|
@@ -55,8 +52,8 @@ module FlatKit
|
|
|
55
52
|
end
|
|
56
53
|
|
|
57
54
|
def write_stat_records
|
|
58
|
-
@stats_by_field.
|
|
59
|
-
h = stats.to_hash.merge({"total_record_count" => @record_count })
|
|
55
|
+
@stats_by_field.each_value do |stats|
|
|
56
|
+
h = stats.to_hash.merge({ "total_record_count" => @record_count })
|
|
60
57
|
record = ::FlatKit::Jsonl::Record.new(data: nil, complete_structured_data: h)
|
|
61
58
|
|
|
62
59
|
@writer.write(record)
|
data/lib/flat_kit/writer.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module FlatKit
|
|
2
4
|
# Public: The base class for all format writers.
|
|
3
5
|
#
|
|
@@ -11,10 +13,7 @@ module FlatKit
|
|
|
11
13
|
# See the Xsv::Writer and Jsonl::Writer for examples.
|
|
12
14
|
#
|
|
13
15
|
class Writer
|
|
14
|
-
attr_reader :destination
|
|
15
|
-
attr_reader :output
|
|
16
|
-
attr_reader :count
|
|
17
|
-
attr_reader :last_position
|
|
16
|
+
attr_reader :destination, :output, :count, :last_position
|
|
18
17
|
|
|
19
18
|
def self.create_writer_from_path(path:, fallback:, reader_format:)
|
|
20
19
|
fallback = reader_format if fallback == "auto"
|
|
@@ -34,9 +33,9 @@ module FlatKit
|
|
|
34
33
|
end
|
|
35
34
|
|
|
36
35
|
def current_position
|
|
37
|
-
::FlatKit::Position.new(index: @count,
|
|
36
|
+
::FlatKit::Position.new(index: @count, # since this hasn't been written yet its the right index
|
|
38
37
|
offset: output.tell,
|
|
39
|
-
bytesize: 0)
|
|
38
|
+
bytesize: 0) # nothing has been written yet
|
|
40
39
|
end
|
|
41
40
|
|
|
42
41
|
# The write method MUST return a Position object detailing the location the
|
data/lib/flat_kit/xsv/format.rb
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module FlatKit
|
|
2
4
|
module Xsv
|
|
5
|
+
# Internal: xsv format class holding the metadata about the xsv format utilities
|
|
6
|
+
#
|
|
3
7
|
class Format < ::FlatKit::Format
|
|
4
8
|
def self.format_name
|
|
5
9
|
"xsv"
|
|
@@ -7,10 +11,10 @@ module FlatKit
|
|
|
7
11
|
|
|
8
12
|
def self.handles?(filename)
|
|
9
13
|
parts = filename.split(".")
|
|
10
|
-
%w[
|
|
14
|
+
%w[csv tsv txt].each do |ext|
|
|
11
15
|
return true if parts.include?(ext)
|
|
12
16
|
end
|
|
13
|
-
|
|
17
|
+
false
|
|
14
18
|
end
|
|
15
19
|
|
|
16
20
|
def self.reader
|
data/lib/flat_kit/xsv/reader.rb
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
2
4
|
|
|
3
5
|
module FlatKit
|
|
4
6
|
module Xsv
|
|
7
|
+
# Internal: Reader class that parses and yields records from xsv files
|
|
8
|
+
#
|
|
5
9
|
class Reader < ::FlatKit::Reader
|
|
6
|
-
attr_reader :input
|
|
7
|
-
attr_reader :count
|
|
8
|
-
attr_reader :fields
|
|
10
|
+
attr_reader :input, :count, :fields
|
|
9
11
|
|
|
10
12
|
def self.format_name
|
|
11
13
|
::FlatKit::Xsv::Format.format_name
|
|
@@ -15,7 +17,7 @@ module FlatKit
|
|
|
15
17
|
{
|
|
16
18
|
headers: :first_row,
|
|
17
19
|
converters: :numeric,
|
|
18
|
-
return_headers: false
|
|
20
|
+
return_headers: false,
|
|
19
21
|
}
|
|
20
22
|
end
|
|
21
23
|
|
|
@@ -36,7 +38,7 @@ module FlatKit
|
|
|
36
38
|
yield record
|
|
37
39
|
end
|
|
38
40
|
input.close
|
|
39
|
-
rescue => e
|
|
41
|
+
rescue StandardError => e
|
|
40
42
|
::FlatKit.logger.error "Error reading xsv records from #{input.name}: #{e}"
|
|
41
43
|
raise ::FlatKit::Error, e
|
|
42
44
|
end
|
data/lib/flat_kit/xsv/record.rb
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
require "flat_kit/record"
|
|
3
5
|
|
|
4
6
|
module FlatKit
|
|
5
7
|
module Xsv
|
|
8
|
+
# Internal: Class that exposes the data from an XSV format record to the flatkit API
|
|
9
|
+
#
|
|
6
10
|
class Record < ::FlatKit::Record
|
|
7
11
|
attr_reader :ordered_fields
|
|
8
12
|
|
|
@@ -10,8 +14,8 @@ module FlatKit
|
|
|
10
14
|
::FlatKit::Xsv::Format.format_name
|
|
11
15
|
end
|
|
12
16
|
|
|
13
|
-
def self.from_record(record
|
|
14
|
-
if record.instance_of?(FlatKit::Xsv::Record)
|
|
17
|
+
def self.from_record(record)
|
|
18
|
+
if record.instance_of?(FlatKit::Xsv::Record)
|
|
15
19
|
new(data: record.data, compare_fields: record.compare_fields)
|
|
16
20
|
else
|
|
17
21
|
new(data: nil, compare_fields: record.compare_fields,
|
|
@@ -28,9 +32,9 @@ module FlatKit
|
|
|
28
32
|
@complete_structured_data = complete_structured_data
|
|
29
33
|
@ordered_fields = ordered_fields
|
|
30
34
|
|
|
31
|
-
if data.nil? && (complete_structured_data.nil? || complete_structured_data.empty?)
|
|
35
|
+
if data.nil? && (complete_structured_data.nil? || complete_structured_data.empty?)
|
|
32
36
|
raise FlatKit::Error,
|
|
33
|
-
|
|
37
|
+
"#{self.class} requires initialization from data: or complete_structured_data:"
|
|
34
38
|
end
|
|
35
39
|
|
|
36
40
|
resolve_ordered_fields
|
|
@@ -38,7 +42,8 @@ module FlatKit
|
|
|
38
42
|
|
|
39
43
|
def [](key)
|
|
40
44
|
return nil unless @compare_fields.include?(key)
|
|
41
|
-
|
|
45
|
+
|
|
46
|
+
if data.nil? && !@complete_structured_data.nil?
|
|
42
47
|
@complete_structured_data[key]
|
|
43
48
|
else
|
|
44
49
|
data[key]
|
|
@@ -53,10 +58,8 @@ module FlatKit
|
|
|
53
58
|
def to_a
|
|
54
59
|
return data.fields unless data.nil?
|
|
55
60
|
|
|
56
|
-
|
|
57
|
-
@
|
|
58
|
-
a << @complete_structured_data[field]
|
|
59
|
-
end
|
|
61
|
+
@ordered_fields.map do |field|
|
|
62
|
+
@complete_structured_data[field]
|
|
60
63
|
end
|
|
61
64
|
end
|
|
62
65
|
|
|
@@ -71,19 +74,20 @@ module FlatKit
|
|
|
71
74
|
# values in that order.
|
|
72
75
|
def to_s
|
|
73
76
|
return data.to_csv unless data.nil?
|
|
77
|
+
|
|
74
78
|
CSV.generate_line(to_a)
|
|
75
79
|
end
|
|
76
80
|
|
|
77
81
|
private
|
|
78
82
|
|
|
79
83
|
def resolve_ordered_fields
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
84
|
+
return unless (@ordered_fields == :auto) || (@ordered_fields.nil? || @ordered_fields.empty?)
|
|
85
|
+
|
|
86
|
+
@ordered_fields = if @data.nil? || @data.empty?
|
|
87
|
+
complete_structured_data.keys
|
|
88
|
+
else
|
|
89
|
+
@data.headers
|
|
90
|
+
end
|
|
87
91
|
end
|
|
88
92
|
end
|
|
89
93
|
end
|
data/lib/flat_kit/xsv/writer.rb
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module FlatKit
|
|
2
4
|
module Xsv
|
|
5
|
+
# Internal: Writer that takes flatkit records and writes them to XSV Output
|
|
6
|
+
#
|
|
3
7
|
class Writer < ::FlatKit::Writer
|
|
4
|
-
attr_reader :fields
|
|
5
|
-
attr_reader :header_bytes
|
|
8
|
+
attr_reader :fields, :header_bytes
|
|
6
9
|
|
|
7
10
|
def self.format_name
|
|
8
11
|
::FlatKit::Xsv::Format.format_name
|
|
@@ -11,7 +14,7 @@ module FlatKit
|
|
|
11
14
|
def self.default_csv_options
|
|
12
15
|
{
|
|
13
16
|
headers: nil,
|
|
14
|
-
write_headers: true
|
|
17
|
+
write_headers: true,
|
|
15
18
|
}
|
|
16
19
|
end
|
|
17
20
|
|
|
@@ -21,10 +24,10 @@ module FlatKit
|
|
|
21
24
|
@we_write_the_header = nil
|
|
22
25
|
@csv_options = Writer.default_csv_options.dup
|
|
23
26
|
|
|
24
|
-
if @fields == :auto
|
|
27
|
+
if @fields == :auto
|
|
25
28
|
@we_write_the_header = true
|
|
26
29
|
else
|
|
27
|
-
@csv_options
|
|
30
|
+
@csv_options[:headers] = fields
|
|
28
31
|
@we_write_the_header = false
|
|
29
32
|
end
|
|
30
33
|
|
|
@@ -48,17 +51,17 @@ module FlatKit
|
|
|
48
51
|
else
|
|
49
52
|
raise FlatKit::Error, "Unable to write records of type #{record.class}"
|
|
50
53
|
end
|
|
51
|
-
rescue FlatKit::Error =>
|
|
52
|
-
raise
|
|
53
|
-
rescue => e
|
|
54
|
-
::FlatKit.logger.error "Error
|
|
54
|
+
rescue FlatKit::Error => e
|
|
55
|
+
raise e
|
|
56
|
+
rescue StandardError => e
|
|
57
|
+
::FlatKit.logger.error "Error writing xsv records to #{output.name}: #{e}"
|
|
55
58
|
raise ::FlatKit::Error, e
|
|
56
59
|
end
|
|
57
60
|
|
|
58
61
|
private
|
|
59
62
|
|
|
60
63
|
def write_record(record)
|
|
61
|
-
if @we_write_the_header && @count
|
|
64
|
+
if @we_write_the_header && @count.zero?
|
|
62
65
|
@csv << record.ordered_fields
|
|
63
66
|
@header_bytes = output.tell
|
|
64
67
|
end
|
data/lib/flat_kit/xsv.rb
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module FlatKit
|
|
4
|
+
# Internal: Namespace for the xsv file format handling
|
|
2
5
|
module Xsv
|
|
3
6
|
end
|
|
4
7
|
end
|
|
5
|
-
require
|
|
6
|
-
require
|
|
7
|
-
require
|
|
8
|
-
require
|
|
8
|
+
require "flat_kit/xsv/record"
|
|
9
|
+
require "flat_kit/xsv/reader"
|
|
10
|
+
require "flat_kit/xsv/writer"
|
|
11
|
+
require "flat_kit/xsv/format"
|
data/lib/flat_kit.rb
CHANGED
|
@@ -1,31 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Public: Top level namespace for this gem
|
|
4
|
+
#
|
|
1
5
|
module FlatKit
|
|
2
|
-
VERSION = "
|
|
6
|
+
VERSION = "1.1.0"
|
|
3
7
|
end
|
|
4
|
-
require
|
|
5
|
-
require
|
|
6
|
-
require
|
|
7
|
-
require
|
|
8
|
+
require "flat_kit/error"
|
|
9
|
+
require "flat_kit/descendant_tracker"
|
|
10
|
+
require "flat_kit/log_formatter"
|
|
11
|
+
require "flat_kit/logger"
|
|
12
|
+
require "flat_kit/event_emitter"
|
|
8
13
|
|
|
9
|
-
require
|
|
10
|
-
require
|
|
11
|
-
require
|
|
12
|
-
require
|
|
13
|
-
require
|
|
14
|
-
require
|
|
15
|
-
require
|
|
16
|
-
require
|
|
17
|
-
require
|
|
18
|
-
require
|
|
19
|
-
require
|
|
20
|
-
require
|
|
21
|
-
require
|
|
22
|
-
require
|
|
14
|
+
require "flat_kit/field_type"
|
|
15
|
+
require "flat_kit/format"
|
|
16
|
+
require "flat_kit/position"
|
|
17
|
+
require "flat_kit/record"
|
|
18
|
+
require "flat_kit/reader"
|
|
19
|
+
require "flat_kit/writer"
|
|
20
|
+
require "flat_kit/input"
|
|
21
|
+
require "flat_kit/output"
|
|
22
|
+
require "flat_kit/cli"
|
|
23
|
+
require "flat_kit/xsv"
|
|
24
|
+
require "flat_kit/jsonl"
|
|
25
|
+
require "flat_kit/merge"
|
|
26
|
+
require "flat_kit/sort"
|
|
27
|
+
require "flat_kit/stats"
|
|
23
28
|
|
|
24
|
-
require
|
|
25
|
-
require
|
|
29
|
+
require "flat_kit/stat_type"
|
|
30
|
+
require "flat_kit/field_stats"
|
|
26
31
|
|
|
27
|
-
require
|
|
28
|
-
require
|
|
29
|
-
require
|
|
30
|
-
require
|
|
31
|
-
require
|
|
32
|
+
require "flat_kit/merge_tree"
|
|
33
|
+
require "flat_kit/internal_node"
|
|
34
|
+
require "flat_kit/sentinel_internal_node"
|
|
35
|
+
require "flat_kit/sentinel_leaf_node"
|
|
36
|
+
require "flat_kit/leaf_node"
|