flat_kit 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
@@ -1,8 +1,12 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "oj"
|
4
|
+
require "flat_kit/record"
|
3
5
|
|
4
6
|
module FlatKit
|
5
7
|
module Jsonl
|
8
|
+
# Internal: Class that exposes data from a JSONL format record to the flatkit api
|
9
|
+
#
|
6
10
|
class Record < ::FlatKit::Record
|
7
11
|
attr_reader :compare_data
|
8
12
|
|
@@ -11,7 +15,7 @@ module FlatKit
|
|
11
15
|
end
|
12
16
|
|
13
17
|
def self.from_record(record)
|
14
|
-
if record.instance_of?(FlatKit::Jsonl::Record)
|
18
|
+
if record.instance_of?(FlatKit::Jsonl::Record)
|
15
19
|
|
16
20
|
structured = record.complete_structured_data? ? record.complete_structured_data : nil
|
17
21
|
|
@@ -25,22 +29,20 @@ module FlatKit
|
|
25
29
|
end
|
26
30
|
|
27
31
|
def initialize(data:, compare_fields: :none,
|
28
|
-
compare_data:
|
32
|
+
compare_data: {},
|
29
33
|
complete_structured_data: nil)
|
30
34
|
super(data: data, compare_fields: compare_fields)
|
31
35
|
|
32
36
|
@complete_structured_data = complete_structured_data
|
33
37
|
|
34
|
-
if complete_structured_data? && (compare_data.nil? || compare_data.empty?)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
38
|
+
@compare_data = if complete_structured_data? && (compare_data.nil? || compare_data.empty?)
|
39
|
+
complete_structured_data
|
40
|
+
else
|
41
|
+
compare_data
|
42
|
+
end
|
39
43
|
|
40
44
|
# only load compare data if it dosn't exist
|
41
|
-
if data && compare_data.empty?
|
42
|
-
quick_parse
|
43
|
-
end
|
45
|
+
quick_parse if data && compare_data.empty?
|
44
46
|
end
|
45
47
|
|
46
48
|
def [](key)
|
@@ -48,7 +50,7 @@ module FlatKit
|
|
48
50
|
end
|
49
51
|
|
50
52
|
def complete_structured_data
|
51
|
-
@complete_structured_data ||= Oj.load(data)
|
53
|
+
@complete_structured_data ||= Oj.load(data, mode: :strict)
|
52
54
|
end
|
53
55
|
alias to_hash complete_structured_data
|
54
56
|
|
@@ -59,9 +61,7 @@ module FlatKit
|
|
59
61
|
# overriding parent accessor since we may be initialized without raw bytes
|
60
62
|
# to parse
|
61
63
|
def data
|
62
|
-
if @data.nil? && complete_structured_data?
|
63
|
-
@data = Oj.dump(complete_structured_data)
|
64
|
-
end
|
64
|
+
@data = Oj.dump(complete_structured_data, mode: :json) if @data.nil? && complete_structured_data?
|
65
65
|
@data
|
66
66
|
end
|
67
67
|
alias to_s data
|
@@ -79,6 +79,3 @@ module FlatKit
|
|
79
79
|
end
|
80
80
|
end
|
81
81
|
end
|
82
|
-
|
83
|
-
|
84
|
-
|
@@ -1,19 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
module Jsonl
|
5
|
+
# Internal: Class that writes flatkit records to JSONL files
|
6
|
+
#
|
3
7
|
class Writer < ::FlatKit::Writer
|
4
|
-
attr_reader :output
|
5
|
-
attr_reader :count
|
6
|
-
|
7
8
|
def self.format_name
|
8
9
|
::FlatKit::Jsonl::Format.format_name
|
9
10
|
end
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
@output = ::FlatKit::Output.from(@destination)
|
14
|
-
@count = 0
|
15
|
-
end
|
16
|
-
|
12
|
+
# write the record and return the Position the record was written
|
13
|
+
#
|
17
14
|
def write(record)
|
18
15
|
case record
|
19
16
|
when FlatKit::Jsonl::Record
|
@@ -24,21 +21,31 @@ module FlatKit
|
|
24
21
|
else
|
25
22
|
raise FlatKit::Error, "Unable to write records of type #{record.class}"
|
26
23
|
end
|
27
|
-
rescue FlatKit::Error =>
|
28
|
-
raise
|
29
|
-
rescue => e
|
30
|
-
::FlatKit.logger.error "Error
|
24
|
+
rescue FlatKit::Error => e
|
25
|
+
raise e
|
26
|
+
rescue StandardError => e
|
27
|
+
::FlatKit.logger.error "Error writing jsonl records to #{output.name}: #{e}"
|
31
28
|
raise ::FlatKit::Error, e
|
32
29
|
end
|
33
30
|
|
34
|
-
def close
|
35
|
-
@output.close
|
36
|
-
end
|
37
|
-
|
38
31
|
def write_record(record)
|
39
|
-
#
|
32
|
+
# the index of the record being written is the same as the count of records written so far
|
33
|
+
record_index = @count
|
34
|
+
|
35
|
+
# get the current output stream position to calculate bytes written
|
36
|
+
start_offset = output.io.tell
|
37
|
+
|
38
|
+
# enforces ending in newline if it doesn't already have one
|
40
39
|
output.io.puts record.to_s
|
40
|
+
|
41
|
+
ending_offset = output.io.tell
|
42
|
+
bytes_written = (ending_offset - start_offset)
|
43
|
+
|
41
44
|
@count += 1
|
45
|
+
|
46
|
+
@last_position = ::FlatKit::Position.new(index: record_index,
|
47
|
+
offset: start_offset,
|
48
|
+
bytesize: bytes_written)
|
42
49
|
end
|
43
50
|
end
|
44
51
|
end
|
data/lib/flat_kit/jsonl.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: Top level namespace for the newline-oriented JSON format
|
5
|
+
#
|
2
6
|
module Jsonl
|
3
7
|
end
|
4
8
|
end
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
9
|
+
require "flat_kit/jsonl/record"
|
10
|
+
require "flat_kit/jsonl/reader"
|
11
|
+
require "flat_kit/jsonl/writer"
|
12
|
+
require "flat_kit/jsonl/format"
|
data/lib/flat_kit/leaf_node.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
# Private: The LeafNode is a wrapper around a Reader object to enable
|
3
5
|
# a consistent api for use in the MergeTree
|
@@ -9,11 +11,9 @@ module FlatKit
|
|
9
11
|
# If all the data is used up from the reader, it also notifies the next level
|
10
12
|
# of that so the next level can remove it from the tree.
|
11
13
|
class LeafNode
|
12
|
-
|
13
14
|
include Comparable
|
14
15
|
|
15
|
-
attr_reader :reader
|
16
|
-
attr_reader :value
|
16
|
+
attr_reader :reader, :value
|
17
17
|
|
18
18
|
attr_accessor :next_level
|
19
19
|
|
@@ -43,7 +43,7 @@ module FlatKit
|
|
43
43
|
|
44
44
|
def update_and_replay
|
45
45
|
self.next
|
46
|
-
if finished?
|
46
|
+
if finished?
|
47
47
|
::FlatKit.logger.debug "#{reader.source} has finished reading #{reader.count} records"
|
48
48
|
next_level.player_finished(self)
|
49
49
|
end
|
@@ -65,7 +65,8 @@ module FlatKit
|
|
65
65
|
|
66
66
|
def <=>(other)
|
67
67
|
return -1 if other.sentinel?
|
68
|
-
|
68
|
+
|
69
|
+
value <=> (other.value)
|
69
70
|
end
|
70
71
|
end
|
71
72
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "logger"
|
4
|
+
|
5
|
+
module FlatKit
|
6
|
+
# Internal: Log formatting class for FlatKit
|
7
|
+
#
|
8
|
+
class LogFormatter < ::Logger::Formatter
|
9
|
+
FORMAT = "%s %5d %05s : %s\n"
|
10
|
+
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
|
11
|
+
def initialize
|
12
|
+
super
|
13
|
+
self.datetime_format = DATETIME_FORMAT
|
14
|
+
end
|
15
|
+
|
16
|
+
def call(severity, time, _progname, msg)
|
17
|
+
format(FORMAT, format_datetime(time.utc), Process.pid, severity, msg2str(msg))
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/flat_kit/logger.rb
CHANGED
@@ -1,19 +1,12 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
class LogFormatter < ::Logger::Formatter
|
5
|
-
FORMAT = "%s %5d %05s : %s\n".freeze
|
6
|
-
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ".freeze
|
7
|
-
def initialize
|
8
|
-
super
|
9
|
-
self.datetime_format = DATETIME_FORMAT
|
10
|
-
end
|
11
|
-
|
12
|
-
def call(severity, time, progname, msg)
|
13
|
-
FORMAT % [format_datetime(time.utc), Process.pid, severity, msg2str(msg)]
|
14
|
-
end
|
15
|
-
end
|
3
|
+
require "logger"
|
16
4
|
|
5
|
+
# Public: Top level namespace for the gem
|
6
|
+
#
|
7
|
+
module FlatKit
|
8
|
+
# Internal: Logger class
|
9
|
+
#
|
17
10
|
class Logger
|
18
11
|
def self.for_io(io)
|
19
12
|
::Logger.new(io, formatter: LogFormatter.new)
|
@@ -26,11 +19,11 @@ module FlatKit
|
|
26
19
|
end
|
27
20
|
|
28
21
|
def self.log_to(destination = $stderr)
|
29
|
-
if destination.
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
22
|
+
@logger = if destination.is_a?(::IO)
|
23
|
+
::FlatKit::Logger.for_io(destination)
|
24
|
+
else
|
25
|
+
::FlatKit::Logger.for_path(destination)
|
26
|
+
end
|
34
27
|
end
|
35
28
|
|
36
29
|
def self.logger
|
data/lib/flat_kit/merge.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: Class implementing merging from N inputs and output to 1 output.
|
5
|
+
#
|
2
6
|
class Merge
|
3
|
-
|
4
7
|
include ::FlatKit::EventEmitter
|
5
8
|
|
6
|
-
attr_reader :readers
|
7
|
-
attr_reader :writer
|
8
|
-
attr_reader :compare_fields
|
9
|
+
attr_reader :readers, :writer, :compare_fields
|
9
10
|
|
10
|
-
def initialize(inputs:, input_fallback: "auto",
|
11
|
-
output:, output_fallback: "auto",
|
12
|
-
compare_fields:)
|
11
|
+
def initialize(inputs:, output:, compare_fields:, input_fallback: "auto", output_fallback: "auto")
|
13
12
|
@compare_fields = compare_fields
|
14
13
|
@readers = ::FlatKit::Reader.create_readers_from_paths(paths: inputs, compare_fields: @compare_fields,
|
15
14
|
fallback: input_fallback)
|
@@ -19,19 +18,12 @@ module FlatKit
|
|
19
18
|
|
20
19
|
def call
|
21
20
|
::FlatKit.logger.debug "Merging the following files into #{writer.destination}"
|
22
|
-
::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(
|
21
|
+
::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(', ')}"
|
23
22
|
readers.each do |r|
|
24
23
|
::FlatKit.logger.debug " #{r.source}"
|
25
24
|
end
|
26
25
|
|
27
|
-
|
28
|
-
|
29
|
-
notify_listeners(name: :start, data: :start)
|
30
|
-
merge_tree.each do |record|
|
31
|
-
writer.write(record)
|
32
|
-
notify_listeners(name: :record, data: record)
|
33
|
-
end
|
34
|
-
notify_listeners(name: :stop, data: :stop)
|
26
|
+
run_merge(readers)
|
35
27
|
|
36
28
|
readers.each do |r|
|
37
29
|
::FlatKit.logger.debug " #{r.source} produced #{r.count} records"
|
@@ -40,5 +32,18 @@ module FlatKit
|
|
40
32
|
writer.close
|
41
33
|
::FlatKit.logger.debug "Wrote #{writer.count} records to #{writer.destination}"
|
42
34
|
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def run_merge(readers)
|
39
|
+
tree = ::FlatKit::MergeTree.new(readers)
|
40
|
+
notify_listeners(name: :start, data: :start)
|
41
|
+
tree.each do |record|
|
42
|
+
position = writer.write(record)
|
43
|
+
meta = { position: position }
|
44
|
+
notify_listeners(name: :record, data: record, meta: meta)
|
45
|
+
end
|
46
|
+
notify_listeners(name: :stop, data: :stop)
|
47
|
+
end
|
43
48
|
end
|
44
49
|
end
|
data/lib/flat_kit/merge_tree.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
# Public: Merge a list of sorted records from Readers into a single output Writer
|
3
5
|
#
|
@@ -29,9 +31,7 @@ module FlatKit
|
|
29
31
|
class MergeTree
|
30
32
|
include Enumerable
|
31
33
|
|
32
|
-
attr_reader :leaves
|
33
|
-
attr_reader :levels
|
34
|
-
attr_reader :readers
|
34
|
+
attr_reader :leaves, :levels, :readers
|
35
35
|
|
36
36
|
def initialize(readers)
|
37
37
|
@readers = readers
|
@@ -44,9 +44,7 @@ module FlatKit
|
|
44
44
|
|
45
45
|
# Need to pad the leaves to an even number so that the slicing by 2 for
|
46
46
|
# the tournament will work
|
47
|
-
if @leaves.size.odd?
|
48
|
-
@leaves << SentinelLeafNode.new
|
49
|
-
end
|
47
|
+
@leaves << SentinelLeafNode.new if @leaves.size.odd?
|
50
48
|
|
51
49
|
init_tree
|
52
50
|
end
|
@@ -94,6 +92,7 @@ module FlatKit
|
|
94
92
|
def each
|
95
93
|
loop do
|
96
94
|
break if root.leaf.finished?
|
95
|
+
|
97
96
|
yield root.value
|
98
97
|
# consume the yielded value and have the tournament tree replay those
|
99
98
|
# brackets affected
|
data/lib/flat_kit/output/file.rb
CHANGED
@@ -1,22 +1,31 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "zlib"
|
4
|
+
require "pathname"
|
2
5
|
|
3
6
|
module FlatKit
|
4
7
|
class Output
|
8
|
+
# Internal: File output implementation
|
9
|
+
#
|
5
10
|
class File < Output
|
6
11
|
attr_reader :path
|
7
12
|
|
13
|
+
# internal api method for testing purposes
|
14
|
+
attr_reader :io
|
15
|
+
|
8
16
|
def self.handles?(obj)
|
9
17
|
return true if obj.instance_of?(Pathname)
|
10
18
|
return false unless obj.instance_of?(String)
|
11
19
|
|
12
20
|
# incase these get loaded in different orders
|
13
|
-
return false if ::FlatKit::Output::IO.
|
14
|
-
return false if ::FlatKit::Output::IO.
|
21
|
+
return false if ::FlatKit::Output::IO.stdout?(obj)
|
22
|
+
return false if ::FlatKit::Output::IO.stderr?(obj)
|
15
23
|
|
16
|
-
|
24
|
+
true
|
17
25
|
end
|
18
26
|
|
19
27
|
def initialize(obj)
|
28
|
+
super()
|
20
29
|
@path = Pathname.new(obj)
|
21
30
|
path.dirname.mkpath
|
22
31
|
@io = open_output(path)
|
@@ -30,11 +39,6 @@ module FlatKit
|
|
30
39
|
@io.close
|
31
40
|
end
|
32
41
|
|
33
|
-
# internal api method for testing purposes
|
34
|
-
def io
|
35
|
-
@io
|
36
|
-
end
|
37
|
-
|
38
42
|
private
|
39
43
|
|
40
44
|
# open the opropriate otuput type depending on the destination file name
|
data/lib/flat_kit/output/io.rb
CHANGED
@@ -1,73 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class Output
|
5
|
+
# Internal: Non-file Output impelementation - this is genrally to stdout or stderr
|
6
|
+
#
|
3
7
|
class IO < Output
|
4
|
-
attr_reader :count
|
8
|
+
attr_reader :count, :name
|
9
|
+
|
10
|
+
# internal api method for testing
|
11
|
+
attr_reader :io
|
5
12
|
|
6
|
-
STDOUTS = %w[
|
7
|
-
STDERRS = %w[
|
13
|
+
STDOUTS = %w[stdout STDOUT - <stdout>].freeze
|
14
|
+
STDERRS = %w[stderr STDERR <stderr>].freeze
|
8
15
|
|
9
16
|
def self.handles?(obj)
|
10
|
-
return true if
|
11
|
-
return true if
|
12
|
-
return true if [
|
13
|
-
|
17
|
+
return true if stderr?(obj)
|
18
|
+
return true if stdout?(obj)
|
19
|
+
return true if [::File, ::StringIO, ::IO].any? { |klass| obj.is_a?(klass) }
|
20
|
+
|
21
|
+
false
|
14
22
|
end
|
15
23
|
|
16
|
-
def self.
|
24
|
+
def self.stderr?(obj)
|
17
25
|
case obj
|
18
26
|
when String
|
19
27
|
return true if STDERRS.include?(obj)
|
20
28
|
when ::IO
|
21
|
-
return true if obj ==
|
29
|
+
return true if obj == $stderr
|
22
30
|
end
|
23
|
-
|
31
|
+
false
|
24
32
|
end
|
25
33
|
|
26
|
-
def self.
|
34
|
+
def self.stdout?(obj)
|
27
35
|
case obj
|
28
36
|
when String
|
29
37
|
return true if STDOUTS.include?(obj)
|
30
38
|
when ::IO
|
31
|
-
return true if obj ==
|
39
|
+
return true if obj == $stdout
|
32
40
|
end
|
33
|
-
|
41
|
+
false
|
34
42
|
end
|
35
43
|
|
36
44
|
def initialize(obj)
|
45
|
+
super()
|
37
46
|
@count = 0
|
38
|
-
|
47
|
+
@name = nil
|
48
|
+
@io = nil
|
49
|
+
init_name_and_io(obj)
|
50
|
+
end
|
51
|
+
|
52
|
+
# this goes to an io stream and we are not in charge of opening it
|
53
|
+
def close
|
54
|
+
@io.close
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def init_name_and_io(obj)
|
60
|
+
if self.class.stdout?(obj)
|
39
61
|
@name = "<STDOUT>"
|
40
62
|
@io = $stdout
|
41
|
-
elsif self.class.
|
63
|
+
elsif self.class.stderr?(obj)
|
42
64
|
@name = "<STDERR>"
|
43
65
|
@io = $stderr
|
44
|
-
elsif obj.
|
45
|
-
@name = obj.path
|
66
|
+
elsif obj.is_a?(::IO)
|
67
|
+
@name = (obj.respond_to?(:path) && obj.path) || obj.inspect
|
46
68
|
@io = obj
|
47
|
-
elsif obj.
|
48
|
-
@name = obj.inspect
|
49
|
-
@io = obj
|
50
|
-
elsif obj.kind_of?(::IO) then
|
69
|
+
elsif obj.is_a?(::StringIO)
|
51
70
|
@name = obj.inspect
|
52
71
|
@io = obj
|
53
72
|
else
|
54
73
|
raise ::FlatKit::Error, "Unable to create #{self.class} from #{obj.class} : #{obj.inspect}"
|
55
74
|
end
|
56
75
|
end
|
57
|
-
|
58
|
-
def name
|
59
|
-
@name
|
60
|
-
end
|
61
|
-
|
62
|
-
# this goes to an io stream and we are not in charge of opening it
|
63
|
-
def close
|
64
|
-
@io.close
|
65
|
-
end
|
66
|
-
|
67
|
-
# internal api method for testing
|
68
|
-
def io
|
69
|
-
@io
|
70
|
-
end
|
71
76
|
end
|
72
77
|
end
|
73
78
|
end
|
data/lib/flat_kit/output.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: Base clases for all output handlers
|
5
|
+
#
|
2
6
|
class Output
|
3
7
|
extend DescendantTracker
|
4
8
|
|
5
9
|
def self.from(out)
|
6
|
-
return out if out.
|
10
|
+
return out if out.is_a?(::FlatKit::Output)
|
7
11
|
|
8
12
|
out_klass = find_child(:handles?, out)
|
9
|
-
if out_klass
|
10
|
-
return out_klass.new(out)
|
11
|
-
end
|
13
|
+
return out_klass.new(out) if out_klass
|
12
14
|
|
13
15
|
raise FlatKit::Error, "Unable to create output from #{out.class} : #{out.inspect}"
|
14
16
|
end
|
@@ -17,16 +19,19 @@ module FlatKit
|
|
17
19
|
raise NotImplementedError, "#{self.class} must implement #name"
|
18
20
|
end
|
19
21
|
|
20
|
-
#
|
21
22
|
def io
|
22
23
|
raise NotImplementedError, "#{self.class} must implement #io"
|
23
24
|
end
|
24
25
|
|
26
|
+
def tell
|
27
|
+
io.tell
|
28
|
+
end
|
29
|
+
|
25
30
|
def close
|
26
31
|
raise NotImplementedError, "#{self.class} must implement #close"
|
27
32
|
end
|
28
33
|
end
|
29
34
|
end
|
30
35
|
|
31
|
-
require
|
32
|
-
require
|
36
|
+
require "flat_kit/output/io"
|
37
|
+
require "flat_kit/output/file"
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module FlatKit
|
4
|
+
# The information about the position of a record in an IO stream
|
5
|
+
#
|
6
|
+
# Generally this is going to be returned by a write_record method to return
|
7
|
+
# information about the record that was just written
|
8
|
+
#
|
9
|
+
class Position
|
10
|
+
attr_reader :index, :offset, :bytesize # zero based # byte offset in the IO stream # byte length of the record
|
11
|
+
|
12
|
+
def initialize(index: nil, offset: nil, bytesize: nil)
|
13
|
+
@index = index
|
14
|
+
@offset = offset
|
15
|
+
@bytesize = bytesize
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/flat_kit/reader.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
# Public: the base class for all format readers.
|
3
5
|
#
|
@@ -14,24 +16,21 @@ module FlatKit
|
|
14
16
|
# API:
|
15
17
|
#
|
16
18
|
# initialize(source:, compare_fields:)
|
17
|
-
# each -> Yields / returns
|
19
|
+
# each -> Yields / returns
|
18
20
|
#
|
19
21
|
class Reader
|
20
22
|
include Enumerable
|
21
23
|
|
22
|
-
attr_reader :source
|
23
|
-
attr_reader :compare_fields
|
24
|
+
attr_reader :source, :compare_fields
|
24
25
|
|
25
26
|
def self.create_reader_from_path(path: "-", fallback: "auto", compare_fields: :none)
|
26
27
|
format = ::FlatKit::Format.for_with_fallback!(path: path, fallback: fallback)
|
27
|
-
|
28
|
+
format.reader.new(source: path, compare_fields: compare_fields)
|
28
29
|
end
|
29
30
|
|
30
31
|
def self.create_readers_from_paths(paths:, fallback: "auto", compare_fields: :none)
|
31
32
|
# default to stdin if there are no paths
|
32
|
-
if paths.empty?
|
33
|
-
paths << "-"
|
34
|
-
end
|
33
|
+
paths << "-" if paths.empty?
|
35
34
|
|
36
35
|
paths.map do |path|
|
37
36
|
create_reader_from_path(path: path, fallback: fallback, compare_fields: compare_fields)
|
@@ -55,7 +54,8 @@ module FlatKit
|
|
55
54
|
|
56
55
|
def resolve_compare_fields(value)
|
57
56
|
return [] if value == :none
|
58
|
-
|
57
|
+
|
58
|
+
value
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
data/lib/flat_kit/record.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
# Public: The base class that all record classes should inherit from.
|
3
5
|
#
|
@@ -35,11 +37,9 @@ module FlatKit
|
|
35
37
|
# # the initialize method must call super(data:, compare_fields:) to
|
36
38
|
# initializa the root data structures
|
37
39
|
class Record
|
38
|
-
|
39
40
|
include Comparable
|
40
41
|
|
41
|
-
attr_reader :data
|
42
|
-
attr_reader :compare_fields
|
42
|
+
attr_reader :data, :compare_fields
|
43
43
|
|
44
44
|
def initialize(data:, compare_fields:)
|
45
45
|
@data = data
|
@@ -57,15 +57,15 @@ module FlatKit
|
|
57
57
|
my_val = self[field]
|
58
58
|
other_val = other[field]
|
59
59
|
|
60
|
-
if my_val.nil? && other_val.nil?
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
60
|
+
compare_result = if my_val.nil? && other_val.nil?
|
61
|
+
0
|
62
|
+
elsif my_val.nil?
|
63
|
+
-1
|
64
|
+
elsif other_val.nil?
|
65
|
+
1
|
66
|
+
else
|
67
|
+
my_val <=> (other_val)
|
68
|
+
end
|
69
69
|
|
70
70
|
return compare_result unless compare_result.zero?
|
71
71
|
end
|