flat_kit 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
@@ -1,8 +1,12 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "oj"
|
4
|
+
require "flat_kit/record"
|
3
5
|
|
4
6
|
module FlatKit
|
5
7
|
module Jsonl
|
8
|
+
# Internal: Class that exposes data from a JSONL format record to the flatkit api
|
9
|
+
#
|
6
10
|
class Record < ::FlatKit::Record
|
7
11
|
attr_reader :compare_data
|
8
12
|
|
@@ -11,7 +15,7 @@ module FlatKit
|
|
11
15
|
end
|
12
16
|
|
13
17
|
def self.from_record(record)
|
14
|
-
if record.instance_of?(FlatKit::Jsonl::Record)
|
18
|
+
if record.instance_of?(FlatKit::Jsonl::Record)
|
15
19
|
|
16
20
|
structured = record.complete_structured_data? ? record.complete_structured_data : nil
|
17
21
|
|
@@ -25,22 +29,20 @@ module FlatKit
|
|
25
29
|
end
|
26
30
|
|
27
31
|
def initialize(data:, compare_fields: :none,
|
28
|
-
compare_data:
|
32
|
+
compare_data: {},
|
29
33
|
complete_structured_data: nil)
|
30
34
|
super(data: data, compare_fields: compare_fields)
|
31
35
|
|
32
36
|
@complete_structured_data = complete_structured_data
|
33
37
|
|
34
|
-
if complete_structured_data? && (compare_data.nil? || compare_data.empty?)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
38
|
+
@compare_data = if complete_structured_data? && (compare_data.nil? || compare_data.empty?)
|
39
|
+
complete_structured_data
|
40
|
+
else
|
41
|
+
compare_data
|
42
|
+
end
|
39
43
|
|
40
44
|
# only load compare data if it doesn't exist
|
41
|
-
if data && compare_data.empty?
|
42
|
-
quick_parse
|
43
|
-
end
|
45
|
+
quick_parse if data && compare_data.empty?
|
44
46
|
end
|
45
47
|
|
46
48
|
def [](key)
|
@@ -48,7 +50,7 @@ module FlatKit
|
|
48
50
|
end
|
49
51
|
|
50
52
|
def complete_structured_data
|
51
|
-
@complete_structured_data ||= Oj.load(data)
|
53
|
+
@complete_structured_data ||= Oj.load(data, mode: :strict)
|
52
54
|
end
|
53
55
|
alias to_hash complete_structured_data
|
54
56
|
|
@@ -59,9 +61,7 @@ module FlatKit
|
|
59
61
|
# overriding parent accessor since we may be initialized without raw bytes
|
60
62
|
# to parse
|
61
63
|
def data
|
62
|
-
if @data.nil? && complete_structured_data?
|
63
|
-
@data = Oj.dump(complete_structured_data)
|
64
|
-
end
|
64
|
+
@data = Oj.dump(complete_structured_data, mode: :json) if @data.nil? && complete_structured_data?
|
65
65
|
@data
|
66
66
|
end
|
67
67
|
alias to_s data
|
@@ -79,6 +79,3 @@ module FlatKit
|
|
79
79
|
end
|
80
80
|
end
|
81
81
|
end
|
82
|
-
|
83
|
-
|
84
|
-
|
@@ -1,19 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
module Jsonl
|
5
|
+
# Internal: Class that writes flatkit records to JSONL files
|
6
|
+
#
|
3
7
|
class Writer < ::FlatKit::Writer
|
4
|
-
attr_reader :output
|
5
|
-
attr_reader :count
|
6
|
-
|
7
8
|
def self.format_name
|
8
9
|
::FlatKit::Jsonl::Format.format_name
|
9
10
|
end
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
@output = ::FlatKit::Output.from(@destination)
|
14
|
-
@count = 0
|
15
|
-
end
|
16
|
-
|
12
|
+
# write the record and return the Position at which the record was written
|
13
|
+
#
|
17
14
|
def write(record)
|
18
15
|
case record
|
19
16
|
when FlatKit::Jsonl::Record
|
@@ -24,21 +21,31 @@ module FlatKit
|
|
24
21
|
else
|
25
22
|
raise FlatKit::Error, "Unable to write records of type #{record.class}"
|
26
23
|
end
|
27
|
-
rescue FlatKit::Error =>
|
28
|
-
raise
|
29
|
-
rescue => e
|
30
|
-
::FlatKit.logger.error "Error
|
24
|
+
rescue FlatKit::Error => e
|
25
|
+
raise e
|
26
|
+
rescue StandardError => e
|
27
|
+
::FlatKit.logger.error "Error writing jsonl records to #{output.name}: #{e}"
|
31
28
|
raise ::FlatKit::Error, e
|
32
29
|
end
|
33
30
|
|
34
|
-
def close
|
35
|
-
@output.close
|
36
|
-
end
|
37
|
-
|
38
31
|
def write_record(record)
|
39
|
-
#
|
32
|
+
# the index of the record being written is the same as the count of records written so far
|
33
|
+
record_index = @count
|
34
|
+
|
35
|
+
# get the current output stream position to calculate bytes written
|
36
|
+
start_offset = output.io.tell
|
37
|
+
|
38
|
+
# enforces ending in newline if it doesn't already have one
|
40
39
|
output.io.puts record.to_s
|
40
|
+
|
41
|
+
ending_offset = output.io.tell
|
42
|
+
bytes_written = (ending_offset - start_offset)
|
43
|
+
|
41
44
|
@count += 1
|
45
|
+
|
46
|
+
@last_position = ::FlatKit::Position.new(index: record_index,
|
47
|
+
offset: start_offset,
|
48
|
+
bytesize: bytes_written)
|
42
49
|
end
|
43
50
|
end
|
44
51
|
end
|
data/lib/flat_kit/jsonl.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: Top level namespace for the newline-oriented JSON format
|
5
|
+
#
|
2
6
|
module Jsonl
|
3
7
|
end
|
4
8
|
end
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
9
|
+
require "flat_kit/jsonl/record"
|
10
|
+
require "flat_kit/jsonl/reader"
|
11
|
+
require "flat_kit/jsonl/writer"
|
12
|
+
require "flat_kit/jsonl/format"
|
data/lib/flat_kit/leaf_node.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
# Private: The LeafNode is a wrapper around a Reader object to enable
|
3
5
|
# a consistent api for use in the MergeTree
|
@@ -9,11 +11,9 @@ module FlatKit
|
|
9
11
|
# If all the data is used up from the reader, it also notifies the next level
|
10
12
|
# of that so the next level can remove it from the tree.
|
11
13
|
class LeafNode
|
12
|
-
|
13
14
|
include Comparable
|
14
15
|
|
15
|
-
attr_reader :reader
|
16
|
-
attr_reader :value
|
16
|
+
attr_reader :reader, :value
|
17
17
|
|
18
18
|
attr_accessor :next_level
|
19
19
|
|
@@ -43,7 +43,7 @@ module FlatKit
|
|
43
43
|
|
44
44
|
def update_and_replay
|
45
45
|
self.next
|
46
|
-
if finished?
|
46
|
+
if finished?
|
47
47
|
::FlatKit.logger.debug "#{reader.source} has finished reading #{reader.count} records"
|
48
48
|
next_level.player_finished(self)
|
49
49
|
end
|
@@ -65,7 +65,8 @@ module FlatKit
|
|
65
65
|
|
66
66
|
def <=>(other)
|
67
67
|
return -1 if other.sentinel?
|
68
|
-
|
68
|
+
|
69
|
+
value <=> (other.value)
|
69
70
|
end
|
70
71
|
end
|
71
72
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "logger"
|
4
|
+
|
5
|
+
module FlatKit
|
6
|
+
# Internal: Log formatting class for FlatKit
|
7
|
+
#
|
8
|
+
class LogFormatter < ::Logger::Formatter
|
9
|
+
FORMAT = "%s %5d %05s : %s\n"
|
10
|
+
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
|
11
|
+
def initialize
|
12
|
+
super
|
13
|
+
self.datetime_format = DATETIME_FORMAT
|
14
|
+
end
|
15
|
+
|
16
|
+
def call(severity, time, _progname, msg)
|
17
|
+
format(FORMAT, format_datetime(time.utc), Process.pid, severity, msg2str(msg))
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/flat_kit/logger.rb
CHANGED
@@ -1,19 +1,12 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
class LogFormatter < ::Logger::Formatter
|
5
|
-
FORMAT = "%s %5d %05s : %s\n".freeze
|
6
|
-
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ".freeze
|
7
|
-
def initialize
|
8
|
-
super
|
9
|
-
self.datetime_format = DATETIME_FORMAT
|
10
|
-
end
|
11
|
-
|
12
|
-
def call(severity, time, progname, msg)
|
13
|
-
FORMAT % [format_datetime(time.utc), Process.pid, severity, msg2str(msg)]
|
14
|
-
end
|
15
|
-
end
|
3
|
+
require "logger"
|
16
4
|
|
5
|
+
# Public: Top level namespace for the gem
|
6
|
+
#
|
7
|
+
module FlatKit
|
8
|
+
# Internal: Logger class
|
9
|
+
#
|
17
10
|
class Logger
|
18
11
|
def self.for_io(io)
|
19
12
|
::Logger.new(io, formatter: LogFormatter.new)
|
@@ -26,11 +19,11 @@ module FlatKit
|
|
26
19
|
end
|
27
20
|
|
28
21
|
def self.log_to(destination = $stderr)
|
29
|
-
if destination.
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
22
|
+
@logger = if destination.is_a?(::IO)
|
23
|
+
::FlatKit::Logger.for_io(destination)
|
24
|
+
else
|
25
|
+
::FlatKit::Logger.for_path(destination)
|
26
|
+
end
|
34
27
|
end
|
35
28
|
|
36
29
|
def self.logger
|
data/lib/flat_kit/merge.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: Class implementing merging from N inputs and output to 1 output.
|
5
|
+
#
|
2
6
|
class Merge
|
3
|
-
|
4
7
|
include ::FlatKit::EventEmitter
|
5
8
|
|
6
|
-
attr_reader :readers
|
7
|
-
attr_reader :writer
|
8
|
-
attr_reader :compare_fields
|
9
|
+
attr_reader :readers, :writer, :compare_fields
|
9
10
|
|
10
|
-
def initialize(inputs:, input_fallback: "auto",
|
11
|
-
output:, output_fallback: "auto",
|
12
|
-
compare_fields:)
|
11
|
+
def initialize(inputs:, output:, compare_fields:, input_fallback: "auto", output_fallback: "auto")
|
13
12
|
@compare_fields = compare_fields
|
14
13
|
@readers = ::FlatKit::Reader.create_readers_from_paths(paths: inputs, compare_fields: @compare_fields,
|
15
14
|
fallback: input_fallback)
|
@@ -19,19 +18,12 @@ module FlatKit
|
|
19
18
|
|
20
19
|
def call
|
21
20
|
::FlatKit.logger.debug "Merging the following files into #{writer.destination}"
|
22
|
-
::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(
|
21
|
+
::FlatKit.logger.debug "Using this key for sorting: #{compare_fields.join(', ')}"
|
23
22
|
readers.each do |r|
|
24
23
|
::FlatKit.logger.debug " #{r.source}"
|
25
24
|
end
|
26
25
|
|
27
|
-
|
28
|
-
|
29
|
-
notify_listeners(name: :start, data: :start)
|
30
|
-
merge_tree.each do |record|
|
31
|
-
writer.write(record)
|
32
|
-
notify_listeners(name: :record, data: record)
|
33
|
-
end
|
34
|
-
notify_listeners(name: :stop, data: :stop)
|
26
|
+
run_merge(readers)
|
35
27
|
|
36
28
|
readers.each do |r|
|
37
29
|
::FlatKit.logger.debug " #{r.source} produced #{r.count} records"
|
@@ -40,5 +32,18 @@ module FlatKit
|
|
40
32
|
writer.close
|
41
33
|
::FlatKit.logger.debug "Wrote #{writer.count} records to #{writer.destination}"
|
42
34
|
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def run_merge(readers)
|
39
|
+
tree = ::FlatKit::MergeTree.new(readers)
|
40
|
+
notify_listeners(name: :start, data: :start)
|
41
|
+
tree.each do |record|
|
42
|
+
position = writer.write(record)
|
43
|
+
meta = { position: position }
|
44
|
+
notify_listeners(name: :record, data: record, meta: meta)
|
45
|
+
end
|
46
|
+
notify_listeners(name: :stop, data: :stop)
|
47
|
+
end
|
43
48
|
end
|
44
49
|
end
|
data/lib/flat_kit/merge_tree.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
# Public: Merge a list of sorted records from Readers into a single output Writer
|
3
5
|
#
|
@@ -29,9 +31,7 @@ module FlatKit
|
|
29
31
|
class MergeTree
|
30
32
|
include Enumerable
|
31
33
|
|
32
|
-
attr_reader :leaves
|
33
|
-
attr_reader :levels
|
34
|
-
attr_reader :readers
|
34
|
+
attr_reader :leaves, :levels, :readers
|
35
35
|
|
36
36
|
def initialize(readers)
|
37
37
|
@readers = readers
|
@@ -44,9 +44,7 @@ module FlatKit
|
|
44
44
|
|
45
45
|
# Need to pad the leaves to an even number so that the slicing by 2 for
|
46
46
|
# the tournament will work
|
47
|
-
if @leaves.size.odd?
|
48
|
-
@leaves << SentinelLeafNode.new
|
49
|
-
end
|
47
|
+
@leaves << SentinelLeafNode.new if @leaves.size.odd?
|
50
48
|
|
51
49
|
init_tree
|
52
50
|
end
|
@@ -94,6 +92,7 @@ module FlatKit
|
|
94
92
|
def each
|
95
93
|
loop do
|
96
94
|
break if root.leaf.finished?
|
95
|
+
|
97
96
|
yield root.value
|
98
97
|
# consume the yielded value and have the tournament tree replay those
|
99
98
|
# brackets affected
|
data/lib/flat_kit/output/file.rb
CHANGED
@@ -1,22 +1,31 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "zlib"
|
4
|
+
require "pathname"
|
2
5
|
|
3
6
|
module FlatKit
|
4
7
|
class Output
|
8
|
+
# Internal: File output implementation
|
9
|
+
#
|
5
10
|
class File < Output
|
6
11
|
attr_reader :path
|
7
12
|
|
13
|
+
# internal api method for testing purposes
|
14
|
+
attr_reader :io
|
15
|
+
|
8
16
|
def self.handles?(obj)
|
9
17
|
return true if obj.instance_of?(Pathname)
|
10
18
|
return false unless obj.instance_of?(String)
|
11
19
|
|
12
20
|
# in case these get loaded in different orders
|
13
|
-
return false if ::FlatKit::Output::IO.
|
14
|
-
return false if ::FlatKit::Output::IO.
|
21
|
+
return false if ::FlatKit::Output::IO.stdout?(obj)
|
22
|
+
return false if ::FlatKit::Output::IO.stderr?(obj)
|
15
23
|
|
16
|
-
|
24
|
+
true
|
17
25
|
end
|
18
26
|
|
19
27
|
def initialize(obj)
|
28
|
+
super()
|
20
29
|
@path = Pathname.new(obj)
|
21
30
|
path.dirname.mkpath
|
22
31
|
@io = open_output(path)
|
@@ -30,11 +39,6 @@ module FlatKit
|
|
30
39
|
@io.close
|
31
40
|
end
|
32
41
|
|
33
|
-
# internal api method for testing purposes
|
34
|
-
def io
|
35
|
-
@io
|
36
|
-
end
|
37
|
-
|
38
42
|
private
|
39
43
|
|
40
44
|
# open the appropriate output type depending on the destination file name
|
data/lib/flat_kit/output/io.rb
CHANGED
@@ -1,73 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class Output
|
5
|
+
# Internal: Non-file Output implementation - this is generally to stdout or stderr
|
6
|
+
#
|
3
7
|
class IO < Output
|
4
|
-
attr_reader :count
|
8
|
+
attr_reader :count, :name
|
9
|
+
|
10
|
+
# internal api method for testing
|
11
|
+
attr_reader :io
|
5
12
|
|
6
|
-
STDOUTS = %w[
|
7
|
-
STDERRS = %w[
|
13
|
+
STDOUTS = %w[stdout STDOUT - <stdout>].freeze
|
14
|
+
STDERRS = %w[stderr STDERR <stderr>].freeze
|
8
15
|
|
9
16
|
def self.handles?(obj)
|
10
|
-
return true if
|
11
|
-
return true if
|
12
|
-
return true if [
|
13
|
-
|
17
|
+
return true if stderr?(obj)
|
18
|
+
return true if stdout?(obj)
|
19
|
+
return true if [::File, ::StringIO, ::IO].any? { |klass| obj.is_a?(klass) }
|
20
|
+
|
21
|
+
false
|
14
22
|
end
|
15
23
|
|
16
|
-
def self.
|
24
|
+
def self.stderr?(obj)
|
17
25
|
case obj
|
18
26
|
when String
|
19
27
|
return true if STDERRS.include?(obj)
|
20
28
|
when ::IO
|
21
|
-
return true if obj ==
|
29
|
+
return true if obj == $stderr
|
22
30
|
end
|
23
|
-
|
31
|
+
false
|
24
32
|
end
|
25
33
|
|
26
|
-
def self.
|
34
|
+
def self.stdout?(obj)
|
27
35
|
case obj
|
28
36
|
when String
|
29
37
|
return true if STDOUTS.include?(obj)
|
30
38
|
when ::IO
|
31
|
-
return true if obj ==
|
39
|
+
return true if obj == $stdout
|
32
40
|
end
|
33
|
-
|
41
|
+
false
|
34
42
|
end
|
35
43
|
|
36
44
|
def initialize(obj)
|
45
|
+
super()
|
37
46
|
@count = 0
|
38
|
-
|
47
|
+
@name = nil
|
48
|
+
@io = nil
|
49
|
+
init_name_and_io(obj)
|
50
|
+
end
|
51
|
+
|
52
|
+
# this goes to an io stream and we are not in charge of opening it
|
53
|
+
def close
|
54
|
+
@io.close
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def init_name_and_io(obj)
|
60
|
+
if self.class.stdout?(obj)
|
39
61
|
@name = "<STDOUT>"
|
40
62
|
@io = $stdout
|
41
|
-
elsif self.class.
|
63
|
+
elsif self.class.stderr?(obj)
|
42
64
|
@name = "<STDERR>"
|
43
65
|
@io = $stderr
|
44
|
-
elsif obj.
|
45
|
-
@name = obj.path
|
66
|
+
elsif obj.is_a?(::IO)
|
67
|
+
@name = (obj.respond_to?(:path) && obj.path) || obj.inspect
|
46
68
|
@io = obj
|
47
|
-
elsif obj.
|
48
|
-
@name = obj.inspect
|
49
|
-
@io = obj
|
50
|
-
elsif obj.kind_of?(::IO) then
|
69
|
+
elsif obj.is_a?(::StringIO)
|
51
70
|
@name = obj.inspect
|
52
71
|
@io = obj
|
53
72
|
else
|
54
73
|
raise ::FlatKit::Error, "Unable to create #{self.class} from #{obj.class} : #{obj.inspect}"
|
55
74
|
end
|
56
75
|
end
|
57
|
-
|
58
|
-
def name
|
59
|
-
@name
|
60
|
-
end
|
61
|
-
|
62
|
-
# this goes to an io stream and we are not in charge of opening it
|
63
|
-
def close
|
64
|
-
@io.close
|
65
|
-
end
|
66
|
-
|
67
|
-
# internal api method for testing
|
68
|
-
def io
|
69
|
-
@io
|
70
|
-
end
|
71
76
|
end
|
72
77
|
end
|
73
78
|
end
|
data/lib/flat_kit/output.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
4
|
+
# Internal: Base class for all output handlers
|
5
|
+
#
|
2
6
|
class Output
|
3
7
|
extend DescendantTracker
|
4
8
|
|
5
9
|
def self.from(out)
|
6
|
-
return out if out.
|
10
|
+
return out if out.is_a?(::FlatKit::Output)
|
7
11
|
|
8
12
|
out_klass = find_child(:handles?, out)
|
9
|
-
if out_klass
|
10
|
-
return out_klass.new(out)
|
11
|
-
end
|
13
|
+
return out_klass.new(out) if out_klass
|
12
14
|
|
13
15
|
raise FlatKit::Error, "Unable to create output from #{out.class} : #{out.inspect}"
|
14
16
|
end
|
@@ -17,16 +19,19 @@ module FlatKit
|
|
17
19
|
raise NotImplementedError, "#{self.class} must implement #name"
|
18
20
|
end
|
19
21
|
|
20
|
-
#
|
21
22
|
def io
|
22
23
|
raise NotImplementedError, "#{self.class} must implement #io"
|
23
24
|
end
|
24
25
|
|
26
|
+
def tell
|
27
|
+
io.tell
|
28
|
+
end
|
29
|
+
|
25
30
|
def close
|
26
31
|
raise NotImplementedError, "#{self.class} must implement #close"
|
27
32
|
end
|
28
33
|
end
|
29
34
|
end
|
30
35
|
|
31
|
-
require
|
32
|
-
require
|
36
|
+
require "flat_kit/output/io"
|
37
|
+
require "flat_kit/output/file"
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module FlatKit
|
4
|
+
# The information about the position of a record in an IO stream
|
5
|
+
#
|
6
|
+
# Generally this is going to be returned by a write_record method to return
|
7
|
+
# information about the record that was just written
|
8
|
+
#
|
9
|
+
class Position
|
10
|
+
attr_reader :index, :offset, :bytesize # index: zero-based record index; offset: byte offset in the IO stream; bytesize: byte length of the record
|
11
|
+
|
12
|
+
def initialize(index: nil, offset: nil, bytesize: nil)
|
13
|
+
@index = index
|
14
|
+
@offset = offset
|
15
|
+
@bytesize = bytesize
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/flat_kit/reader.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
# Public: the base class for all format readers.
|
3
5
|
#
|
@@ -14,24 +16,21 @@ module FlatKit
|
|
14
16
|
# API:
|
15
17
|
#
|
16
18
|
# initialize(source:, compare_fields:)
|
17
|
-
# each -> Yields / returns
|
19
|
+
# each -> Yields / returns
|
18
20
|
#
|
19
21
|
class Reader
|
20
22
|
include Enumerable
|
21
23
|
|
22
|
-
attr_reader :source
|
23
|
-
attr_reader :compare_fields
|
24
|
+
attr_reader :source, :compare_fields
|
24
25
|
|
25
26
|
def self.create_reader_from_path(path: "-", fallback: "auto", compare_fields: :none)
|
26
27
|
format = ::FlatKit::Format.for_with_fallback!(path: path, fallback: fallback)
|
27
|
-
|
28
|
+
format.reader.new(source: path, compare_fields: compare_fields)
|
28
29
|
end
|
29
30
|
|
30
31
|
def self.create_readers_from_paths(paths:, fallback: "auto", compare_fields: :none)
|
31
32
|
# default to stdin if there are no paths
|
32
|
-
if paths.empty?
|
33
|
-
paths << "-"
|
34
|
-
end
|
33
|
+
paths << "-" if paths.empty?
|
35
34
|
|
36
35
|
paths.map do |path|
|
37
36
|
create_reader_from_path(path: path, fallback: fallback, compare_fields: compare_fields)
|
@@ -55,7 +54,8 @@ module FlatKit
|
|
55
54
|
|
56
55
|
def resolve_compare_fields(value)
|
57
56
|
return [] if value == :none
|
58
|
-
|
57
|
+
|
58
|
+
value
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
data/lib/flat_kit/record.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
# Public: The base class that all record classes should inherit from.
|
3
5
|
#
|
@@ -35,11 +37,9 @@ module FlatKit
|
|
35
37
|
# # the initialize method must call super(data:, compare_fields:) to
|
36
38
|
# initialize the root data structures
|
37
39
|
class Record
|
38
|
-
|
39
40
|
include Comparable
|
40
41
|
|
41
|
-
attr_reader :data
|
42
|
-
attr_reader :compare_fields
|
42
|
+
attr_reader :data, :compare_fields
|
43
43
|
|
44
44
|
def initialize(data:, compare_fields:)
|
45
45
|
@data = data
|
@@ -57,15 +57,15 @@ module FlatKit
|
|
57
57
|
my_val = self[field]
|
58
58
|
other_val = other[field]
|
59
59
|
|
60
|
-
if my_val.nil? && other_val.nil?
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
60
|
+
compare_result = if my_val.nil? && other_val.nil?
|
61
|
+
0
|
62
|
+
elsif my_val.nil?
|
63
|
+
-1
|
64
|
+
elsif other_val.nil?
|
65
|
+
1
|
66
|
+
else
|
67
|
+
my_val <=> (other_val)
|
68
|
+
end
|
69
69
|
|
70
70
|
return compare_result unless compare_result.zero?
|
71
71
|
end
|