flat_kit 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f30ca1aa08d445cdae1e2da95f32bcf3185e0ab5f003675d777cce2741ed3b8
|
4
|
+
data.tar.gz: 42606f14ad0e846e83734388f8250c0045a80208b9a91034360dc86c088121a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6b318485580e129ff979004c17ef21a4223aaf870224348b26353af7220ba8e57dfe11d21ad0870c9c0ccedd9ebcb1a1b40fc80417fef40dc8d27cdd569c205
|
7
|
+
data.tar.gz: b91c41e7778e598d2b3ab388b063e527dabceab2323c6ceb64ae8a1b68da4f13aceb79e3b400f4bbc56eb41456d717c4a498247777c6839e403f7998901f254e
|
data/CONTRIBUTING.md
CHANGED
@@ -27,8 +27,7 @@ easiest way to contribute.
|
|
27
27
|
* Fork the [repo][].
|
28
28
|
* Create a new branch for your issue: `git checkout -b issue/my-issue`
|
29
29
|
* Lovingly craft your contribution:
|
30
|
-
* `
|
31
|
-
* `bundle install` to install dependencies.
|
30
|
+
* `bin/setup` to bootstrap development.
|
32
31
|
* `rake test` to run tests
|
33
32
|
* Make sure that `rake test` passes. It's important, I said it twice.
|
34
33
|
* Add yourself to the contributors section below.
|
data/HISTORY.md
CHANGED
@@ -1,4 +1,19 @@
|
|
1
1
|
# FlatKit Changelog
|
2
|
+
|
3
|
+
## Version 1.0.0
|
4
|
+
|
5
|
+
* Setup semaphore testing
|
6
|
+
* Fix typos
|
7
|
+
* Fix commandline error if no sub command is given
|
8
|
+
* Add rubocop
|
9
|
+
* Update supported to Ruby 3.0 or greater
|
10
|
+
|
11
|
+
## Version 0.3.0
|
12
|
+
|
13
|
+
* Changing the event listening api to include meta data about the event
|
14
|
+
* Add field type detection
|
15
|
+
* Add a 'stats' command to generate stats about the data file
|
16
|
+
|
2
17
|
## Version 0.2.0
|
3
18
|
|
4
19
|
* add in event listening to allow for additional integrations
|
data/Manifest.txt
CHANGED
@@ -3,17 +3,29 @@ HISTORY.md
|
|
3
3
|
LICENSE.txt
|
4
4
|
Manifest.txt
|
5
5
|
README.md
|
6
|
-
|
7
|
-
|
6
|
+
exe/fk
|
7
|
+
flat_kit.gemspec
|
8
8
|
lib/flat_kit.rb
|
9
9
|
lib/flat_kit/cli.rb
|
10
10
|
lib/flat_kit/command.rb
|
11
11
|
lib/flat_kit/command/cat.rb
|
12
12
|
lib/flat_kit/command/merge.rb
|
13
13
|
lib/flat_kit/command/sort.rb
|
14
|
+
lib/flat_kit/command/stats.rb
|
14
15
|
lib/flat_kit/descendant_tracker.rb
|
15
16
|
lib/flat_kit/error.rb
|
16
17
|
lib/flat_kit/event_emitter.rb
|
18
|
+
lib/flat_kit/field_stats.rb
|
19
|
+
lib/flat_kit/field_type.rb
|
20
|
+
lib/flat_kit/field_type/boolean_type.rb
|
21
|
+
lib/flat_kit/field_type/date_type.rb
|
22
|
+
lib/flat_kit/field_type/float_type.rb
|
23
|
+
lib/flat_kit/field_type/guess_type.rb
|
24
|
+
lib/flat_kit/field_type/integer_type.rb
|
25
|
+
lib/flat_kit/field_type/null_type.rb
|
26
|
+
lib/flat_kit/field_type/string_type.rb
|
27
|
+
lib/flat_kit/field_type/timestamp_type.rb
|
28
|
+
lib/flat_kit/field_type/unknown_type.rb
|
17
29
|
lib/flat_kit/format.rb
|
18
30
|
lib/flat_kit/input.rb
|
19
31
|
lib/flat_kit/input/file.rb
|
@@ -25,44 +37,27 @@ lib/flat_kit/jsonl/reader.rb
|
|
25
37
|
lib/flat_kit/jsonl/record.rb
|
26
38
|
lib/flat_kit/jsonl/writer.rb
|
27
39
|
lib/flat_kit/leaf_node.rb
|
40
|
+
lib/flat_kit/log_formatter.rb
|
28
41
|
lib/flat_kit/logger.rb
|
29
42
|
lib/flat_kit/merge.rb
|
30
43
|
lib/flat_kit/merge_tree.rb
|
31
44
|
lib/flat_kit/output.rb
|
32
45
|
lib/flat_kit/output/file.rb
|
33
46
|
lib/flat_kit/output/io.rb
|
47
|
+
lib/flat_kit/position.rb
|
34
48
|
lib/flat_kit/reader.rb
|
35
49
|
lib/flat_kit/record.rb
|
36
50
|
lib/flat_kit/sentinel_internal_node.rb
|
37
51
|
lib/flat_kit/sentinel_leaf_node.rb
|
38
52
|
lib/flat_kit/sort.rb
|
53
|
+
lib/flat_kit/stat_type.rb
|
54
|
+
lib/flat_kit/stat_type/nominal_stats.rb
|
55
|
+
lib/flat_kit/stat_type/numerical_stats.rb
|
56
|
+
lib/flat_kit/stat_type/ordinal_stats.rb
|
57
|
+
lib/flat_kit/stats.rb
|
39
58
|
lib/flat_kit/writer.rb
|
40
59
|
lib/flat_kit/xsv.rb
|
41
60
|
lib/flat_kit/xsv/format.rb
|
42
61
|
lib/flat_kit/xsv/reader.rb
|
43
62
|
lib/flat_kit/xsv/record.rb
|
44
63
|
lib/flat_kit/xsv/writer.rb
|
45
|
-
tasks/default.rake
|
46
|
-
tasks/extension.rake
|
47
|
-
tasks/man.rake
|
48
|
-
tasks/this.rb
|
49
|
-
test/device_dataset.rb
|
50
|
-
test/input/test_file.rb
|
51
|
-
test/input/test_io.rb
|
52
|
-
test/jsonl/test_format.rb
|
53
|
-
test/jsonl/test_reader.rb
|
54
|
-
test/jsonl/test_record.rb
|
55
|
-
test/jsonl/test_writer.rb
|
56
|
-
test/output/test_file.rb
|
57
|
-
test/output/test_io.rb
|
58
|
-
test/test_conversions.rb
|
59
|
-
test/test_event_emitter.rb
|
60
|
-
test/test_format.rb
|
61
|
-
test/test_helper.rb
|
62
|
-
test/test_merge.rb
|
63
|
-
test/test_merge_tree.rb
|
64
|
-
test/test_version.rb
|
65
|
-
test/xsv/test_format.rb
|
66
|
-
test/xsv/test_reader.rb
|
67
|
-
test/xsv/test_record.rb
|
68
|
-
test/xsv/test_writer.rb
|
data/{bin → exe}/fk
RENAMED
data/flat_kit.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# DO NOT EDIT - This file is automatically generated
|
2
|
+
# Make changes to Manifest.txt and/or Rakefile and regenerate
|
3
|
+
# -*- encoding: utf-8 -*-
|
4
|
+
# stub: flat_kit 1.0.0 ruby lib
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "flat_kit".freeze
|
8
|
+
s.version = "1.0.0".freeze
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
11
|
+
s.metadata = { "bug_tracker_uri" => "https://github.com/copiousfreetime/flat_kit/issues", "changelog_uri" => "https://github.com/copiousfreetime/flat_kit/blob/master/HISTORY.md", "homepage_uri" => "https://github.com/copiousfreetime/flat_kit", "label" => "flat_kit", "rubygems_mfa_required" => "true", "source_code_uri" => "https://github.com/copiousfreetime/flat_kit" } if s.respond_to? :metadata=
|
12
|
+
s.require_paths = ["lib".freeze]
|
13
|
+
s.authors = ["Jeremy Hinegardner".freeze]
|
14
|
+
s.bindir = "exe".freeze
|
15
|
+
s.date = "2024-04-28"
|
16
|
+
s.description = "A library and commandline program for reading, writing, indexing, sorting, and merging CSV, TSV, JSON and other flat-file formats.".freeze
|
17
|
+
s.email = "jeremy@copiousfreetime.org".freeze
|
18
|
+
s.executables = ["fk".freeze]
|
19
|
+
s.extra_rdoc_files = ["CONTRIBUTING.md".freeze, "HISTORY.md".freeze, "LICENSE.txt".freeze, "Manifest.txt".freeze, "README.md".freeze]
|
20
|
+
s.files = ["CONTRIBUTING.md".freeze, "HISTORY.md".freeze, "LICENSE.txt".freeze, "Manifest.txt".freeze, "README.md".freeze, "exe/fk".freeze, "flat_kit.gemspec".freeze, "lib/flat_kit.rb".freeze, "lib/flat_kit/cli.rb".freeze, "lib/flat_kit/command.rb".freeze, "lib/flat_kit/command/cat.rb".freeze, "lib/flat_kit/command/merge.rb".freeze, "lib/flat_kit/command/sort.rb".freeze, "lib/flat_kit/command/stats.rb".freeze, "lib/flat_kit/descendant_tracker.rb".freeze, "lib/flat_kit/error.rb".freeze, "lib/flat_kit/event_emitter.rb".freeze, "lib/flat_kit/field_stats.rb".freeze, "lib/flat_kit/field_type.rb".freeze, "lib/flat_kit/field_type/boolean_type.rb".freeze, "lib/flat_kit/field_type/date_type.rb".freeze, "lib/flat_kit/field_type/float_type.rb".freeze, "lib/flat_kit/field_type/guess_type.rb".freeze, "lib/flat_kit/field_type/integer_type.rb".freeze, "lib/flat_kit/field_type/null_type.rb".freeze, "lib/flat_kit/field_type/string_type.rb".freeze, "lib/flat_kit/field_type/timestamp_type.rb".freeze, "lib/flat_kit/field_type/unknown_type.rb".freeze, "lib/flat_kit/format.rb".freeze, "lib/flat_kit/input.rb".freeze, "lib/flat_kit/input/file.rb".freeze, "lib/flat_kit/input/io.rb".freeze, "lib/flat_kit/internal_node.rb".freeze, "lib/flat_kit/jsonl.rb".freeze, "lib/flat_kit/jsonl/format.rb".freeze, "lib/flat_kit/jsonl/reader.rb".freeze, "lib/flat_kit/jsonl/record.rb".freeze, "lib/flat_kit/jsonl/writer.rb".freeze, "lib/flat_kit/leaf_node.rb".freeze, "lib/flat_kit/log_formatter.rb".freeze, "lib/flat_kit/logger.rb".freeze, "lib/flat_kit/merge.rb".freeze, "lib/flat_kit/merge_tree.rb".freeze, "lib/flat_kit/output.rb".freeze, "lib/flat_kit/output/file.rb".freeze, "lib/flat_kit/output/io.rb".freeze, "lib/flat_kit/position.rb".freeze, "lib/flat_kit/reader.rb".freeze, "lib/flat_kit/record.rb".freeze, "lib/flat_kit/sentinel_internal_node.rb".freeze, "lib/flat_kit/sentinel_leaf_node.rb".freeze, "lib/flat_kit/sort.rb".freeze, "lib/flat_kit/stat_type.rb".freeze, 
"lib/flat_kit/stat_type/nominal_stats.rb".freeze, "lib/flat_kit/stat_type/numerical_stats.rb".freeze, "lib/flat_kit/stat_type/ordinal_stats.rb".freeze, "lib/flat_kit/stats.rb".freeze, "lib/flat_kit/writer.rb".freeze, "lib/flat_kit/xsv.rb".freeze, "lib/flat_kit/xsv/format.rb".freeze, "lib/flat_kit/xsv/reader.rb".freeze, "lib/flat_kit/xsv/record.rb".freeze, "lib/flat_kit/xsv/writer.rb".freeze]
|
21
|
+
s.homepage = "http://github.com/copiousfreetime/flat_kit".freeze
|
22
|
+
s.licenses = ["MIT".freeze]
|
23
|
+
s.rdoc_options = ["--main".freeze, "README.md".freeze, "--markup".freeze, "tomdoc".freeze]
|
24
|
+
s.required_ruby_version = Gem::Requirement.new(">= 3.0.0".freeze)
|
25
|
+
s.rubygems_version = "3.5.9".freeze
|
26
|
+
s.summary = "A library and commandline program for reading, writing, indexing, sorting, and merging CSV, TSV, JSON and other flat-file formats.".freeze
|
27
|
+
|
28
|
+
s.specification_version = 4
|
29
|
+
|
30
|
+
s.add_runtime_dependency(%q<oj>.freeze, ["~> 3.0".freeze])
|
31
|
+
s.add_runtime_dependency(%q<optimist>.freeze, ["~> 3.0".freeze])
|
32
|
+
s.add_runtime_dependency(%q<csv>.freeze, ["~> 3.3".freeze])
|
33
|
+
end
|
data/lib/flat_kit/cli.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "optimist"
|
4
|
+
require_relative "../flat_kit/command"
|
3
5
|
|
4
6
|
module FlatKit
|
7
|
+
# Public: The main entry point for the command line interface
|
8
|
+
#
|
5
9
|
class Cli
|
6
10
|
attr_reader :options
|
7
11
|
|
@@ -13,18 +17,18 @@ module FlatKit
|
|
13
17
|
@parser ||= ::Optimist::Parser.new do
|
14
18
|
version ::FlatKit::VERSION
|
15
19
|
|
16
|
-
banner "fk v#{
|
20
|
+
banner "fk v#{version}"
|
17
21
|
|
18
22
|
banner <<~USAGE
|
19
23
|
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
Usage:
|
25
|
+
fk <command> [<args>...]
|
26
|
+
fk [options]
|
23
27
|
USAGE
|
24
28
|
|
25
29
|
banner <<~OPTIONS
|
26
30
|
|
27
|
-
|
31
|
+
Options:
|
28
32
|
|
29
33
|
OPTIONS
|
30
34
|
|
@@ -40,8 +44,8 @@ module FlatKit
|
|
40
44
|
end
|
41
45
|
|
42
46
|
def self.commands_banner
|
43
|
-
sorted_commands = FlatKit::Command.children.sort_by
|
44
|
-
left_width = sorted_commands.map { |c| c.name.length }.
|
47
|
+
sorted_commands = FlatKit::Command.children.sort_by(&:name)
|
48
|
+
left_width = sorted_commands.map { |c| c.name.length }.max
|
45
49
|
banner = StringIO.new
|
46
50
|
banner.puts
|
47
51
|
banner.puts "Commands:"
|
@@ -54,27 +58,48 @@ module FlatKit
|
|
54
58
|
end
|
55
59
|
|
56
60
|
def run(argv: ARGV, env: ENV)
|
57
|
-
opts =
|
61
|
+
opts = parse_opts(argv)
|
62
|
+
init_logging(opts)
|
63
|
+
::FlatKit.logger.debug(argv)
|
64
|
+
|
65
|
+
command_name = argv.shift
|
66
|
+
exit_if_help(command_name)
|
67
|
+
|
68
|
+
command_klass = command_klass_or_exit(command_name)
|
69
|
+
command = command_klass.new(argv: argv, logger: ::FlatKit.logger, env: env)
|
70
|
+
command.call
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
|
75
|
+
def parse_opts(argv)
|
76
|
+
::Optimist.with_standard_exception_handling(parser) do
|
58
77
|
parser.parse(argv)
|
59
78
|
end
|
79
|
+
end
|
60
80
|
|
61
|
-
|
62
|
-
|
63
|
-
end
|
81
|
+
def init_logging(opts)
|
82
|
+
::FlatKit.log_to(opts[:log]) if opts[:log_given]
|
64
83
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
end
|
84
|
+
::FlatKit.logger.level = opts[:verbose] ? :debug : :info
|
85
|
+
|
86
|
+
::FlatKit.logger.debug(opts)
|
87
|
+
end
|
70
88
|
|
71
|
-
|
72
|
-
|
89
|
+
def exit_if_help(command_name)
|
90
|
+
return unless command_name.nil? || command_name.downcase == "help"
|
73
91
|
|
74
|
-
|
92
|
+
parser.educate
|
93
|
+
exit 0
|
94
|
+
end
|
95
|
+
|
96
|
+
def command_klass_or_exit(command_name)
|
75
97
|
command_klass = FlatKit::Command.for(command_name)
|
76
|
-
|
77
|
-
|
98
|
+
return command_klass unless command_klass.nil?
|
99
|
+
|
100
|
+
$stdout.puts "ERROR: Unknown command '#{command_name}'"
|
101
|
+
parser.educate
|
102
|
+
exit 0
|
78
103
|
end
|
79
104
|
end
|
80
105
|
end
|
data/lib/flat_kit/command/cat.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class Command
|
5
|
+
# Internal: The implementation of the cat command.
|
6
|
+
#
|
3
7
|
# TODO: Implement the --flatten commandline switch
|
4
8
|
class Cat < ::FlatKit::Command
|
5
9
|
def self.name
|
@@ -12,43 +16,43 @@ module FlatKit
|
|
12
16
|
|
13
17
|
def self.parser
|
14
18
|
::Optimist::Parser.new do
|
15
|
-
banner
|
19
|
+
banner Cat.description.to_s
|
16
20
|
banner ""
|
17
21
|
|
18
22
|
banner <<~BANNER
|
19
|
-
|
20
|
-
|
21
|
-
|
23
|
+
Concatenates files that have the same field structure together into
|
24
|
+
a single file. The files can be of different formats, but must have
|
25
|
+
the same fields and names.
|
22
26
|
|
23
|
-
|
24
|
-
|
27
|
+
This is probably most easily usable as a way to convert CSV to JSON
|
28
|
+
and vice versa.
|
25
29
|
|
26
|
-
|
27
|
-
|
28
|
-
|
30
|
+
The flatfile type(s) will be automatically determined by the file name.
|
31
|
+
If the inputs or output is not a file, but from stdin or stdout then
|
32
|
+
the input and output types must be specified.
|
29
33
|
|
30
|
-
|
31
|
-
|
32
|
-
|
34
|
+
NOTE: If converting from JSON to CSV and the input JSON does not have
|
35
|
+
every possible field on every record, then the output csv will
|
36
|
+
be corrupted.
|
33
37
|
|
34
|
-
|
35
|
-
|
36
|
-
|
38
|
+
In this case the input json should be fed through 'flatten' first
|
39
|
+
or use the '--flatten' flag which will require an additional pass
|
40
|
+
through the input to gather all the fields
|
37
41
|
BANNER
|
38
42
|
|
39
43
|
banner <<~USAGE
|
40
44
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
45
|
+
Usage:
|
46
|
+
fk cat file1.csv file2.csv > combined.csv
|
47
|
+
fk cat --output-format json file1.csv
|
48
|
+
fk cat file1.csv.gzip -o file2.json.gzip
|
49
|
+
fk cat file1.csv.gzip --output-format json | gzip -c > file1.jsonl.gz
|
46
50
|
|
47
51
|
USAGE
|
48
52
|
|
49
53
|
banner <<~OPTIONS
|
50
54
|
|
51
|
-
|
55
|
+
Options:
|
52
56
|
|
53
57
|
OPTIONS
|
54
58
|
|
@@ -60,17 +64,15 @@ module FlatKit
|
|
60
64
|
|
61
65
|
def parse
|
62
66
|
parser = self.class.parser
|
63
|
-
::Optimist
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
raise ::Optimist::CommandlineError, e.message
|
73
|
-
end
|
67
|
+
::Optimist.with_standard_exception_handling(parser) do
|
68
|
+
@opts = parser.parse(argv)
|
69
|
+
paths = parser.leftovers
|
70
|
+
|
71
|
+
@readers = ::FlatKit::Reader.create_readers_from_paths(paths: paths, fallback: opts[:input_format])
|
72
|
+
@writer = ::FlatKit::Writer.create_writer_from_path(path: opts[:output], fallback: opts[:output_format],
|
73
|
+
reader_format: @readers.first.format_name)
|
74
|
+
rescue ::FlatKit::Error => e
|
75
|
+
raise ::Optimist::CommandlineError, e.message
|
74
76
|
end
|
75
77
|
end
|
76
78
|
|
@@ -1,8 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
2
4
|
module FlatKit
|
3
5
|
class Command
|
6
|
+
# Internal: The implementation of the merge command.
|
7
|
+
#
|
4
8
|
class Merge < ::FlatKit::Command
|
5
|
-
|
6
9
|
def self.name
|
7
10
|
"merge"
|
8
11
|
end
|
@@ -13,43 +16,43 @@ module FlatKit
|
|
13
16
|
|
14
17
|
def self.parser
|
15
18
|
::Optimist::Parser.new do
|
16
|
-
banner
|
19
|
+
banner Merge.description.to_s
|
17
20
|
banner ""
|
18
21
|
|
19
22
|
banner <<~BANNER
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
+
Given a set of input files that have the same structure, and are already
|
24
|
+
sorted by a set of keys. The Merge command will merge all those files
|
25
|
+
into a single output file.
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
+
The --key parameter is required, and it must be a comma separated list
|
28
|
+
of field names on the input to use as the sort key for the merge
|
29
|
+
process.
|
27
30
|
|
28
|
-
|
29
|
-
|
31
|
+
There must also be at least 2 input files. Merging only 1 file into an
|
32
|
+
output file is the same as the 'cat' command.
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
34
|
+
The flatfile type(s) will be automatically determined by the file name.
|
35
|
+
If the output is not a file, but to stdout then the output type will
|
36
|
+
be the same as the first input file, or it can be specified as a commandline
|
37
|
+
switch.
|
35
38
|
|
36
|
-
|
37
|
-
|
39
|
+
The merge will do a single pass through the input to generate the
|
40
|
+
output.
|
38
41
|
BANNER
|
39
42
|
|
40
43
|
banner <<~USAGE
|
41
44
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
Usage:
|
46
|
+
fk merge --key surname,given_name file1.csv file2.csv > all.csv
|
47
|
+
fk merge --key surname,given_name --output-format json file1.csv file2.csv > all.json
|
48
|
+
fk merge --key field1,field2 --output-format json input*.csv | gzip -c > all.json.gz
|
49
|
+
fk merge --key field12 file*.json.gz -o all.json.gz
|
47
50
|
|
48
51
|
USAGE
|
49
52
|
|
50
53
|
banner <<~OPTIONS
|
51
54
|
|
52
|
-
|
55
|
+
Options:
|
53
56
|
|
54
57
|
OPTIONS
|
55
58
|
|
@@ -64,19 +67,17 @@ module FlatKit
|
|
64
67
|
|
65
68
|
def parse
|
66
69
|
parser = self.class.parser
|
67
|
-
::Optimist
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
raise ::Optimist::CommandlineError, e.message
|
79
|
-
end
|
70
|
+
::Optimist.with_standard_exception_handling(parser) do
|
71
|
+
@opts = parser.parse(argv)
|
72
|
+
@compare_keys = CSV.parse_line(opts[:key])
|
73
|
+
paths = parser.leftovers
|
74
|
+
raise ::Optimist::CommandlineError, "At least 2 input files are required" if paths.size < 2
|
75
|
+
|
76
|
+
@merge = ::FlatKit::Merge.new(inputs: paths, input_fallback: opts[:input_format],
|
77
|
+
compare_fields: @compare_keys,
|
78
|
+
output: opts[:output], output_fallback: opts[:output_format])
|
79
|
+
rescue ::FlatKit::Error => e
|
80
|
+
raise ::Optimist::CommandlineError, e.message
|
80
81
|
end
|
81
82
|
end
|
82
83
|
|
@@ -1,8 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
2
4
|
module FlatKit
|
3
5
|
class Command
|
6
|
+
# Internal: The implementation of the sort command.
|
7
|
+
#
|
4
8
|
class Sort < ::FlatKit::Command
|
5
|
-
|
6
9
|
def self.name
|
7
10
|
"sort"
|
8
11
|
end
|
@@ -13,41 +16,41 @@ module FlatKit
|
|
13
16
|
|
14
17
|
def self.parser
|
15
18
|
::Optimist::Parser.new do
|
16
|
-
banner
|
19
|
+
banner Sort.description.to_s
|
17
20
|
banner ""
|
18
21
|
|
19
22
|
banner <<~BANNER
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
+
Given an input file and a sort key, order the records in that file by that
|
24
|
+
key. If no input file is given the stdin is assumed. If no output file
|
25
|
+
is given then stdout is assumed.
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
+
The --key parameter is required, and it must be a comma separated list
|
28
|
+
of field names on the input to use as the sort key for the merge
|
29
|
+
process.
|
27
30
|
|
28
|
-
|
31
|
+
There must also be only 1 input file.
|
29
32
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
33
|
+
The flatfile type(s) will be automatically determined by the file name.
|
34
|
+
If the output is not a file, but to stdout then the output type will
|
35
|
+
be the same as the first input file, or it can be specified as a commandline
|
36
|
+
switch.
|
34
37
|
|
35
38
|
BANNER
|
36
39
|
|
37
40
|
banner <<~USAGE
|
38
41
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
Usage:
|
43
|
+
fk sort --key surname,given_name file.csv > sorted.csv
|
44
|
+
fk sort --key surname,given_name --output-format json file.csv > sorted.json
|
45
|
+
fk sort --key field1,field2 --output-format json input.csv | gzip -c > sorted.json.gz
|
46
|
+
fk sort --key field1 file.json.gz -o sorted.json.gz
|
47
|
+
gunzip -c file.json.gz | fk sort --key field1 --input-format json --output-format json | gzip -c > sorted.json.gz
|
45
48
|
|
46
49
|
USAGE
|
47
50
|
|
48
51
|
banner <<~OPTIONS
|
49
52
|
|
50
|
-
|
53
|
+
Options:
|
51
54
|
|
52
55
|
OPTIONS
|
53
56
|
|
@@ -58,25 +61,22 @@ module FlatKit
|
|
58
61
|
end
|
59
62
|
end
|
60
63
|
|
61
|
-
attr_reader :compare_keys
|
62
|
-
attr_reader :reader
|
63
|
-
attr_reader :sort
|
64
|
+
attr_reader :compare_keys, :reader, :sort
|
64
65
|
|
65
66
|
def parse
|
66
67
|
parser = self.class.parser
|
67
|
-
::Optimist
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
end
|
68
|
+
::Optimist.with_standard_exception_handling(parser) do
|
69
|
+
@opts = parser.parse(argv)
|
70
|
+
@compare_keys = CSV.parse_line(opts[:key])
|
71
|
+
paths = parser.leftovers
|
72
|
+
raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed" if paths.size > 1
|
73
|
+
|
74
|
+
path = paths.first || "-" # default to stdin
|
75
|
+
@sort = ::FlatKit::Sort.new(input: path, input_fallback: opts[:input_format],
|
76
|
+
output: opts[:output], output_fallback: opts[:output_format],
|
77
|
+
compare_fields: @compare_keys)
|
78
|
+
rescue ::FlatKit::Error => e
|
79
|
+
raise ::Optimist::CommandlineError, e.message
|
80
80
|
end
|
81
81
|
end
|
82
82
|
|