flat_kit 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -2
- data/HISTORY.md +15 -0
- data/Manifest.txt +21 -26
- data/{bin → exe}/fk +2 -1
- data/flat_kit.gemspec +33 -0
- data/lib/flat_kit/cli.rb +48 -23
- data/lib/flat_kit/command/cat.rb +34 -32
- data/lib/flat_kit/command/merge.rb +37 -36
- data/lib/flat_kit/command/sort.rb +37 -37
- data/lib/flat_kit/command/stats.rb +96 -0
- data/lib/flat_kit/command.rb +10 -10
- data/lib/flat_kit/descendant_tracker.rb +17 -5
- data/lib/flat_kit/error.rb +4 -0
- data/lib/flat_kit/event_emitter.rb +7 -4
- data/lib/flat_kit/field_stats.rb +246 -0
- data/lib/flat_kit/field_type/boolean_type.rb +52 -0
- data/lib/flat_kit/field_type/date_type.rb +181 -0
- data/lib/flat_kit/field_type/float_type.rb +43 -0
- data/lib/flat_kit/field_type/guess_type.rb +23 -0
- data/lib/flat_kit/field_type/integer_type.rb +36 -0
- data/lib/flat_kit/field_type/null_type.rb +39 -0
- data/lib/flat_kit/field_type/string_type.rb +24 -0
- data/lib/flat_kit/field_type/timestamp_type.rb +48 -0
- data/lib/flat_kit/field_type/unknown_type.rb +30 -0
- data/lib/flat_kit/field_type.rb +83 -0
- data/lib/flat_kit/format.rb +11 -5
- data/lib/flat_kit/input/file.rb +11 -9
- data/lib/flat_kit/input/io.rb +18 -21
- data/lib/flat_kit/input.rb +8 -7
- data/lib/flat_kit/internal_node.rb +22 -19
- data/lib/flat_kit/jsonl/format.rb +6 -2
- data/lib/flat_kit/jsonl/reader.rb +7 -4
- data/lib/flat_kit/jsonl/record.rb +16 -19
- data/lib/flat_kit/jsonl/writer.rb +25 -18
- data/lib/flat_kit/jsonl.rb +8 -4
- data/lib/flat_kit/leaf_node.rb +6 -5
- data/lib/flat_kit/log_formatter.rb +20 -0
- data/lib/flat_kit/logger.rb +12 -19
- data/lib/flat_kit/merge.rb +21 -16
- data/lib/flat_kit/merge_tree.rb +5 -6
- data/lib/flat_kit/output/file.rb +13 -9
- data/lib/flat_kit/output/io.rb +40 -35
- data/lib/flat_kit/output.rb +12 -7
- data/lib/flat_kit/position.rb +18 -0
- data/lib/flat_kit/reader.rb +8 -8
- data/lib/flat_kit/record.rb +12 -12
- data/lib/flat_kit/sentinel_internal_node.rb +6 -5
- data/lib/flat_kit/sentinel_leaf_node.rb +4 -1
- data/lib/flat_kit/sort.rb +8 -9
- data/lib/flat_kit/stat_type/nominal_stats.rb +64 -0
- data/lib/flat_kit/stat_type/numerical_stats.rb +120 -0
- data/lib/flat_kit/stat_type/ordinal_stats.rb +37 -0
- data/lib/flat_kit/stat_type.rb +70 -0
- data/lib/flat_kit/stats.rb +64 -0
- data/lib/flat_kit/writer.rb +17 -3
- data/lib/flat_kit/xsv/format.rb +6 -2
- data/lib/flat_kit/xsv/reader.rb +8 -6
- data/lib/flat_kit/xsv/record.rb +21 -15
- data/lib/flat_kit/xsv/writer.rb +36 -18
- data/lib/flat_kit/xsv.rb +7 -4
- data/lib/flat_kit.rb +33 -21
- metadata +38 -113
- data/Rakefile +0 -20
- data/tasks/default.rake +0 -242
- data/tasks/extension.rake +0 -38
- data/tasks/man.rake +0 -7
- data/tasks/this.rb +0 -208
- data/test/device_dataset.rb +0 -117
- data/test/input/test_file.rb +0 -73
- data/test/input/test_io.rb +0 -93
- data/test/jsonl/test_format.rb +0 -22
- data/test/jsonl/test_reader.rb +0 -49
- data/test/jsonl/test_record.rb +0 -61
- data/test/jsonl/test_writer.rb +0 -68
- data/test/output/test_file.rb +0 -60
- data/test/output/test_io.rb +0 -104
- data/test/test_conversions.rb +0 -45
- data/test/test_event_emitter.rb +0 -72
- data/test/test_format.rb +0 -24
- data/test/test_helper.rb +0 -26
- data/test/test_merge.rb +0 -40
- data/test/test_merge_tree.rb +0 -64
- data/test/test_version.rb +0 -11
- data/test/xsv/test_format.rb +0 -22
- data/test/xsv/test_reader.rb +0 -61
- data/test/xsv/test_record.rb +0 -69
- data/test/xsv/test_writer.rb +0 -68
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f30ca1aa08d445cdae1e2da95f32bcf3185e0ab5f003675d777cce2741ed3b8
|
4
|
+
data.tar.gz: 42606f14ad0e846e83734388f8250c0045a80208b9a91034360dc86c088121a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6b318485580e129ff979004c17ef21a4223aaf870224348b26353af7220ba8e57dfe11d21ad0870c9c0ccedd9ebcb1a1b40fc80417fef40dc8d27cdd569c205
|
7
|
+
data.tar.gz: b91c41e7778e598d2b3ab388b063e527dabceab2323c6ceb64ae8a1b68da4f13aceb79e3b400f4bbc56eb41456d717c4a498247777c6839e403f7998901f254e
|
data/CONTRIBUTING.md
CHANGED
@@ -27,8 +27,7 @@ easiest way to contribute.
|
|
27
27
|
* Fork the [repo][].
|
28
28
|
* Create a new branch for your issue: `git checkout -b issue/my-issue`
|
29
29
|
* Lovingly craft your contribution:
|
30
|
-
* `
|
31
|
-
* `bundle install` to install dependencies.
|
30
|
+
* `bin/setup` to bootstrap development.
|
32
31
|
* `rake test` to run tests
|
33
32
|
* Make sure that `rake test` passes. It's important, I said it twice.
|
34
33
|
* Add yourself to the contributors section below.
|
data/HISTORY.md
CHANGED
@@ -1,4 +1,19 @@
|
|
1
1
|
# FlatKit Changelog
|
2
|
+
|
3
|
+
## Version 1.0.0
|
4
|
+
|
5
|
+
* Setup semaphore testing
|
6
|
+
* Fix typos
|
7
|
+
* Fix commandline error if no sub command is given
|
8
|
+
* Add rubocop
|
9
|
+
* Update supported Ruby versions to 3.0 or greater
|
10
|
+
|
11
|
+
## Version 0.3.0
|
12
|
+
|
13
|
+
* Changing the event listening api to include meta data about the event
|
14
|
+
* Add field type detection
|
15
|
+
* Add a 'stats' command to generate stats about the data file
|
16
|
+
|
2
17
|
## Version 0.2.0
|
3
18
|
|
4
19
|
* add in event listening to allow for additional integrations
|
data/Manifest.txt
CHANGED
@@ -3,17 +3,29 @@ HISTORY.md
|
|
3
3
|
LICENSE.txt
|
4
4
|
Manifest.txt
|
5
5
|
README.md
|
6
|
-
|
7
|
-
|
6
|
+
exe/fk
|
7
|
+
flat_kit.gemspec
|
8
8
|
lib/flat_kit.rb
|
9
9
|
lib/flat_kit/cli.rb
|
10
10
|
lib/flat_kit/command.rb
|
11
11
|
lib/flat_kit/command/cat.rb
|
12
12
|
lib/flat_kit/command/merge.rb
|
13
13
|
lib/flat_kit/command/sort.rb
|
14
|
+
lib/flat_kit/command/stats.rb
|
14
15
|
lib/flat_kit/descendant_tracker.rb
|
15
16
|
lib/flat_kit/error.rb
|
16
17
|
lib/flat_kit/event_emitter.rb
|
18
|
+
lib/flat_kit/field_stats.rb
|
19
|
+
lib/flat_kit/field_type.rb
|
20
|
+
lib/flat_kit/field_type/boolean_type.rb
|
21
|
+
lib/flat_kit/field_type/date_type.rb
|
22
|
+
lib/flat_kit/field_type/float_type.rb
|
23
|
+
lib/flat_kit/field_type/guess_type.rb
|
24
|
+
lib/flat_kit/field_type/integer_type.rb
|
25
|
+
lib/flat_kit/field_type/null_type.rb
|
26
|
+
lib/flat_kit/field_type/string_type.rb
|
27
|
+
lib/flat_kit/field_type/timestamp_type.rb
|
28
|
+
lib/flat_kit/field_type/unknown_type.rb
|
17
29
|
lib/flat_kit/format.rb
|
18
30
|
lib/flat_kit/input.rb
|
19
31
|
lib/flat_kit/input/file.rb
|
@@ -25,44 +37,27 @@ lib/flat_kit/jsonl/reader.rb
|
|
25
37
|
lib/flat_kit/jsonl/record.rb
|
26
38
|
lib/flat_kit/jsonl/writer.rb
|
27
39
|
lib/flat_kit/leaf_node.rb
|
40
|
+
lib/flat_kit/log_formatter.rb
|
28
41
|
lib/flat_kit/logger.rb
|
29
42
|
lib/flat_kit/merge.rb
|
30
43
|
lib/flat_kit/merge_tree.rb
|
31
44
|
lib/flat_kit/output.rb
|
32
45
|
lib/flat_kit/output/file.rb
|
33
46
|
lib/flat_kit/output/io.rb
|
47
|
+
lib/flat_kit/position.rb
|
34
48
|
lib/flat_kit/reader.rb
|
35
49
|
lib/flat_kit/record.rb
|
36
50
|
lib/flat_kit/sentinel_internal_node.rb
|
37
51
|
lib/flat_kit/sentinel_leaf_node.rb
|
38
52
|
lib/flat_kit/sort.rb
|
53
|
+
lib/flat_kit/stat_type.rb
|
54
|
+
lib/flat_kit/stat_type/nominal_stats.rb
|
55
|
+
lib/flat_kit/stat_type/numerical_stats.rb
|
56
|
+
lib/flat_kit/stat_type/ordinal_stats.rb
|
57
|
+
lib/flat_kit/stats.rb
|
39
58
|
lib/flat_kit/writer.rb
|
40
59
|
lib/flat_kit/xsv.rb
|
41
60
|
lib/flat_kit/xsv/format.rb
|
42
61
|
lib/flat_kit/xsv/reader.rb
|
43
62
|
lib/flat_kit/xsv/record.rb
|
44
63
|
lib/flat_kit/xsv/writer.rb
|
45
|
-
tasks/default.rake
|
46
|
-
tasks/extension.rake
|
47
|
-
tasks/man.rake
|
48
|
-
tasks/this.rb
|
49
|
-
test/device_dataset.rb
|
50
|
-
test/input/test_file.rb
|
51
|
-
test/input/test_io.rb
|
52
|
-
test/jsonl/test_format.rb
|
53
|
-
test/jsonl/test_reader.rb
|
54
|
-
test/jsonl/test_record.rb
|
55
|
-
test/jsonl/test_writer.rb
|
56
|
-
test/output/test_file.rb
|
57
|
-
test/output/test_io.rb
|
58
|
-
test/test_conversions.rb
|
59
|
-
test/test_event_emitter.rb
|
60
|
-
test/test_format.rb
|
61
|
-
test/test_helper.rb
|
62
|
-
test/test_merge.rb
|
63
|
-
test/test_merge_tree.rb
|
64
|
-
test/test_version.rb
|
65
|
-
test/xsv/test_format.rb
|
66
|
-
test/xsv/test_reader.rb
|
67
|
-
test/xsv/test_record.rb
|
68
|
-
test/xsv/test_writer.rb
|
data/{bin → exe}/fk
RENAMED
data/flat_kit.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# DO NOT EDIT - This file is automatically generated
|
2
|
+
# Make changes to Manifest.txt and/or Rakefile and regenerate
|
3
|
+
# -*- encoding: utf-8 -*-
|
4
|
+
# stub: flat_kit 1.0.0 ruby lib
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "flat_kit".freeze
|
8
|
+
s.version = "1.0.0".freeze
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
11
|
+
s.metadata = { "bug_tracker_uri" => "https://github.com/copiousfreetime/flat_kit/issues", "changelog_uri" => "https://github.com/copiousfreetime/flat_kit/blob/master/HISTORY.md", "homepage_uri" => "https://github.com/copiousfreetime/flat_kit", "label" => "flat_kit", "rubygems_mfa_required" => "true", "source_code_uri" => "https://github.com/copiousfreetime/flat_kit" } if s.respond_to? :metadata=
|
12
|
+
s.require_paths = ["lib".freeze]
|
13
|
+
s.authors = ["Jeremy Hinegardner".freeze]
|
14
|
+
s.bindir = "exe".freeze
|
15
|
+
s.date = "2024-04-28"
|
16
|
+
s.description = "A library and commandline program for reading, writing, indexing, sorting, and merging CSV, TSV, JSON and other flat-file formats.".freeze
|
17
|
+
s.email = "jeremy@copiousfreetime.org".freeze
|
18
|
+
s.executables = ["fk".freeze]
|
19
|
+
s.extra_rdoc_files = ["CONTRIBUTING.md".freeze, "HISTORY.md".freeze, "LICENSE.txt".freeze, "Manifest.txt".freeze, "README.md".freeze]
|
20
|
+
s.files = ["CONTRIBUTING.md".freeze, "HISTORY.md".freeze, "LICENSE.txt".freeze, "Manifest.txt".freeze, "README.md".freeze, "exe/fk".freeze, "flat_kit.gemspec".freeze, "lib/flat_kit.rb".freeze, "lib/flat_kit/cli.rb".freeze, "lib/flat_kit/command.rb".freeze, "lib/flat_kit/command/cat.rb".freeze, "lib/flat_kit/command/merge.rb".freeze, "lib/flat_kit/command/sort.rb".freeze, "lib/flat_kit/command/stats.rb".freeze, "lib/flat_kit/descendant_tracker.rb".freeze, "lib/flat_kit/error.rb".freeze, "lib/flat_kit/event_emitter.rb".freeze, "lib/flat_kit/field_stats.rb".freeze, "lib/flat_kit/field_type.rb".freeze, "lib/flat_kit/field_type/boolean_type.rb".freeze, "lib/flat_kit/field_type/date_type.rb".freeze, "lib/flat_kit/field_type/float_type.rb".freeze, "lib/flat_kit/field_type/guess_type.rb".freeze, "lib/flat_kit/field_type/integer_type.rb".freeze, "lib/flat_kit/field_type/null_type.rb".freeze, "lib/flat_kit/field_type/string_type.rb".freeze, "lib/flat_kit/field_type/timestamp_type.rb".freeze, "lib/flat_kit/field_type/unknown_type.rb".freeze, "lib/flat_kit/format.rb".freeze, "lib/flat_kit/input.rb".freeze, "lib/flat_kit/input/file.rb".freeze, "lib/flat_kit/input/io.rb".freeze, "lib/flat_kit/internal_node.rb".freeze, "lib/flat_kit/jsonl.rb".freeze, "lib/flat_kit/jsonl/format.rb".freeze, "lib/flat_kit/jsonl/reader.rb".freeze, "lib/flat_kit/jsonl/record.rb".freeze, "lib/flat_kit/jsonl/writer.rb".freeze, "lib/flat_kit/leaf_node.rb".freeze, "lib/flat_kit/log_formatter.rb".freeze, "lib/flat_kit/logger.rb".freeze, "lib/flat_kit/merge.rb".freeze, "lib/flat_kit/merge_tree.rb".freeze, "lib/flat_kit/output.rb".freeze, "lib/flat_kit/output/file.rb".freeze, "lib/flat_kit/output/io.rb".freeze, "lib/flat_kit/position.rb".freeze, "lib/flat_kit/reader.rb".freeze, "lib/flat_kit/record.rb".freeze, "lib/flat_kit/sentinel_internal_node.rb".freeze, "lib/flat_kit/sentinel_leaf_node.rb".freeze, "lib/flat_kit/sort.rb".freeze, "lib/flat_kit/stat_type.rb".freeze, 
"lib/flat_kit/stat_type/nominal_stats.rb".freeze, "lib/flat_kit/stat_type/numerical_stats.rb".freeze, "lib/flat_kit/stat_type/ordinal_stats.rb".freeze, "lib/flat_kit/stats.rb".freeze, "lib/flat_kit/writer.rb".freeze, "lib/flat_kit/xsv.rb".freeze, "lib/flat_kit/xsv/format.rb".freeze, "lib/flat_kit/xsv/reader.rb".freeze, "lib/flat_kit/xsv/record.rb".freeze, "lib/flat_kit/xsv/writer.rb".freeze]
|
21
|
+
s.homepage = "http://github.com/copiousfreetime/flat_kit".freeze
|
22
|
+
s.licenses = ["MIT".freeze]
|
23
|
+
s.rdoc_options = ["--main".freeze, "README.md".freeze, "--markup".freeze, "tomdoc".freeze]
|
24
|
+
s.required_ruby_version = Gem::Requirement.new(">= 3.0.0".freeze)
|
25
|
+
s.rubygems_version = "3.5.9".freeze
|
26
|
+
s.summary = "A library and commandline program for reading, writing, indexing, sorting, and merging CSV, TSV, JSON and other flat-file formats.".freeze
|
27
|
+
|
28
|
+
s.specification_version = 4
|
29
|
+
|
30
|
+
s.add_runtime_dependency(%q<oj>.freeze, ["~> 3.0".freeze])
|
31
|
+
s.add_runtime_dependency(%q<optimist>.freeze, ["~> 3.0".freeze])
|
32
|
+
s.add_runtime_dependency(%q<csv>.freeze, ["~> 3.3".freeze])
|
33
|
+
end
|
data/lib/flat_kit/cli.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "optimist"
|
4
|
+
require_relative "../flat_kit/command"
|
3
5
|
|
4
6
|
module FlatKit
|
7
|
+
# Public: The main entry point for the command line interface
|
8
|
+
#
|
5
9
|
class Cli
|
6
10
|
attr_reader :options
|
7
11
|
|
@@ -13,18 +17,18 @@ module FlatKit
|
|
13
17
|
@parser ||= ::Optimist::Parser.new do
|
14
18
|
version ::FlatKit::VERSION
|
15
19
|
|
16
|
-
banner "fk v#{
|
20
|
+
banner "fk v#{version}"
|
17
21
|
|
18
22
|
banner <<~USAGE
|
19
23
|
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
Usage:
|
25
|
+
fk <command> [<args>...]
|
26
|
+
fk [options]
|
23
27
|
USAGE
|
24
28
|
|
25
29
|
banner <<~OPTIONS
|
26
30
|
|
27
|
-
|
31
|
+
Options:
|
28
32
|
|
29
33
|
OPTIONS
|
30
34
|
|
@@ -40,8 +44,8 @@ module FlatKit
|
|
40
44
|
end
|
41
45
|
|
42
46
|
def self.commands_banner
|
43
|
-
sorted_commands = FlatKit::Command.children.sort_by
|
44
|
-
left_width = sorted_commands.map { |c| c.name.length }.
|
47
|
+
sorted_commands = FlatKit::Command.children.sort_by(&:name)
|
48
|
+
left_width = sorted_commands.map { |c| c.name.length }.max
|
45
49
|
banner = StringIO.new
|
46
50
|
banner.puts
|
47
51
|
banner.puts "Commands:"
|
@@ -54,27 +58,48 @@ module FlatKit
|
|
54
58
|
end
|
55
59
|
|
56
60
|
def run(argv: ARGV, env: ENV)
|
57
|
-
opts =
|
61
|
+
opts = parse_opts(argv)
|
62
|
+
init_logging(opts)
|
63
|
+
::FlatKit.logger.debug(argv)
|
64
|
+
|
65
|
+
command_name = argv.shift
|
66
|
+
exit_if_help(command_name)
|
67
|
+
|
68
|
+
command_klass = command_klass_or_exit(command_name)
|
69
|
+
command = command_klass.new(argv: argv, logger: ::FlatKit.logger, env: env)
|
70
|
+
command.call
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
|
75
|
+
def parse_opts(argv)
|
76
|
+
::Optimist.with_standard_exception_handling(parser) do
|
58
77
|
parser.parse(argv)
|
59
78
|
end
|
79
|
+
end
|
60
80
|
|
61
|
-
|
62
|
-
|
63
|
-
end
|
81
|
+
def init_logging(opts)
|
82
|
+
::FlatKit.log_to(opts[:log]) if opts[:log_given]
|
64
83
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
end
|
84
|
+
::FlatKit.logger.level = opts[:verbose] ? :debug : :info
|
85
|
+
|
86
|
+
::FlatKit.logger.debug(opts)
|
87
|
+
end
|
70
88
|
|
71
|
-
|
72
|
-
|
89
|
+
def exit_if_help(command_name)
|
90
|
+
return unless command_name.nil? || command_name.downcase == "help"
|
73
91
|
|
74
|
-
|
92
|
+
parser.educate
|
93
|
+
exit 0
|
94
|
+
end
|
95
|
+
|
96
|
+
def command_klass_or_exit(command_name)
|
75
97
|
command_klass = FlatKit::Command.for(command_name)
|
76
|
-
|
77
|
-
|
98
|
+
return command_klass unless command_klass.nil?
|
99
|
+
|
100
|
+
$stdout.puts "ERROR: Unknown command '#{command_name}'"
|
101
|
+
parser.educate
|
102
|
+
exit 1
|
78
103
|
end
|
79
104
|
end
|
80
105
|
end
|
data/lib/flat_kit/command/cat.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module FlatKit
|
2
4
|
class Command
|
5
|
+
# Internal: The implementation of the cat command.
|
6
|
+
#
|
3
7
|
# TODO: Implement the --flatten commandline switch
|
4
8
|
class Cat < ::FlatKit::Command
|
5
9
|
def self.name
|
@@ -12,43 +16,43 @@ module FlatKit
|
|
12
16
|
|
13
17
|
def self.parser
|
14
18
|
::Optimist::Parser.new do
|
15
|
-
banner
|
19
|
+
banner Cat.description.to_s
|
16
20
|
banner ""
|
17
21
|
|
18
22
|
banner <<~BANNER
|
19
|
-
|
20
|
-
|
21
|
-
|
23
|
+
Concatenates files that have the same field structure together into
|
24
|
+
a single file. The files can be of different formats, but must have
|
25
|
+
the same fields and names.
|
22
26
|
|
23
|
-
|
24
|
-
|
27
|
+
This is probably most easily usable as a way to convert CSV to JSON
|
28
|
+
and vice versa.
|
25
29
|
|
26
|
-
|
27
|
-
|
28
|
-
|
30
|
+
The flatfile type(s) will be automatically determined by the file name.
|
31
|
+
If the inputs or output is not a file, but from stdin or stdout then
|
32
|
+
the input and output types must be specified.
|
29
33
|
|
30
|
-
|
31
|
-
|
32
|
-
|
34
|
+
NOTE: If converting from JSON to CSV and the input JSON does not have
|
35
|
+
every possible field on every record, then the output csv will
|
36
|
+
be corrupted.
|
33
37
|
|
34
|
-
|
35
|
-
|
36
|
-
|
38
|
+
In this case the input json should be fed through 'flatten' first
|
39
|
+
or use the '--flatten' flag which will require an additional pass
|
40
|
+
through the input to gather all the fields
|
37
41
|
BANNER
|
38
42
|
|
39
43
|
banner <<~USAGE
|
40
44
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
45
|
+
Usage:
|
46
|
+
fk cat file1.csv file2.csv > combined.csv
|
47
|
+
fk cat --output-format json file1.csv
|
48
|
+
fk cat file1.csv.gzip -o file2.json.gzip
|
49
|
+
fk cat file1.csv.gzip --output-format json | gzip -c > file1.jsonl.gz
|
46
50
|
|
47
51
|
USAGE
|
48
52
|
|
49
53
|
banner <<~OPTIONS
|
50
54
|
|
51
|
-
|
55
|
+
Options:
|
52
56
|
|
53
57
|
OPTIONS
|
54
58
|
|
@@ -60,17 +64,15 @@ module FlatKit
|
|
60
64
|
|
61
65
|
def parse
|
62
66
|
parser = self.class.parser
|
63
|
-
::Optimist
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
raise ::Optimist::CommandlineError, e.message
|
73
|
-
end
|
67
|
+
::Optimist.with_standard_exception_handling(parser) do
|
68
|
+
@opts = parser.parse(argv)
|
69
|
+
paths = parser.leftovers
|
70
|
+
|
71
|
+
@readers = ::FlatKit::Reader.create_readers_from_paths(paths: paths, fallback: opts[:input_format])
|
72
|
+
@writer = ::FlatKit::Writer.create_writer_from_path(path: opts[:output], fallback: opts[:output_format],
|
73
|
+
reader_format: @readers.first.format_name)
|
74
|
+
rescue ::FlatKit::Error => e
|
75
|
+
raise ::Optimist::CommandlineError, e.message
|
74
76
|
end
|
75
77
|
end
|
76
78
|
|
@@ -1,8 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
2
4
|
module FlatKit
|
3
5
|
class Command
|
6
|
+
# Internal: The implementation of the merge command.
|
7
|
+
#
|
4
8
|
class Merge < ::FlatKit::Command
|
5
|
-
|
6
9
|
def self.name
|
7
10
|
"merge"
|
8
11
|
end
|
@@ -13,43 +16,43 @@ module FlatKit
|
|
13
16
|
|
14
17
|
def self.parser
|
15
18
|
::Optimist::Parser.new do
|
16
|
-
banner
|
19
|
+
banner Merge.description.to_s
|
17
20
|
banner ""
|
18
21
|
|
19
22
|
banner <<~BANNER
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
+
Given a set of input files that have the same structure, and are already
|
24
|
+
sorted by a set of keys. The Merge command will merge all those files
|
25
|
+
into a single output file.
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
+
The --key parameter is required, and it must be a comma separated list
|
28
|
+
of field names on the input on which to use as the sort key for the merge
|
29
|
+
process.
|
27
30
|
|
28
|
-
|
29
|
-
|
31
|
+
There must also be at least 2 input files. Merging only 1 file into an
|
32
|
+
output file is the same as the 'cat' command.
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
34
|
+
The flatfile type(s) will be automatically determined by the file name.
|
35
|
+
If the output is not a file, but to stdout then the output type will
|
36
|
+
be the same as the first input file, or it can be specified as a commandline
|
37
|
+
switch.
|
35
38
|
|
36
|
-
|
37
|
-
|
39
|
+
The merge will do a single pass through the input to generate the
|
40
|
+
output.
|
38
41
|
BANNER
|
39
42
|
|
40
43
|
banner <<~USAGE
|
41
44
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
Usage:
|
46
|
+
fk merge --key surname,given_name file1.csv file2.csv > all.csv
|
47
|
+
fk merge --key surname,given_name --output-format json file1.csv file2.csv > all.json
|
48
|
+
fk merge --key field1,field2 --output-format json input*.csv | gzip -c > all.json.gz
|
49
|
+
fk merge --key field12 file*.json.gz -o all.json.gz
|
47
50
|
|
48
51
|
USAGE
|
49
52
|
|
50
53
|
banner <<~OPTIONS
|
51
54
|
|
52
|
-
|
55
|
+
Options:
|
53
56
|
|
54
57
|
OPTIONS
|
55
58
|
|
@@ -64,19 +67,17 @@ module FlatKit
|
|
64
67
|
|
65
68
|
def parse
|
66
69
|
parser = self.class.parser
|
67
|
-
::Optimist
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
raise ::Optimist::CommandlineError, e.message
|
79
|
-
end
|
70
|
+
::Optimist.with_standard_exception_handling(parser) do
|
71
|
+
@opts = parser.parse(argv)
|
72
|
+
@compare_keys = CSV.parse_line(opts[:key])
|
73
|
+
paths = parser.leftovers
|
74
|
+
raise ::Optimist::CommandlineError, "At least 2 input files are required" if paths.size < 2
|
75
|
+
|
76
|
+
@merge = ::FlatKit::Merge.new(inputs: paths, input_fallback: opts[:input_format],
|
77
|
+
compare_fields: @compare_keys,
|
78
|
+
output: opts[:output], output_fallback: opts[:output_format])
|
79
|
+
rescue ::FlatKit::Error => e
|
80
|
+
raise ::Optimist::CommandlineError, e.message
|
80
81
|
end
|
81
82
|
end
|
82
83
|
|
@@ -1,8 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
2
4
|
module FlatKit
|
3
5
|
class Command
|
6
|
+
# Internal: The implementation of the sort command.
|
7
|
+
#
|
4
8
|
class Sort < ::FlatKit::Command
|
5
|
-
|
6
9
|
def self.name
|
7
10
|
"sort"
|
8
11
|
end
|
@@ -13,41 +16,41 @@ module FlatKit
|
|
13
16
|
|
14
17
|
def self.parser
|
15
18
|
::Optimist::Parser.new do
|
16
|
-
banner
|
19
|
+
banner Sort.description.to_s
|
17
20
|
banner ""
|
18
21
|
|
19
22
|
banner <<~BANNER
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
+
Given an input file and a sort key, order the records in that file by that
|
24
|
+
key. If no input file is given then stdin is assumed. If no output file
|
25
|
+
is given then stdout is assumed.
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
+
The --key parameter is required, and it must be a comma separated list
|
28
|
+
of field names on the input on which to use as the sort key for the sort
|
29
|
+
process.
|
27
30
|
|
28
|
-
|
31
|
+
There must also be only 1 input file.
|
29
32
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
33
|
+
The flatfile type(s) will be automatically determined by the file name.
|
34
|
+
If the output is not a file, but to stdout then the output type will
|
35
|
+
be the same as the first input file, or it can be specified as a commandline
|
36
|
+
switch.
|
34
37
|
|
35
38
|
BANNER
|
36
39
|
|
37
40
|
banner <<~USAGE
|
38
41
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
Usage:
|
43
|
+
fk sort --key surname,given_name file.csv > sorted.csv
|
44
|
+
fk sort --key surname,given_name --output-format json file.csv > sorted.json
|
45
|
+
fk sort --key field1,field2 --output-format json input.csv | gzip -c > sorted.json.gz
|
46
|
+
fk sort --key field1 file.json.gz -o sorted.json.gz
|
47
|
+
gunzip -c file.json.gz | fk sort --key field1 --input-format json --output-format json | gzip -c > sorted.json.gz
|
45
48
|
|
46
49
|
USAGE
|
47
50
|
|
48
51
|
banner <<~OPTIONS
|
49
52
|
|
50
|
-
|
53
|
+
Options:
|
51
54
|
|
52
55
|
OPTIONS
|
53
56
|
|
@@ -58,25 +61,22 @@ module FlatKit
|
|
58
61
|
end
|
59
62
|
end
|
60
63
|
|
61
|
-
attr_reader :compare_keys
|
62
|
-
attr_reader :reader
|
63
|
-
attr_reader :sort
|
64
|
+
attr_reader :compare_keys, :reader, :sort
|
64
65
|
|
65
66
|
def parse
|
66
67
|
parser = self.class.parser
|
67
|
-
::Optimist
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
end
|
68
|
+
::Optimist.with_standard_exception_handling(parser) do
|
69
|
+
@opts = parser.parse(argv)
|
70
|
+
@compare_keys = CSV.parse_line(opts[:key])
|
71
|
+
paths = parser.leftovers
|
72
|
+
raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed" if paths.size > 1
|
73
|
+
|
74
|
+
path = paths.first || "-" # default to stdin
|
75
|
+
@sort = ::FlatKit::Sort.new(input: path, input_fallback: opts[:input_format],
|
76
|
+
output: opts[:output], output_fallback: opts[:output_format],
|
77
|
+
compare_fields: @compare_keys)
|
78
|
+
rescue ::FlatKit::Error => e
|
79
|
+
raise ::Optimist::CommandlineError, e.message
|
80
80
|
end
|
81
81
|
end
|
82
82
|
|