flat_kit 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CONTRIBUTING.md +46 -0
- data/HISTORY.md +5 -0
- data/LICENSE.txt +21 -0
- data/Manifest.txt +66 -0
- data/README.md +80 -0
- data/Rakefile +20 -0
- data/bin/fk +4 -0
- data/lib/flat_kit.rb +23 -0
- data/lib/flat_kit/cli.rb +80 -0
- data/lib/flat_kit/command.rb +53 -0
- data/lib/flat_kit/command/cat.rb +93 -0
- data/lib/flat_kit/command/merge.rb +88 -0
- data/lib/flat_kit/command/sort.rb +88 -0
- data/lib/flat_kit/descendant_tracker.rb +27 -0
- data/lib/flat_kit/error.rb +5 -0
- data/lib/flat_kit/format.rb +34 -0
- data/lib/flat_kit/input.rb +32 -0
- data/lib/flat_kit/input/file.rb +53 -0
- data/lib/flat_kit/input/io.rb +54 -0
- data/lib/flat_kit/internal_node.rb +84 -0
- data/lib/flat_kit/jsonl.rb +8 -0
- data/lib/flat_kit/jsonl/format.rb +25 -0
- data/lib/flat_kit/jsonl/reader.rb +30 -0
- data/lib/flat_kit/jsonl/record.rb +84 -0
- data/lib/flat_kit/jsonl/writer.rb +45 -0
- data/lib/flat_kit/leaf_node.rb +71 -0
- data/lib/flat_kit/logger.rb +39 -0
- data/lib/flat_kit/merge.rb +35 -0
- data/lib/flat_kit/merge_tree.rb +104 -0
- data/lib/flat_kit/output.rb +32 -0
- data/lib/flat_kit/output/file.rb +55 -0
- data/lib/flat_kit/output/io.rb +73 -0
- data/lib/flat_kit/reader.rb +61 -0
- data/lib/flat_kit/record.rb +83 -0
- data/lib/flat_kit/sentinel_internal_node.rb +37 -0
- data/lib/flat_kit/sentinel_leaf_node.rb +37 -0
- data/lib/flat_kit/sort.rb +35 -0
- data/lib/flat_kit/writer.rb +38 -0
- data/lib/flat_kit/xsv.rb +8 -0
- data/lib/flat_kit/xsv/format.rb +25 -0
- data/lib/flat_kit/xsv/reader.rb +45 -0
- data/lib/flat_kit/xsv/record.rb +90 -0
- data/lib/flat_kit/xsv/writer.rb +70 -0
- data/tasks/default.rake +242 -0
- data/tasks/extension.rake +38 -0
- data/tasks/man.rake +7 -0
- data/tasks/this.rb +208 -0
- data/test/device_dataset.rb +117 -0
- data/test/input/test_file.rb +73 -0
- data/test/input/test_io.rb +93 -0
- data/test/jsonl/test_format.rb +22 -0
- data/test/jsonl/test_reader.rb +49 -0
- data/test/jsonl/test_record.rb +61 -0
- data/test/jsonl/test_writer.rb +68 -0
- data/test/output/test_file.rb +60 -0
- data/test/output/test_io.rb +104 -0
- data/test/test_conversions.rb +45 -0
- data/test/test_format.rb +24 -0
- data/test/test_helper.rb +26 -0
- data/test/test_merge.rb +40 -0
- data/test/test_merge_tree.rb +64 -0
- data/test/test_version.rb +11 -0
- data/test/xsv/test_format.rb +22 -0
- data/test/xsv/test_reader.rb +61 -0
- data/test/xsv/test_record.rb +69 -0
- data/test/xsv/test_writer.rb +68 -0
- metadata +237 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: fa0b52b3ab12e9cf86492571258daf5d98dde967c25d5aad65b74a6003900fa2
|
4
|
+
data.tar.gz: fcd1abff76e54854d86b131aa6773d411f4d54acbf2e48b541c69c72dd2c1149
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 072e966f06fe03de16b686f469f0e3ac84aa2cc93910cc61210875e6d9cbf0bb3cc24bb1cf4f0863b90f2d02110abcdb0f88bbfa72777f2c19b2cce30c847e45
|
7
|
+
data.tar.gz: f5f67f8818e5cce8da7b547a3ec652d6dc288f9b66a957a62519878d819c893285ea3e81e7a32286de0d614412416f7ba38d8c28e7807f12add9ec00b3241e92
|
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Hi there!
|
2
|
+
|
3
|
+
I see you are interested in contributing. That is wonderful. I love
|
4
|
+
contributions.
|
5
|
+
|
6
|
+
I guarantee that there are bugs in this software. And I guarantee that there is
|
7
|
+
a feature you want that is not in here yet. As such, any and all bug reports
|
8
|
+
are gratefully accepted, bugfixes even more so. Helping out with bugs is the
|
9
|
+
easiest way to contribute.
|
10
|
+
|
11
|
+
|
12
|
+
## The Quick Version
|
13
|
+
|
14
|
+
* Have a [GitHub Account][].
|
15
|
+
* Search the [GitHub Issues][] and see if your issue is already present. If so
|
16
|
+
add your comments, :thumbsup:, etc.
|
17
|
+
* Issue not there? Not a problem, open up a [new issue][].
|
18
|
+
* **Bug reports** please be as detailed as possible. Include:
|
19
|
+
* full ruby engine and version: `ruby -e 'puts RUBY_DESCRIPTION'`
|
20
|
+
* operating system and version
|
21
|
+
* version of flat_kit `ruby -e "require 'flat_kit'; puts FlatKit::VERSION"`
|
22
|
+
* as much detail about the bug as possible so I can replicate it. Feel free
|
23
|
+
to link in a [gist][]
|
24
|
+
* **New Feature**
|
25
|
+
* What the new feature should do.
|
26
|
+
* What benefit the new feature brings to the project.
|
27
|
+
* Fork the [repo][].
|
28
|
+
* Create a new branch for your issue: `git checkout -b issue/my-issue`
|
29
|
+
* Lovingly craft your contribution:
|
30
|
+
* `rake develop` to bootstrap development. (skip if you already have bundler installed)
|
31
|
+
* `bundle install` to install dependencies.
|
32
|
+
* `rake test` to run tests
|
33
|
+
* Make sure that `rake test` passes. It's important, I said it twice.
|
34
|
+
* Add yourself to the contributors section below.
|
35
|
+
* Submit your [pull request][].
|
36
|
+
|
37
|
+
# Contributors
|
38
|
+
|
39
|
+
* [Jeremy Hinegardner](https://github.com/copiousfreetime)
|
40
|
+
|
41
|
+
[GitHub Account]: https://github.com/signup/free "GitHub Signup"
|
42
|
+
[GitHub Issues]: https://github.com/copiousfreetime/flat_kit/issues "FlatKit Issues"
|
43
|
+
[new issue]: https://github.com/copiousfreetime/flat_kit/issues/new "New FlatKit Issue"
|
44
|
+
[gist]: https://gist.github.com/ "New Gist"
|
45
|
+
[repo]: https://github.com/copiousfreetime/flat_kit "FlatKit Repo"
|
46
|
+
[pull request]: https://help.github.com/articles/using-pull-requests "Using Pull Requests"
|
data/HISTORY.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2021 Jeremy Hinegardner
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/Manifest.txt
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
CONTRIBUTING.md
|
2
|
+
HISTORY.md
|
3
|
+
LICENSE.txt
|
4
|
+
Manifest.txt
|
5
|
+
README.md
|
6
|
+
Rakefile
|
7
|
+
bin/fk
|
8
|
+
lib/flat_kit.rb
|
9
|
+
lib/flat_kit/cli.rb
|
10
|
+
lib/flat_kit/command.rb
|
11
|
+
lib/flat_kit/command/cat.rb
|
12
|
+
lib/flat_kit/command/merge.rb
|
13
|
+
lib/flat_kit/command/sort.rb
|
14
|
+
lib/flat_kit/descendant_tracker.rb
|
15
|
+
lib/flat_kit/error.rb
|
16
|
+
lib/flat_kit/format.rb
|
17
|
+
lib/flat_kit/input.rb
|
18
|
+
lib/flat_kit/input/file.rb
|
19
|
+
lib/flat_kit/input/io.rb
|
20
|
+
lib/flat_kit/internal_node.rb
|
21
|
+
lib/flat_kit/jsonl.rb
|
22
|
+
lib/flat_kit/jsonl/format.rb
|
23
|
+
lib/flat_kit/jsonl/reader.rb
|
24
|
+
lib/flat_kit/jsonl/record.rb
|
25
|
+
lib/flat_kit/jsonl/writer.rb
|
26
|
+
lib/flat_kit/leaf_node.rb
|
27
|
+
lib/flat_kit/logger.rb
|
28
|
+
lib/flat_kit/merge.rb
|
29
|
+
lib/flat_kit/merge_tree.rb
|
30
|
+
lib/flat_kit/output.rb
|
31
|
+
lib/flat_kit/output/file.rb
|
32
|
+
lib/flat_kit/output/io.rb
|
33
|
+
lib/flat_kit/reader.rb
|
34
|
+
lib/flat_kit/record.rb
|
35
|
+
lib/flat_kit/sentinel_internal_node.rb
|
36
|
+
lib/flat_kit/sentinel_leaf_node.rb
|
37
|
+
lib/flat_kit/sort.rb
|
38
|
+
lib/flat_kit/writer.rb
|
39
|
+
lib/flat_kit/xsv.rb
|
40
|
+
lib/flat_kit/xsv/format.rb
|
41
|
+
lib/flat_kit/xsv/reader.rb
|
42
|
+
lib/flat_kit/xsv/record.rb
|
43
|
+
lib/flat_kit/xsv/writer.rb
|
44
|
+
tasks/default.rake
|
45
|
+
tasks/extension.rake
|
46
|
+
tasks/man.rake
|
47
|
+
tasks/this.rb
|
48
|
+
test/device_dataset.rb
|
49
|
+
test/input/test_file.rb
|
50
|
+
test/input/test_io.rb
|
51
|
+
test/jsonl/test_format.rb
|
52
|
+
test/jsonl/test_reader.rb
|
53
|
+
test/jsonl/test_record.rb
|
54
|
+
test/jsonl/test_writer.rb
|
55
|
+
test/output/test_file.rb
|
56
|
+
test/output/test_io.rb
|
57
|
+
test/test_conversions.rb
|
58
|
+
test/test_format.rb
|
59
|
+
test/test_helper.rb
|
60
|
+
test/test_merge.rb
|
61
|
+
test/test_merge_tree.rb
|
62
|
+
test/test_version.rb
|
63
|
+
test/xsv/test_format.rb
|
64
|
+
test/xsv/test_reader.rb
|
65
|
+
test/xsv/test_record.rb
|
66
|
+
test/xsv/test_writer.rb
|
data/README.md
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
## FlatKit
|
2
|
+
|
3
|
+
* [Homepage](https://github.com/copiousfreetime/flat_kit/)
|
4
|
+
* [Github Project](https://github.com/copiousfreetime/flat_kit)
|
5
|
+
|
6
|
+
## DESCRIPTION
|
7
|
+
|
8
|
+
A library and commandline program for reading, writing, indexing,
|
9
|
+
sorting, and merging CSV, TSV, JSON and other flat-file formats.
|
10
|
+
|
11
|
+
## FEATURES
|
12
|
+
|
13
|
+
* Currently supporting CSV/TSV/XSV and JSON formats
|
14
|
+
* Transparently handles gzipped compressed input or output
|
15
|
+
* Sort records based upon the named keys of your choice
|
16
|
+
* Efficient single-pass merge of any number of sorted input files into a
|
17
|
+
single output file.
|
18
|
+
* Both a commandline tool and a ruby library to utilize in your own programs
|
19
|
+
|
20
|
+
|
21
|
+
```
|
22
|
+
Usage:
|
23
|
+
fk <command> [<args>...]
|
24
|
+
fk [options]
|
25
|
+
|
26
|
+
Options:
|
27
|
+
-v, --verbose Force debug. Output lots of information to standard error
|
28
|
+
--list List all the commands
|
29
|
+
--log=<s> Set the logger output location (default: <stderr>)
|
30
|
+
-h, --help Show help message
|
31
|
+
--version Print version and exit
|
32
|
+
|
33
|
+
Commands:
|
34
|
+
|
35
|
+
cat Concatenate files together that have the same structure.
|
36
|
+
merge Merge sorted files together that have the same structure.
|
37
|
+
sort Sort a given file by a set of fields.
|
38
|
+
```
|
39
|
+
|
40
|
+
## EXAMPLES
|
41
|
+
|
42
|
+
**Convert input csv files into a single output json file**
|
43
|
+
|
44
|
+
fk cat files/*csv -o output.json
|
45
|
+
|
46
|
+
**Sort an input json file and output it as a compressed csv**
|
47
|
+
|
48
|
+
fk sort --key year,month,day input.json -o output.csv.gz
|
49
|
+
|
50
|
+
**Merge an entire directory of sorted, compressed csv record files into a compressed json file**
|
51
|
+
|
52
|
+
fk merge --key category,timestamp sorted/*.csv.gz -o sorted.json.gz
|
53
|
+
|
54
|
+
## CREDITS / REFERENCES
|
55
|
+
|
56
|
+
* https://github.com/BurntSushi/xsv - lots of concepts and ideas pulled from xsv
|
57
|
+
|
58
|
+
## MIT LICENSE
|
59
|
+
|
60
|
+
MIT License
|
61
|
+
|
62
|
+
Copyright (c) 2021 Jeremy Hinegardner
|
63
|
+
|
64
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
65
|
+
of this software and associated documentation files (the "Software"), to deal
|
66
|
+
in the Software without restriction, including without limitation the rights
|
67
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
68
|
+
copies of the Software, and to permit persons to whom the Software is
|
69
|
+
furnished to do so, subject to the following conditions:
|
70
|
+
|
71
|
+
The above copyright notice and this permission notice shall be included in all
|
72
|
+
copies or substantial portions of the Software.
|
73
|
+
|
74
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
75
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
76
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
77
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
78
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
79
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
80
|
+
SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# vim: syntax=ruby
#
# Gem build configuration. `This` is expected to be defined by
# tasks/this.rb (loaded below) -- NOTE(review): confirm against that file.
load 'tasks/this.rb'

This.name     = "flat_kit"
This.author   = "Jeremy Hinegardner"
This.email    = "jeremy@copiousfreetime.org"
This.homepage = "http://github.com/copiousfreetime/#{ This.name }"

This.ruby_gemspec do |spec|
  # Runtime dependencies, declared in the same order as before.
  {
    'oj'       => '~> 3.0',
    'optimist' => '~> 3.0',
  }.each do |gem_name, requirement|
    spec.add_dependency(gem_name, requirement)
  end

  # Development-only dependencies.
  {
    'rake'           => '~> 13.0',
    'minitest'       => '~> 5.11',
    'minitest-focus' => '~> 1.2',
    'rdoc'           => '~> 6.3',
    'simplecov'      => '~> 0.21',
  }.each do |gem_name, requirement|
    spec.add_development_dependency(gem_name, requirement)
  end
end

load 'tasks/default.rake'
|
data/bin/fk
ADDED
data/lib/flat_kit.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Top-level namespace for the flat_kit library.
module FlatKit
  # Version of the flat_kit library. Frozen so the shared constant cannot be
  # mutated in place by accident.
  VERSION = "0.1.0".freeze
end
|
4
|
+
require 'flat_kit/error'
|
5
|
+
require 'flat_kit/logger'
|
6
|
+
require 'flat_kit/descendant_tracker'
|
7
|
+
require 'flat_kit/format'
|
8
|
+
require 'flat_kit/record'
|
9
|
+
require 'flat_kit/reader'
|
10
|
+
require 'flat_kit/writer'
|
11
|
+
require 'flat_kit/input'
|
12
|
+
require 'flat_kit/output'
|
13
|
+
require 'flat_kit/cli'
|
14
|
+
require 'flat_kit/xsv'
|
15
|
+
require 'flat_kit/jsonl'
|
16
|
+
require 'flat_kit/merge'
|
17
|
+
require 'flat_kit/sort'
|
18
|
+
|
19
|
+
require 'flat_kit/merge_tree'
|
20
|
+
require 'flat_kit/internal_node'
|
21
|
+
require 'flat_kit/sentinel_internal_node'
|
22
|
+
require 'flat_kit/sentinel_leaf_node'
|
23
|
+
require 'flat_kit/leaf_node'
|
data/lib/flat_kit/cli.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'optimist'
|
2
|
+
require_relative '../flat_kit/command'
|
3
|
+
|
4
|
+
module FlatKit
  # Command line front end for the `fk` executable.
  #
  # Parses the global options, configures the FlatKit logger, and then hands
  # the remaining arguments to the FlatKit::Command subclass whose name matches
  # the first leftover argument.
  class Cli
    # The global options hash produced by the most recent call to #run
    # (nil until #run has parsed the command line).
    attr_reader :options

    def initialize
      @parser = nil
    end

    # Lazily builds and memoizes the Optimist parser for the global options.
    #
    # Parsing stops at the first known command name so that the command's own
    # options are left untouched for the command to parse.
    def parser
      @parser ||= ::Optimist::Parser.new do
        version ::FlatKit::VERSION

        banner "fk v#{self.version}"

        banner <<~USAGE

          Usage:
          fk <command> [<args>...]
          fk [options]
        USAGE

        banner <<~OPTIONS

          Options:

        OPTIONS

        opt :verbose, "Force debug. Output lots of information to standard error", default: false
        opt :list, "List all the commands", default: false, short: :none
        opt :log, "Set the logger output location", default: "<stderr>", short: :none
        opt :help, "Show help message", short: :h
        opt :version, "Print version and exit", short: :none

        stop_on FlatKit::Command.names
        banner Cli.commands_banner
      end
    end

    # Builds the "Commands:" section of the help text: one line per known
    # command, sorted by name, with the names left-justified to a common width.
    #
    # Returns a String. The text is assembled directly rather than through
    # StringIO, which this file never requires.
    def self.commands_banner
      commands = FlatKit::Command.children.sort_by { |c| c.name }
      width    = commands.map { |c| c.name.length }.max

      lines = ["", "Commands:", ""]
      commands.each do |command|
        lines << " #{command.name.ljust(width)} #{command.description}"
      end
      lines.map { |line| "#{line}\n" }.join
    end

    # Parses the global options from +argv+, configures logging, and runs the
    # requested command.
    #
    # argv - Array of command line arguments; consumed as it is parsed.
    # env  - environment passed through to the command.
    #
    # Returns whatever the command's #call returns. A missing or unknown
    # command name is reported as an Optimist command line error instead of
    # failing with NoMethodError on nil.
    #
    # NOTE(review): the --list option is parsed but nothing in this method
    # acts on it.
    def run(argv: ARGV, env: ENV)
      opts = ::Optimist::with_standard_exception_handling(parser) do
        parser.parse(argv)
      end
      @options = opts

      ::FlatKit.log_to(opts[:log]) if opts[:log_given]
      ::FlatKit.logger.level = opts[:verbose] ? :debug : :info

      ::FlatKit.logger.debug opts
      ::FlatKit.logger.debug argv

      command_name  = argv.shift
      command_klass = FlatKit::Command.for(command_name)

      if command_klass.nil?
        # Same error-reporting path Optimist uses for bad options.
        ::Optimist::with_standard_exception_handling(parser) do
          message = command_name ? "unknown command '#{command_name}'" : "no command given"
          raise ::Optimist::CommandlineError, message
        end
      end

      command = command_klass.new(argv: argv, logger: ::FlatKit.logger, env: env)
      command.call
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module FlatKit
  # Abstract base class for the `fk` sub-commands.
  #
  # Subclasses are expected to override the class methods .name, .description
  # and .parser, and the instance methods #parse and #call. Each default here
  # raises NotImplementedError naming the class that is missing the override.
  #
  # NOTE(review): `children` is presumably supplied by DescendantTracker --
  # confirm against that module.
  class Command
    extend DescendantTracker

    attr_reader :argv
    attr_reader :env
    attr_reader :logger
    attr_reader :opts
    attr_reader :readers
    attr_reader :writer

    # The command name as typed on the command line.
    #
    # Inside a class method `self` is the class itself, so it is interpolated
    # directly; `self.class` would always render as "Class".
    def self.name
      raise NotImplementedError, "#{self} must implement #{self}.name"
    end

    # One-line description of the command.
    def self.description
      raise NotImplementedError, "#{self} must implement #{self}.description"
    end

    # The option parser for the command.
    def self.parser
      raise NotImplementedError, "#{self} must implement #{self}.parser"
    end

    # Names of all the known commands.
    def self.names
      children.map { |c| c.name }
    end

    # Looks up the command class whose name equals +name+.
    #
    # Returns the class, or nil when no command matches.
    def self.for(name)
      children.find { |child_klass| child_klass.name == name }
    end

    # Stores the arguments and immediately parses them via #parse.
    #
    # argv   - Array of arguments for this command.
    # logger - logger used by the command (defaults to ::FlatKit.logger).
    # env    - environment for the command (defaults to ENV).
    def initialize(argv:, logger: ::FlatKit.logger, env: ENV)
      @argv = argv
      @env = env
      @logger = logger
      parse
    end

    # Parses #argv; must be implemented by subclasses.
    def parse
      raise NotImplementedError, "#{self.class} must implement #{self.class}#parse"
    end

    # Executes the command; must be implemented by subclasses.
    def call
      raise NotImplementedError, "#{self.class} must implement #{self.class}#call"
    end
  end
end
|
50
|
+
|
51
|
+
require 'flat_kit/command/cat'
|
52
|
+
require 'flat_kit/command/merge'
|
53
|
+
require 'flat_kit/command/sort'
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module FlatKit
  class Command
    # The `fk cat` command: reads every record from each input in turn and
    # writes it to a single output.
    #
    # TODO: Implement the --flatten commandline switch
    class Cat < ::FlatKit::Command
      # The command name as typed on the command line.
      def self.name
        "cat"
      end

      # One-line description shown in the command list and help banner.
      def self.description
        "Concatenate files together that have the same structure."
      end

      # Builds a new Optimist parser holding the help text and options for
      # this command.
      def self.parser
        ::Optimist::Parser.new do
          banner "#{Cat.description}"
          banner ""

          banner <<~BANNER
            Concatenates files that have the same field structure together into
            a single file. The files can be of different formats, but must have
            the same fields and names.

            This is probably most easily usable as a way to convert CSV to JSON
            and vice versa.

            The flatfile type(s) will be automatically determined by the file name.
            If the inputs or output is not a file, but from stdin or stdout then
            the input and output types must be specified.

            NOTE: If converting from JSON to CSV and the input JSON does not have
            every possible field on every record, then the output csv will
            be corrupted.

            In this case the input json should be fed through 'flatten' first
            or use the '--flatten' flag which will require an additional pass
            through the input to gather all the fields
          BANNER

          banner <<~USAGE

            Usage:
            fk cat file1.csv file2.csv > combined.csv
            fk cat --output-format json file1.csv
            fk cat file1.csv.gzip -o file2.json.gzip
            fk cat file1.csv.gzip --output-format json | gzip -c > file1.jsonl.gz

          USAGE

          banner <<~OPTIONS

            Options:

          OPTIONS

          opt :output, "Send the output to the given path instead of standard out.", default: "<stdout>"
          opt :input_format, "Input format, csv or json", default: "auto", short: :none
          opt :output_format, "Output format, csv or json", default: "auto", short: :none
        end
      end

      # Parses #argv, then creates the readers (one per leftover path) and the
      # writer for the requested output.
      #
      # Any ::FlatKit::Error raised while creating them is re-raised as an
      # Optimist::CommandlineError so it goes through Optimist's standard
      # command line error handling.
      #
      # NOTE(review): the writer is given the format of the first reader;
      # this assumes at least one reader was created -- confirm against
      # Reader.create_readers_from_paths.
      def parse
        parser = self.class.parser
        ::Optimist::with_standard_exception_handling(parser) do
          begin
            @opts = parser.parse(argv)
            paths = parser.leftovers

            @readers = ::FlatKit::Reader.create_readers_from_paths(paths: paths, fallback: opts[:input_format])
            @writer  = ::FlatKit::Writer.create_writer_from_path(path: opts[:output], fallback: opts[:output_format],
                                                                 reader_format: @readers.first.format_name)
          rescue ::FlatKit::Error => e
            raise ::Optimist::CommandlineError, e.message
          end
        end
      end

      # Copies every record from each reader, in order, to the writer, then
      # closes the writer and logs the record counts.
      def call
        total = 0
        readers.each do |r|
          logger.info "cat #{r.source} to #{writer.destination}"
          r.each do |record|
            writer.write(record)
            total += 1
          end
          logger.info "read #{r.count} records from #{r.source}"
        end
        writer.close
        logger.debug "processed #{writer.count} records"
        logger.debug "read #{total} records"
      end
    end
  end
end
|