flat_kit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CONTRIBUTING.md +46 -0
- data/HISTORY.md +5 -0
- data/LICENSE.txt +21 -0
- data/Manifest.txt +66 -0
- data/README.md +80 -0
- data/Rakefile +20 -0
- data/bin/fk +4 -0
- data/lib/flat_kit.rb +23 -0
- data/lib/flat_kit/cli.rb +80 -0
- data/lib/flat_kit/command.rb +53 -0
- data/lib/flat_kit/command/cat.rb +93 -0
- data/lib/flat_kit/command/merge.rb +88 -0
- data/lib/flat_kit/command/sort.rb +88 -0
- data/lib/flat_kit/descendant_tracker.rb +27 -0
- data/lib/flat_kit/error.rb +5 -0
- data/lib/flat_kit/format.rb +34 -0
- data/lib/flat_kit/input.rb +32 -0
- data/lib/flat_kit/input/file.rb +53 -0
- data/lib/flat_kit/input/io.rb +54 -0
- data/lib/flat_kit/internal_node.rb +84 -0
- data/lib/flat_kit/jsonl.rb +8 -0
- data/lib/flat_kit/jsonl/format.rb +25 -0
- data/lib/flat_kit/jsonl/reader.rb +30 -0
- data/lib/flat_kit/jsonl/record.rb +84 -0
- data/lib/flat_kit/jsonl/writer.rb +45 -0
- data/lib/flat_kit/leaf_node.rb +71 -0
- data/lib/flat_kit/logger.rb +39 -0
- data/lib/flat_kit/merge.rb +35 -0
- data/lib/flat_kit/merge_tree.rb +104 -0
- data/lib/flat_kit/output.rb +32 -0
- data/lib/flat_kit/output/file.rb +55 -0
- data/lib/flat_kit/output/io.rb +73 -0
- data/lib/flat_kit/reader.rb +61 -0
- data/lib/flat_kit/record.rb +83 -0
- data/lib/flat_kit/sentinel_internal_node.rb +37 -0
- data/lib/flat_kit/sentinel_leaf_node.rb +37 -0
- data/lib/flat_kit/sort.rb +35 -0
- data/lib/flat_kit/writer.rb +38 -0
- data/lib/flat_kit/xsv.rb +8 -0
- data/lib/flat_kit/xsv/format.rb +25 -0
- data/lib/flat_kit/xsv/reader.rb +45 -0
- data/lib/flat_kit/xsv/record.rb +90 -0
- data/lib/flat_kit/xsv/writer.rb +70 -0
- data/tasks/default.rake +242 -0
- data/tasks/extension.rake +38 -0
- data/tasks/man.rake +7 -0
- data/tasks/this.rb +208 -0
- data/test/device_dataset.rb +117 -0
- data/test/input/test_file.rb +73 -0
- data/test/input/test_io.rb +93 -0
- data/test/jsonl/test_format.rb +22 -0
- data/test/jsonl/test_reader.rb +49 -0
- data/test/jsonl/test_record.rb +61 -0
- data/test/jsonl/test_writer.rb +68 -0
- data/test/output/test_file.rb +60 -0
- data/test/output/test_io.rb +104 -0
- data/test/test_conversions.rb +45 -0
- data/test/test_format.rb +24 -0
- data/test/test_helper.rb +26 -0
- data/test/test_merge.rb +40 -0
- data/test/test_merge_tree.rb +64 -0
- data/test/test_version.rb +11 -0
- data/test/xsv/test_format.rb +22 -0
- data/test/xsv/test_reader.rb +61 -0
- data/test/xsv/test_record.rb +69 -0
- data/test/xsv/test_writer.rb +68 -0
- metadata +237 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: fa0b52b3ab12e9cf86492571258daf5d98dde967c25d5aad65b74a6003900fa2
|
4
|
+
data.tar.gz: fcd1abff76e54854d86b131aa6773d411f4d54acbf2e48b541c69c72dd2c1149
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 072e966f06fe03de16b686f469f0e3ac84aa2cc93910cc61210875e6d9cbf0bb3cc24bb1cf4f0863b90f2d02110abcdb0f88bbfa72777f2c19b2cce30c847e45
|
7
|
+
data.tar.gz: f5f67f8818e5cce8da7b547a3ec652d6dc288f9b66a957a62519878d819c893285ea3e81e7a32286de0d614412416f7ba38d8c28e7807f12add9ec00b3241e92
|
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Hi there!
|
2
|
+
|
3
|
+
I see you are interested in contributing. That is wonderful. I love
|
4
|
+
contributions.
|
5
|
+
|
6
|
+
I guarantee that there are bugs in this software. And I guarantee that there is
|
7
|
+
a feature you want that is not in here yet. As such, any and all bug reports
|
8
|
+
are gratefully accepted, bugfixes even more so. Helping out with bugs is the
|
9
|
+
easiest way to contribute.
|
10
|
+
|
11
|
+
|
12
|
+
## The Quick Version
|
13
|
+
|
14
|
+
* Have a [GitHub Account][].
|
15
|
+
* Search the [GitHub Issues][] and see if your issue is already present. If so
|
16
|
+
add your comments, :thumbsup:, etc.
|
17
|
+
* Issue not there? Not a problem, open up a [new issue][].
|
18
|
+
* **Bug reports** please be as detailed as possible. Include:
|
19
|
+
* full ruby engine and version: `ruby -e 'puts RUBY_DESCRIPTION'`
|
20
|
+
* operating system and version
|
21
|
+
* version of flat_kit `ruby -e "require 'flat_kit'; puts FlatKit::VERSION"`
|
22
|
+
* as much detail about the bug as possible so I can replicate it. Feel free
|
23
|
+
to link in a [gist][]
|
24
|
+
* **New Feature**
|
25
|
+
* What the new feature should do.
|
26
|
+
* What benefit the new feature brings to the project.
|
27
|
+
* Fork the [repo][].
|
28
|
+
* Create a new branch for your issue: `git checkout -b issue/my-issue`
|
29
|
+
* Lovingly craft your contribution:
|
30
|
+
* `rake develop` to bootstrap development. (skip if you already have bundler installed)
|
31
|
+
* `bundle install` to install dependencies.
|
32
|
+
* `rake test` to run tests
|
33
|
+
* Make sure that `rake test` passes. It's important, I said it twice.
|
34
|
+
* Add yourself to the contributors section below.
|
35
|
+
* Submit your [pull request][].
|
36
|
+
|
37
|
+
# Contributors
|
38
|
+
|
39
|
+
* [Jeremy Hinegardner](https://github.com/copiousfreetime)
|
40
|
+
|
41
|
+
[GitHub Account]: https://github.com/signup/free "GitHub Signup"
|
42
|
+
[GitHub Issues]: https://github.com/copiousfreetime/flat_kit/issues "FlatKit Issues"
|
43
|
+
[new issue]: https://github.com/copiousfreetime/flat_kit/issues/new "New FlatKit Issue"
|
44
|
+
[gist]: https://gist.github.com/ "New Gist"
|
45
|
+
[repo]: https://github.com/copiousfreetime/flat_kit "FlatKit Repo"
|
46
|
+
[pull request]: https://help.github.com/articles/using-pull-requests "Using Pull Requests"
|
data/HISTORY.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2021 Jeremy Hinegardner
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/Manifest.txt
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
CONTRIBUTING.md
|
2
|
+
HISTORY.md
|
3
|
+
LICENSE.txt
|
4
|
+
Manifest.txt
|
5
|
+
README.md
|
6
|
+
Rakefile
|
7
|
+
bin/fk
|
8
|
+
lib/flat_kit.rb
|
9
|
+
lib/flat_kit/cli.rb
|
10
|
+
lib/flat_kit/command.rb
|
11
|
+
lib/flat_kit/command/cat.rb
|
12
|
+
lib/flat_kit/command/merge.rb
|
13
|
+
lib/flat_kit/command/sort.rb
|
14
|
+
lib/flat_kit/descendant_tracker.rb
|
15
|
+
lib/flat_kit/error.rb
|
16
|
+
lib/flat_kit/format.rb
|
17
|
+
lib/flat_kit/input.rb
|
18
|
+
lib/flat_kit/input/file.rb
|
19
|
+
lib/flat_kit/input/io.rb
|
20
|
+
lib/flat_kit/internal_node.rb
|
21
|
+
lib/flat_kit/jsonl.rb
|
22
|
+
lib/flat_kit/jsonl/format.rb
|
23
|
+
lib/flat_kit/jsonl/reader.rb
|
24
|
+
lib/flat_kit/jsonl/record.rb
|
25
|
+
lib/flat_kit/jsonl/writer.rb
|
26
|
+
lib/flat_kit/leaf_node.rb
|
27
|
+
lib/flat_kit/logger.rb
|
28
|
+
lib/flat_kit/merge.rb
|
29
|
+
lib/flat_kit/merge_tree.rb
|
30
|
+
lib/flat_kit/output.rb
|
31
|
+
lib/flat_kit/output/file.rb
|
32
|
+
lib/flat_kit/output/io.rb
|
33
|
+
lib/flat_kit/reader.rb
|
34
|
+
lib/flat_kit/record.rb
|
35
|
+
lib/flat_kit/sentinel_internal_node.rb
|
36
|
+
lib/flat_kit/sentinel_leaf_node.rb
|
37
|
+
lib/flat_kit/sort.rb
|
38
|
+
lib/flat_kit/writer.rb
|
39
|
+
lib/flat_kit/xsv.rb
|
40
|
+
lib/flat_kit/xsv/format.rb
|
41
|
+
lib/flat_kit/xsv/reader.rb
|
42
|
+
lib/flat_kit/xsv/record.rb
|
43
|
+
lib/flat_kit/xsv/writer.rb
|
44
|
+
tasks/default.rake
|
45
|
+
tasks/extension.rake
|
46
|
+
tasks/man.rake
|
47
|
+
tasks/this.rb
|
48
|
+
test/device_dataset.rb
|
49
|
+
test/input/test_file.rb
|
50
|
+
test/input/test_io.rb
|
51
|
+
test/jsonl/test_format.rb
|
52
|
+
test/jsonl/test_reader.rb
|
53
|
+
test/jsonl/test_record.rb
|
54
|
+
test/jsonl/test_writer.rb
|
55
|
+
test/output/test_file.rb
|
56
|
+
test/output/test_io.rb
|
57
|
+
test/test_conversions.rb
|
58
|
+
test/test_format.rb
|
59
|
+
test/test_helper.rb
|
60
|
+
test/test_merge.rb
|
61
|
+
test/test_merge_tree.rb
|
62
|
+
test/test_version.rb
|
63
|
+
test/xsv/test_format.rb
|
64
|
+
test/xsv/test_reader.rb
|
65
|
+
test/xsv/test_record.rb
|
66
|
+
test/xsv/test_writer.rb
|
data/README.md
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
## FlatKit
|
2
|
+
|
3
|
+
* [Homepage](https://github.com/copiousfreetime/flat_kit/)
|
4
|
+
* [Github Project](https://github.com/copiousfreetime/flat_kit)
|
5
|
+
|
6
|
+
## DESCRIPTION
|
7
|
+
|
8
|
+
A library and commandline program for reading, writing, indexing,
|
9
|
+
sorting, and merging CSV, TSV, JSON and other flat-file formats.
|
10
|
+
|
11
|
+
## FEATURES
|
12
|
+
|
13
|
+
* Currently supporting CSV/TSV/XSV and JSON formats
|
14
|
+
* Transparently handles gzipped compressed input or output
|
15
|
+
* Sort records based upon the named keys of your choice
|
16
|
+
* Efficient single-pass merge of any number of sorted input files into a
|
17
|
+
single output file.
|
18
|
+
* Both a commandline tool and a ruby library to utilize in your own programs
|
19
|
+
|
20
|
+
|
21
|
+
```
|
22
|
+
Usage:
|
23
|
+
fk <command> [<args>...]
|
24
|
+
fk [options]
|
25
|
+
|
26
|
+
Options:
|
27
|
+
-v, --verbose Force debug. Output lots of information to standard error
|
28
|
+
--list List all the commands
|
29
|
+
--log=<s> Set the logger output location (default: <stderr>)
|
30
|
+
-h, --help Show help message
|
31
|
+
--version Print version and exit
|
32
|
+
|
33
|
+
Commands:
|
34
|
+
|
35
|
+
cat Concatenate files together that have the same structure.
|
36
|
+
merge Merge sorted files together that have the same structure.
|
37
|
+
sort Sort a given file by a set of fields.
|
38
|
+
```
|
39
|
+
|
40
|
+
## EXAMPLES
|
41
|
+
|
42
|
+
**Convert input csv files into a single output json file**
|
43
|
+
|
44
|
+
fk cat files/*csv -o output.json
|
45
|
+
|
46
|
+
**Sort an input json file and output it as a compressed csv**
|
47
|
+
|
48
|
+
fk sort --key year,month,day input.json -o output.csv.gz
|
49
|
+
|
50
|
+
**Merge an entire directory of sorted, compressed csv files into a compressed json file**
|
51
|
+
|
52
|
+
fk merge --key category,timestamp sorted/*.csv.gz -o sorted.json.gz
|
53
|
+
|
54
|
+
## CREDITS / REFERENCES
|
55
|
+
|
56
|
+
* https://github.com/BurntSushi/xsv - lots of concepts and ideas pulled from xsv
|
57
|
+
|
58
|
+
## MIT LICENSE
|
59
|
+
|
60
|
+
MIT License
|
61
|
+
|
62
|
+
Copyright (c) 2021 Jeremy Hinegardner
|
63
|
+
|
64
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
65
|
+
of this software and associated documentation files (the "Software"), to deal
|
66
|
+
in the Software without restriction, including without limitation the rights
|
67
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
68
|
+
copies of the Software, and to permit persons to whom the Software is
|
69
|
+
furnished to do so, subject to the following conditions:
|
70
|
+
|
71
|
+
The above copyright notice and this permission notice shall be included in all
|
72
|
+
copies or substantial portions of the Software.
|
73
|
+
|
74
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
75
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
76
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
77
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
78
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
79
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
80
|
+
SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# vim: syntax=ruby
# Project build definition. `This` is expected to be provided by
# tasks/this.rb, which is loaded first -- confirm against that file.
load 'tasks/this.rb'

This.name     = "flat_kit"
This.author   = "Jeremy Hinegardner"
This.email    = "jeremy@copiousfreetime.org"
# Use https so the published homepage matches the links in README.md.
This.homepage = "https://github.com/copiousfreetime/#{ This.name }"

This.ruby_gemspec do |spec|
  # Runtime dependencies
  spec.add_dependency( 'oj'      , '~> 3.0' )
  spec.add_dependency( 'optimist', '~> 3.0' )

  # Development-only dependencies
  spec.add_development_dependency( 'rake'          , '~> 13.0' )
  spec.add_development_dependency( 'minitest'      , '~> 5.11' )
  spec.add_development_dependency( 'minitest-focus', '~> 1.2'  )
  spec.add_development_dependency( 'rdoc'          , '~> 6.3'  )
  spec.add_development_dependency( 'simplecov'     , '~> 0.21' )
end

load 'tasks/default.rake'
|
data/bin/fk
ADDED
data/lib/flat_kit.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Top level namespace for the flat_kit library.
module FlatKit
  # The released version of the library. Frozen so that the shared constant
  # string cannot be mutated in place by callers.
  VERSION = "0.1.0".freeze
end
|
4
|
+
require 'flat_kit/error'
|
5
|
+
require 'flat_kit/logger'
|
6
|
+
require 'flat_kit/descendant_tracker'
|
7
|
+
require 'flat_kit/format'
|
8
|
+
require 'flat_kit/record'
|
9
|
+
require 'flat_kit/reader'
|
10
|
+
require 'flat_kit/writer'
|
11
|
+
require 'flat_kit/input'
|
12
|
+
require 'flat_kit/output'
|
13
|
+
require 'flat_kit/cli'
|
14
|
+
require 'flat_kit/xsv'
|
15
|
+
require 'flat_kit/jsonl'
|
16
|
+
require 'flat_kit/merge'
|
17
|
+
require 'flat_kit/sort'
|
18
|
+
|
19
|
+
require 'flat_kit/merge_tree'
|
20
|
+
require 'flat_kit/internal_node'
|
21
|
+
require 'flat_kit/sentinel_internal_node'
|
22
|
+
require 'flat_kit/sentinel_leaf_node'
|
23
|
+
require 'flat_kit/leaf_node'
|
data/lib/flat_kit/cli.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'optimist'
|
2
|
+
require_relative '../flat_kit/command'
|
3
|
+
|
4
|
+
module FlatKit
  # Top level commandline interface behind the `fk` executable.
  #
  # Parses the global options, configures logging, and then hands the
  # remaining arguments to the FlatKit::Command child class whose name matches
  # the first leftover argument.
  class Cli
    # The global options parsed by the most recent #run, nil before #run.
    attr_reader :options

    def initialize
      @parser  = nil
      @options = nil
    end

    # The memoized Optimist parser for the global options.
    #
    # Parsing stops at the first known command name so that everything after
    # it can be handed to that command's own parser.
    def parser
      @parser ||= ::Optimist::Parser.new do
        version ::FlatKit::VERSION

        banner "fk v#{self.version}"

        banner <<~USAGE

          Usage:
          fk <command> [<args>...]
          fk [options]
        USAGE

        banner <<~OPTIONS

          Options:

        OPTIONS

        opt :verbose, "Force debug. Output lots of information to standard error", default: false
        opt :list, "List all the commands", default: false, short: :none
        opt :log, "Set the logger output location", default: "<stderr>", short: :none
        opt :help, "Show help message", short: :h
        opt :version, "Print version and exit", short: :none

        stop_on FlatKit::Command.names
        banner Cli.commands_banner
      end
    end

    # Build the "Commands:" section of the help output.
    #
    # Returns a String listing every FlatKit::Command child sorted by name,
    # with the names left justified to the width of the longest one.
    def self.commands_banner
      sorted_commands = FlatKit::Command.children.sort_by { |c| c.name }
      left_width      = sorted_commands.map { |c| c.name.length }.max

      lines = ["", "Commands:", ""]
      sorted_commands.each do |command|
        lines << " #{command.name.ljust(left_width)} #{command.description}"
      end
      lines.map { |line| "#{line}\n" }.join
    end

    # Parse the global options and run the requested command.
    #
    # argv - the Array of commandline arguments (default: ARGV)
    # env  - the environment passed on to the command (default: ENV)
    #
    # A missing or unknown command is reported through Optimist's standard
    # error handling instead of failing with a NoMethodError on nil.
    def run(argv: ARGV, env: ENV)
      opts = ::Optimist::with_standard_exception_handling(parser) do
        parser.parse(argv)
      end
      @options = opts

      ::FlatKit.log_to(opts[:log]) if opts[:log_given]
      ::FlatKit.logger.level = opts[:verbose] ? :debug : :info

      ::FlatKit.logger.debug opts
      ::FlatKit.logger.debug argv

      if opts[:list]
        $stdout.puts FlatKit::Command.names.sort
        return
      end

      # Work from the parser's leftovers rather than argv itself so that any
      # global options preceding the command are not mistaken for the command.
      remaining    = parser.leftovers.dup
      command_name = remaining.shift

      command_klass = ::Optimist::with_standard_exception_handling(parser) do
        if command_name.nil?
          raise ::Optimist::CommandlineError,
                "a command is required, one of: #{FlatKit::Command.names.sort.join(', ')}"
        end

        klass = FlatKit::Command.for(command_name)
        raise ::Optimist::CommandlineError, "unknown command '#{command_name}'" if klass.nil?

        klass
      end

      command = command_klass.new(argv: remaining, logger: ::FlatKit.logger, env: env)
      command.call
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module FlatKit
  # Abstract base class for the `fk` sub-commands.
  #
  # Child classes are located through the `children` list supplied by
  # DescendantTracker and are expected to implement the class methods
  # `name`, `description` and `parser`, and the instance methods `parse`
  # and `call`.
  class Command
    extend DescendantTracker

    attr_reader :argv
    attr_reader :env
    attr_reader :logger
    attr_reader :opts
    attr_reader :readers
    attr_reader :writer

    # The commandline name of the command; child classes must override.
    #
    # Within a class method `self` is the class itself, so it is interpolated
    # directly -- `self.class` would always render as "Class".
    def self.name
      raise NotImplementedError, "#{self} must implement #{self}.name"
    end

    # A one line description of the command; child classes must override.
    def self.description
      raise NotImplementedError, "#{self} must implement #{self}.description"
    end

    # The options parser for the command; child classes must override.
    def self.parser
      raise NotImplementedError, "#{self} must implement #{self}.parser"
    end

    # Returns the Array of names of all the registered child commands.
    def self.names
      children.map { |c| c.name }
    end

    # Find the child command class whose name equals +name+.
    #
    # Returns the matching class, or nil when there is no such command.
    def self.for(name)
      children.find do |child_klass|
        child_klass.name == name
      end
    end

    # Store the invocation context and immediately parse the arguments.
    #
    # argv   - the Array of arguments for this command
    # logger - the logger to report to (default: ::FlatKit.logger)
    # env    - the environment (default: ENV)
    def initialize(argv:, logger: ::FlatKit.logger, env: ENV)
      @argv   = argv
      @env    = env
      @logger = logger
      parse
    end

    # Parse #argv; child classes must override.
    def parse
      raise NotImplementedError, "#{self.class} must implement #{self.class}#parse"
    end

    # Execute the command; child classes must override.
    def call
      raise NotImplementedError, "#{self.class} must implement #{self.class}#call"
    end
  end
end
|
50
|
+
|
51
|
+
require 'flat_kit/command/cat'
|
52
|
+
require 'flat_kit/command/merge'
|
53
|
+
require 'flat_kit/command/sort'
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module FlatKit
  class Command
    # TODO: Implement the --flatten commandline switch
    #
    # The `fk cat` command: reads every record from each input in turn and
    # writes it to a single output, which also makes it a format converter.
    class Cat < ::FlatKit::Command
      def self.name
        "cat"
      end

      def self.description
        "Concatenate files together that have the same structure."
      end

      # Build a new Optimist parser holding the help text and options of `cat`.
      def self.parser
        ::Optimist::Parser.new do
          banner "#{Cat.description}"
          banner ""

          banner <<~BANNER
            Concatenates files that have the same field structure together into
            a single file. The files can be of different formats, but must have
            the same fields and names.

            This is probably most easily usable as a way to convert CSV to JSON
            and vice versa.

            The flatfile type(s) will be automatically determined by the file name.
            If the inputs or output is not a file, but from stdin or stdout then
            the input and output types must be specified.

            NOTE: If converting from JSON to CSV and the input JSON does not have
            every possible field on every record, then the output csv will
            be corrupted.

            In this case the input json should be fed through 'flatten' first
            or use the '--flatten' flag which will require an additional pass
            through the input to gather all the fields
          BANNER

          banner <<~USAGE

            Usage:
            fk cat file1.csv file2.csv > combined.csv
            fk cat --output-format json file1.csv
            fk cat file1.csv.gzip -o file2.json.gzip
            fk cat file1.csv.gzip --output-format json | gzip -c > file1.jsonl.gz

          USAGE

          banner <<~OPTIONS

            Options:

          OPTIONS

          opt :output, "Send the output to the given path instead of standard out.", default: "<stdout>"
          opt :input_format, "Input format, csv or json", default: "auto", short: :none
          opt :output_format, "Output format, csv or json", default: "auto", short: :none
        end
      end

      # Parse #argv, then create the readers for the leftover paths and the
      # writer for the output.
      #
      # A FlatKit::Error raised while doing so is re-raised as an
      # Optimist::CommandlineError so that it goes through Optimist's
      # standard exception handling.
      def parse
        parser = self.class.parser
        ::Optimist::with_standard_exception_handling(parser) do
          begin
            @opts = parser.parse(argv)
            paths = parser.leftovers

            @readers = ::FlatKit::Reader.create_readers_from_paths(paths: paths, fallback: opts[:input_format])
            @writer  = ::FlatKit::Writer.create_writer_from_path(path: opts[:output], fallback: opts[:output_format],
                                                                 reader_format: @readers.first.format_name)
          rescue ::FlatKit::Error => e
            raise ::Optimist::CommandlineError, e.message
          end
        end
      end

      # Copy every record of every reader, in order, to the writer.
      def call
        total = 0
        begin
          readers.each do |r|
            logger.info "cat #{r.source} to #{writer.destination}"
            r.each do |record|
              writer.write(record)
              total += 1
            end
            logger.info "read #{r.count} records from #{r.source}"
          end
        ensure
          # Close the writer even when reading or writing fails part way.
          writer.close
        end
        logger.debug "processed #{writer.count} records"
        logger.debug "read #{total} records"
      end
    end
  end
end
|