metacrunch-file 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Readme.md +24 -14
- data/lib/metacrunch/file.rb +3 -0
- data/lib/metacrunch/file/csv_source.rb +35 -0
- data/lib/metacrunch/file/destination.rb +3 -27
- data/lib/metacrunch/file/file_destination.rb +36 -0
- data/lib/metacrunch/file/file_source.rb +56 -0
- data/lib/metacrunch/file/source.rb +3 -47
- data/lib/metacrunch/file/version.rb +1 -1
- data/metacrunch-file.gemspec +2 -1
- metadata +22 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9980829422c838552b9334134e52932f3d70d132859965baa70a93f184ffefaf
|
4
|
+
data.tar.gz: 81b3b88c8725eb30469eb1dd40a7f9507865e66a998b76ff2ace5691e424b684
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '090856c4f3cef0b0bc2bedaa2a5fd87ef73005b4678e2bbe5a938475438701b1ed74307dc502bfd5f52614589ed6ef1f55667037d0c1599ecd8ae8c1fb68d8dc'
|
7
|
+
data.tar.gz: b940c2ea0555e018303de286a028e975d1cd628cd527d65ca702afb97d7bbac40cc31cad67b27871b3737891b65f016ccff31b960ba761af0cb4157cb033195e
|
data/Readme.md
CHANGED
@@ -6,7 +6,10 @@ metacrunch-file
|
|
6
6
|
[![Test Coverage](https://codeclimate.com/github/ubpb/metacrunch-file/badges/coverage.svg)](https://codeclimate.com/github/ubpb/metacrunch-file/coverage)
|
7
7
|
[![CircleCI](https://circleci.com/gh/ubpb/metacrunch-file.svg?style=svg)](https://circleci.com/gh/ubpb/metacrunch-file)
|
8
8
|
|
9
|
-
This is the official file package for the [metacrunch ETL toolkit](https://github.com/ubpb/metacrunch).
|
9
|
+
This is the official file package for the [metacrunch ETL toolkit](https://github.com/ubpb/metacrunch).
|
10
|
+
|
11
|
+
*Note: For working examples of how to use this package, check out our [demo repository](https://github.com/ubpb/metacrunch-demo).*
|
12
|
+
|
10
13
|
|
11
14
|
Installation
|
12
15
|
------------
|
@@ -14,7 +17,7 @@ Installation
|
|
14
17
|
Include the gem in your `Gemfile`
|
15
18
|
|
16
19
|
```ruby
|
17
|
-
gem "metacrunch-file", "~> 1.
|
20
|
+
gem "metacrunch-file", "~> 1.3.0"
|
18
21
|
```
|
19
22
|
|
20
23
|
and run `$ bundle install` to install it.
|
@@ -29,24 +32,20 @@ $ gem install metacrunch-file
|
|
29
32
|
Usage
|
30
33
|
-----
|
31
34
|
|
32
|
-
|
33
|
-
|
34
|
-
### `Metacrunch::File::Source`
|
35
|
+
## `Metacrunch::File::FileSource`
|
35
36
|
|
36
37
|
This class provides a metacrunch `source` implementation that can be used to read data from files in the file system into a metacrunch job. The class can be used to read regular files, compressed files (gzip), tar archives and compressed tar archives (gzip).
|
37
38
|
|
38
|
-
You can access non-option arguments from the command line using the `ARGV` constant.
|
39
|
-
|
40
39
|
```ruby
|
41
40
|
# my_job.metacrunch
|
42
41
|
|
43
42
|
# If you call this example like so
|
44
43
|
# $ metacrunch my_job.metacrunch *.xml
|
45
44
|
# ARGV will contain all the XML files in the current directory.
|
46
|
-
source Metacrunch::File::
|
45
|
+
source Metacrunch::File::FileSource.new(ARGV)
|
47
46
|
|
48
47
|
# ... or you can set the filenames directly
|
49
|
-
source Metacrunch::File::
|
48
|
+
source Metacrunch::File::FileSource.new(["my-data.xml", "my-other-data.xml", "..."])
|
50
49
|
```
|
51
50
|
|
52
51
|
**Options**
|
@@ -67,22 +66,33 @@ transformation ->(file_entry) do
|
|
67
66
|
end
|
68
67
|
```
|
69
68
|
|
70
|
-
|
69
|
+
## `Metacrunch::File::FileDestination`
|
71
70
|
|
72
71
|
This class provides a metacrunch `destination` to write data to a file. All data passed to the destination is appended to the given file. If the data is an `Array`, every element of that array is appended to the file. Non-existent files are created automatically.
|
73
72
|
|
74
73
|
```ruby
|
75
74
|
# my_job.metacrunch
|
76
75
|
|
77
|
-
|
76
|
+
destination Metacrunch::File::FileDestination.new("/tmp/my-data.txt" [, OPTIONS])
|
78
77
|
```
|
79
78
|
|
80
79
|
**Options**
|
81
80
|
|
82
81
|
* `override_existing_file`: Overwrites an existing file if set to `true`. If set to `false`, an error is raised if the file already exists. Defaults to `false`.
|
83
|
-
*
|
84
82
|
|
85
|
-
|
83
|
+
## `Metacrunch::File::CSVSource`
|
84
|
+
|
85
|
+
This class provides a metacrunch `source` for reading CSV files. It is a simple wrapper around the [smarter_csv](https://github.com/tilo/smarter_csv) gem.
|
86
|
+
|
87
|
+
**Options**
|
88
|
+
|
89
|
+
* `headers`: Whether the first line of the file contains headers. Set this to `false` if the file has no header line; otherwise the first line of data would be treated as headers and lost. Defaults to `true`.
|
90
|
+
* `col_sep`: Column separator. Defaults to `,`.
|
91
|
+
* `row_sep`: Row separator or record separator. Defaults to `\n`.
|
92
|
+
* `quote_char`: Quotation character. Defaults to `"`.
|
93
|
+
* `file_encoding`: Set the file encoding. Defaults to `utf-8`.
|
94
|
+
|
95
|
+
## `Metacrunch::File::XLSXDestination`
|
86
96
|
|
87
97
|
This class provides a metacrunch `destination` implementation to create simple Excel (xlsx) files.
|
88
98
|
|
@@ -95,7 +105,7 @@ transformation ->(data) do
|
|
95
105
|
[data["foo"], data["bar"], ...]
|
96
106
|
end
|
97
107
|
|
98
|
-
|
108
|
+
destination Metacrunch::File::XLSXDestination.new(
|
99
109
|
"/tmp/my-data.xlsx", # filename
|
100
110
|
["Column 1", "Column 2", ...], # header columns
|
101
111
|
OPTIONS
|
data/lib/metacrunch/file.rb
CHANGED
@@ -4,7 +4,10 @@ require "active_support/core_ext"
|
|
4
4
|
module Metacrunch
|
5
5
|
module File
|
6
6
|
require_relative "file/entry"
|
7
|
+
require_relative "file/csv_source"
|
8
|
+
require_relative "file/file_source"
|
7
9
|
require_relative "file/source"
|
10
|
+
require_relative "file/file_destination"
|
8
11
|
require_relative "file/destination"
|
9
12
|
require_relative "file/xlsx_destination"
|
10
13
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "metacrunch/file"
|
2
|
+
require "smarter_csv"
|
3
|
+
|
4
|
+
module Metacrunch
|
5
|
+
class File::CSVSource
|
6
|
+
|
7
|
+
DEFAULT_OPTIONS = {
|
8
|
+
headers: true,
|
9
|
+
col_sep: ",",
|
10
|
+
row_sep: "\n",
|
11
|
+
quote_char: '"',
|
12
|
+
file_encoding: "utf-8"
|
13
|
+
}
|
14
|
+
|
15
|
+
def initialize(csv_filename, options = {})
|
16
|
+
@filename = csv_filename
|
17
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
18
|
+
end
|
19
|
+
|
20
|
+
def each(&block)
|
21
|
+
return enum_for(__method__) unless block_given?
|
22
|
+
|
23
|
+
SmarterCSV.process(@filename, {
|
24
|
+
headers_in_file: @options[:headers],
|
25
|
+
col_sep: @options[:col_sep],
|
26
|
+
row_sep: @options[:row_sep],
|
27
|
+
quote_char: @options[:quote_char],
|
28
|
+
file_encoding: @options[:file_encoding]
|
29
|
+
}) do |line|
|
30
|
+
yield line
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -1,35 +1,11 @@
|
|
1
1
|
require "metacrunch/file"
|
2
2
|
|
3
3
|
module Metacrunch
|
4
|
-
class File::Destination
|
5
|
-
|
6
|
-
DEFAULT_OPTIONS = {
|
7
|
-
override_existing_file: false
|
8
|
-
}
|
4
|
+
class File::Destination < File::FileDestination
|
9
5
|
|
10
6
|
def initialize(filename, options = {})
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
if ::File.exists?(@filename) && @options[:override_existing_file] == false
|
15
|
-
raise "File `#{@filename}` exists but `override_existing_file` option was set to `false`"
|
16
|
-
end
|
17
|
-
|
18
|
-
@file = ::File.open(@filename, 'wb+')
|
19
|
-
end
|
20
|
-
|
21
|
-
def write(data)
|
22
|
-
return if data.blank?
|
23
|
-
|
24
|
-
if data.is_a?(Array)
|
25
|
-
data.each { |row| @file.write(row) }
|
26
|
-
else
|
27
|
-
@file.write(data)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def close
|
32
|
-
@file.close if @file
|
7
|
+
warn "[DEPRECATION] `Metacrunch::File::Destination` is deprecated. Please use `Metacrunch::File::FileDestination` instead."
|
8
|
+
super
|
33
9
|
end
|
34
10
|
|
35
11
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "metacrunch/file"
|
2
|
+
|
3
|
+
module Metacrunch
|
4
|
+
class File::FileDestination
|
5
|
+
|
6
|
+
DEFAULT_OPTIONS = {
|
7
|
+
override_existing_file: false
|
8
|
+
}
|
9
|
+
|
10
|
+
def initialize(filename, options = {})
|
11
|
+
@filename = ::File.expand_path(filename)
|
12
|
+
@options = DEFAULT_OPTIONS.deep_merge(options)
|
13
|
+
|
14
|
+
if ::File.exists?(@filename) && @options[:override_existing_file] == false
|
15
|
+
raise "File `#{@filename}` exists but `override_existing_file` option was set to `false`"
|
16
|
+
end
|
17
|
+
|
18
|
+
@file = ::File.open(@filename, 'wb+')
|
19
|
+
end
|
20
|
+
|
21
|
+
def write(data)
|
22
|
+
return if data.blank?
|
23
|
+
|
24
|
+
if data.is_a?(Array)
|
25
|
+
data.each { |row| @file.write(row) }
|
26
|
+
else
|
27
|
+
@file.write(data)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def close
|
32
|
+
@file.close if @file
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "metacrunch/file"
|
2
|
+
require "rubygems/package"
|
3
|
+
|
4
|
+
module Metacrunch
|
5
|
+
class File::FileSource
|
6
|
+
|
7
|
+
def initialize(filenames)
|
8
|
+
@filenames = [*filenames].map{|f| f.presence}.compact
|
9
|
+
end
|
10
|
+
|
11
|
+
def each(&block)
|
12
|
+
return enum_for(__method__) unless block_given?
|
13
|
+
|
14
|
+
@filenames.each do |filename|
|
15
|
+
if is_archive?(filename)
|
16
|
+
read_archive(filename, &block)
|
17
|
+
else
|
18
|
+
read_regular_file(filename, &block)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def is_archive?(filename)
|
26
|
+
filename.ends_with?(".tar") || filename.ends_with?(".tar.gz") || filename.ends_with?(".tgz")
|
27
|
+
end
|
28
|
+
|
29
|
+
def is_gzip_file?(filename)
|
30
|
+
filename.ends_with?(".gz") || filename.ends_with?(".tgz")
|
31
|
+
end
|
32
|
+
|
33
|
+
def read_regular_file(filename, &block)
|
34
|
+
if ::File.file?(filename)
|
35
|
+
io = is_gzip_file?(filename) ? Zlib::GzipReader.open(filename) : ::File.open(filename, "r")
|
36
|
+
yield File::Entry.new(filename: filename, archive_filename: nil, contents: io.read)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def read_archive(filename, &block)
|
41
|
+
io = is_gzip_file?(filename) ? Zlib::GzipReader.open(filename) : ::File.open(filename, "r")
|
42
|
+
tarReader = Gem::Package::TarReader.new(io)
|
43
|
+
|
44
|
+
tarReader.each do |_tar_entry|
|
45
|
+
if _tar_entry.file?
|
46
|
+
yield File::Entry.new(
|
47
|
+
filename: filename,
|
48
|
+
archive_filename: _tar_entry.full_name,
|
49
|
+
contents: _tar_entry.read
|
50
|
+
)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
@@ -1,55 +1,11 @@
|
|
1
1
|
require "metacrunch/file"
|
2
|
-
require "rubygems/package"
|
3
2
|
|
4
3
|
module Metacrunch
|
5
|
-
class File::Source
|
4
|
+
class File::Source < File::FileSource
|
6
5
|
|
7
6
|
def initialize(filenames)
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
def each(&block)
|
12
|
-
return enum_for(__method__) unless block_given?
|
13
|
-
|
14
|
-
@filenames.each do |filename|
|
15
|
-
if is_archive?(filename)
|
16
|
-
read_archive(filename, &block)
|
17
|
-
else
|
18
|
-
read_regular_file(filename, &block)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
def is_archive?(filename)
|
26
|
-
filename.ends_with?(".tar") || filename.ends_with?(".tar.gz") || filename.ends_with?(".tgz")
|
27
|
-
end
|
28
|
-
|
29
|
-
def is_gzip_file?(filename)
|
30
|
-
filename.ends_with?(".gz") || filename.ends_with?(".tgz")
|
31
|
-
end
|
32
|
-
|
33
|
-
def read_regular_file(filename, &block)
|
34
|
-
if ::File.file?(filename)
|
35
|
-
io = is_gzip_file?(filename) ? Zlib::GzipReader.open(filename) : ::File.open(filename, "r")
|
36
|
-
yield File::Entry.new(filename: filename, archive_filename: nil, contents: io.read)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def read_archive(filename, &block)
|
41
|
-
io = is_gzip_file?(filename) ? Zlib::GzipReader.open(filename) : ::File.open(filename, "r")
|
42
|
-
tarReader = Gem::Package::TarReader.new(io)
|
43
|
-
|
44
|
-
tarReader.each do |_tar_entry|
|
45
|
-
if _tar_entry.file?
|
46
|
-
yield File::Entry.new(
|
47
|
-
filename: filename,
|
48
|
-
archive_filename: _tar_entry.full_name,
|
49
|
-
contents: _tar_entry.read
|
50
|
-
)
|
51
|
-
end
|
52
|
-
end
|
7
|
+
warn "[DEPRECATION] `Metacrunch::File::Source` is deprecated. Please use `Metacrunch::File::FileSource` instead."
|
8
|
+
super
|
53
9
|
end
|
54
10
|
|
55
11
|
end
|
data/metacrunch-file.gemspec
CHANGED
@@ -16,5 +16,6 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.require_paths = ["lib"]
|
17
17
|
|
18
18
|
spec.add_dependency "activesupport", ">= 5.1.0"
|
19
|
-
spec.add_dependency "axlsx", "
|
19
|
+
spec.add_dependency "axlsx", ">= 3.0.0.pre"
|
20
|
+
spec.add_dependency "smarter_csv", "~> 1.2.6"
|
20
21
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -26,18 +26,32 @@ dependencies:
|
|
26
26
|
version: 5.1.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: axlsx
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.0.0.pre
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.0.0.pre
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: smarter_csv
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - "~>"
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.
|
47
|
+
version: 1.2.6
|
34
48
|
type: :runtime
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
52
|
- - "~>"
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: 2.
|
54
|
+
version: 1.2.6
|
41
55
|
description:
|
42
56
|
email:
|
43
57
|
executables: []
|
@@ -53,8 +67,11 @@ files:
|
|
53
67
|
- Readme.md
|
54
68
|
- bin/console
|
55
69
|
- lib/metacrunch/file.rb
|
70
|
+
- lib/metacrunch/file/csv_source.rb
|
56
71
|
- lib/metacrunch/file/destination.rb
|
57
72
|
- lib/metacrunch/file/entry.rb
|
73
|
+
- lib/metacrunch/file/file_destination.rb
|
74
|
+
- lib/metacrunch/file/file_source.rb
|
58
75
|
- lib/metacrunch/file/source.rb
|
59
76
|
- lib/metacrunch/file/version.rb
|
60
77
|
- lib/metacrunch/file/xlsx_destination.rb
|
@@ -78,8 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
95
|
- !ruby/object:Gem::Version
|
79
96
|
version: '0'
|
80
97
|
requirements: []
|
81
|
-
|
82
|
-
rubygems_version: 2.7.7
|
98
|
+
rubygems_version: 3.0.3
|
83
99
|
signing_key:
|
84
100
|
specification_version: 4
|
85
101
|
summary: File package for the metacrunch ETL toolkit.
|