metacrunch-file 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Readme.md +28 -10
- data/lib/metacrunch/file/csv_destination.rb +32 -0
- data/lib/metacrunch/file/csv_source.rb +8 -12
- data/lib/metacrunch/file/file_destination.rb +1 -1
- data/lib/metacrunch/file/version.rb +1 -1
- data/lib/metacrunch/file.rb +1 -0
- data/metacrunch-file.gemspec +1 -1
- metadata +6 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6fbb7eafc182c80e2bbb2a1c63b57e02824df6739a0f123c32d609997ceaaf9c
|
|
4
|
+
data.tar.gz: ee41b433ea1ea30109074c2043213ff928baa2fd516c735545392294dc3fa800
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7e26d47271466d6df0ec80c80903741895529a037e1bf1d8b03d751effba20a7f292cea7cc704856b7d5cb4bc1eabe2f0f493bde32eca07e22595bb43c63b495
|
|
7
|
+
data.tar.gz: f9dadce5be892b1f3fec41e3490b06d7b5826964132ff9b2e1c3077f998823e51d8347a426b75c712e434f3ae6a3f92fd29c0db4a66ca3323414c2cdfe3e16e8
|
data/Readme.md
CHANGED
|
@@ -6,7 +6,7 @@ metacrunch-file
|
|
|
6
6
|
[](https://codeclimate.com/github/ubpb/metacrunch-file/coverage)
|
|
7
7
|
[](https://circleci.com/gh/ubpb/metacrunch-file)
|
|
8
8
|
|
|
9
|
-
This is the official file package for the [metacrunch ETL toolkit](https://github.com/ubpb/metacrunch).
|
|
9
|
+
This is the official file package for the [metacrunch ETL toolkit](https://github.com/ubpb/metacrunch).
|
|
10
10
|
|
|
11
11
|
*Note: For working examples on how to use this package check out our [demo repository](https://github.com/ubpb/metacrunch-demo).*
|
|
12
12
|
|
|
@@ -17,7 +17,7 @@ Installation
|
|
|
17
17
|
Include the gem in your `Gemfile`
|
|
18
18
|
|
|
19
19
|
```ruby
|
|
20
|
-
gem "metacrunch-file", "~> 1.
|
|
20
|
+
gem "metacrunch-file", "~> 1.5.0"
|
|
21
21
|
```
|
|
22
22
|
|
|
23
23
|
and run `$ bundle install` to install it.
|
|
@@ -52,7 +52,7 @@ source Metacrunch::File::FileSource.new(["my-data.xml", "my-other-data.xml", "..
|
|
|
52
52
|
|
|
53
53
|
NONE.
|
|
54
54
|
|
|
55
|
-
The source yields objects of type `Metacrunch::File::Entry` for every file it reads.
|
|
55
|
+
The source yields objects of type `Metacrunch::File::Entry` for every file it reads.
|
|
56
56
|
|
|
57
57
|
```ruby
|
|
58
58
|
# my_job.metacrunch
|
|
@@ -82,21 +82,39 @@ destination Metacrunch::File::FileDestination.new("/tmp/my-data.txt" [, OPTIONS]
|
|
|
82
82
|
|
|
83
83
|
## `Metacrunch::File::CSVSource`
|
|
84
84
|
|
|
85
|
-
This class provides a metacrunch `source` for reading CSV files. It is a simple wrapper around [smarter_csv](https://github.com/tilo/smarter_csv) gem.
|
|
85
|
+
This class provides a metacrunch `source` for reading CSV files. It is a simple wrapper around [smarter_csv](https://github.com/tilo/smarter_csv) gem.
|
|
86
86
|
|
|
87
87
|
```ruby
|
|
88
88
|
# my_job.metacrunch
|
|
89
89
|
|
|
90
|
-
source Metacrunch::File::CSVSource.new(
|
|
90
|
+
source Metacrunch::File::CSVSource.new(
|
|
91
|
+
"source.csv" # filename
|
|
92
|
+
[, OPTIONS]) # options
|
|
91
93
|
```
|
|
92
94
|
|
|
93
95
|
**Options**
|
|
94
96
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
* `
|
|
98
|
-
|
|
99
|
-
|
|
97
|
+
Via the `options` argument you can pass any [CSV reading option supported by smarter_csv](https://github.com/tilo/smarter_csv/blob/main/docs/options.md#csv-reading) under the key `csv_options`.
|
|
98
|
+
|
|
99
|
+
* `csv_options`: Hash with any option supported by smarter_csv for CSV reading. Our defaults are `headers_in_file: true`, `col_sep: ","`, `row_sep: "\n"`, `quote_char: '"'`, `file_encoding: "utf-8"`
|
|
100
|
+
|
|
101
|
+
## `Metacrunch::File::CSVDestination`
|
|
102
|
+
|
|
103
|
+
This class provides a metacrunch `destination` for writing CSV files. Like the `CSVSource`, this uses [smarter_csv](https://github.com/tilo/smarter_csv) under the hood.
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
# my_job.metacrunch
|
|
107
|
+
|
|
108
|
+
destination Metacrunch::File::CSVDestination.new(
|
|
109
|
+
"result.csv" # filename
|
|
110
|
+
[, OPTIONS] # options
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**Options**
|
|
115
|
+
|
|
116
|
+
* `override_existing_file`: Overrides an existing file if set to `true`. If set to `false` an error is raised if the file already exists. Defaults to `false`.
|
|
117
|
+
* `csv_options`: Set options for CSV generation, such as `col_sep`. The full list is [here](https://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html#class-CSV-label-Options).
|
|
100
118
|
|
|
101
119
|
## `Metacrunch::File::XLSXDestination`
|
|
102
120
|
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require "metacrunch/file"
|
|
2
|
+
require "smarter_csv"
|
|
3
|
+
|
|
4
|
+
module Metacrunch
|
|
5
|
+
class File::CSVDestination
|
|
6
|
+
|
|
7
|
+
DEFAULT_OPTIONS = {
|
|
8
|
+
override_existing_file: false,
|
|
9
|
+
csv_options: {}
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
def initialize(filename, options = {})
|
|
13
|
+
@filename = ::File.expand_path(filename)
|
|
14
|
+
@options = DEFAULT_OPTIONS.deep_merge(options)
|
|
15
|
+
|
|
16
|
+
if ::File.exist?(@filename) && @options[:override_existing_file] == false
|
|
17
|
+
raise "File `#{@filename}` exists but `override_existing_file` option was set to `false`"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
@csv_writer = SmarterCSV::Writer.new(@filename, @options[:csv_options])
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def write(data)
|
|
24
|
+
@csv_writer << data
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def close
|
|
28
|
+
@csv_writer.finalize if @csv_writer
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -5,11 +5,13 @@ module Metacrunch
|
|
|
5
5
|
class File::CSVSource
|
|
6
6
|
|
|
7
7
|
DEFAULT_OPTIONS = {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
8
|
+
csv_options: {
|
|
9
|
+
headers_in_file: true,
|
|
10
|
+
col_sep: ",",
|
|
11
|
+
row_sep: "\n",
|
|
12
|
+
quote_char: '"',
|
|
13
|
+
file_encoding: "utf-8"
|
|
14
|
+
}
|
|
13
15
|
}
|
|
14
16
|
|
|
15
17
|
def initialize(csv_filename, options = {})
|
|
@@ -20,13 +22,7 @@ module Metacrunch
|
|
|
20
22
|
def each(&block)
|
|
21
23
|
return enum_for(__method__) unless block_given?
|
|
22
24
|
|
|
23
|
-
SmarterCSV.process(@filename,
|
|
24
|
-
headers_in_file: @options[:headers],
|
|
25
|
-
col_sep: @options[:col_sep],
|
|
26
|
-
row_sep: @options[:row_sep],
|
|
27
|
-
quote_char: @options[:quote_char],
|
|
28
|
-
file_encoding: @options[:file_encoding]
|
|
29
|
-
}) do |line|
|
|
25
|
+
SmarterCSV.process(@filename, @options[:csv_options]) do |line|
|
|
30
26
|
yield line
|
|
31
27
|
end
|
|
32
28
|
end
|
|
@@ -11,7 +11,7 @@ module Metacrunch
|
|
|
11
11
|
@filename = ::File.expand_path(filename)
|
|
12
12
|
@options = DEFAULT_OPTIONS.deep_merge(options)
|
|
13
13
|
|
|
14
|
-
if ::File.
|
|
14
|
+
if ::File.exist?(@filename) && @options[:override_existing_file] == false
|
|
15
15
|
raise "File `#{@filename}` exists but `override_existing_file` option was set to `false`"
|
|
16
16
|
end
|
|
17
17
|
|
data/lib/metacrunch/file.rb
CHANGED
data/metacrunch-file.gemspec
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: metacrunch-file
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- René Sprotte
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: activesupport
|
|
@@ -30,14 +29,14 @@ dependencies:
|
|
|
30
29
|
requirements:
|
|
31
30
|
- - "~>"
|
|
32
31
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: '
|
|
32
|
+
version: '4.0'
|
|
34
33
|
type: :runtime
|
|
35
34
|
prerelease: false
|
|
36
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
36
|
requirements:
|
|
38
37
|
- - "~>"
|
|
39
38
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: '
|
|
39
|
+
version: '4.0'
|
|
41
40
|
- !ruby/object:Gem::Dependency
|
|
42
41
|
name: smarter_csv
|
|
43
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -52,8 +51,6 @@ dependencies:
|
|
|
52
51
|
- - "~>"
|
|
53
52
|
- !ruby/object:Gem::Version
|
|
54
53
|
version: '1.2'
|
|
55
|
-
description:
|
|
56
|
-
email:
|
|
57
54
|
executables: []
|
|
58
55
|
extensions: []
|
|
59
56
|
extra_rdoc_files: []
|
|
@@ -67,6 +64,7 @@ files:
|
|
|
67
64
|
- Readme.md
|
|
68
65
|
- bin/console
|
|
69
66
|
- lib/metacrunch/file.rb
|
|
67
|
+
- lib/metacrunch/file/csv_destination.rb
|
|
70
68
|
- lib/metacrunch/file/csv_source.rb
|
|
71
69
|
- lib/metacrunch/file/destination.rb
|
|
72
70
|
- lib/metacrunch/file/entry.rb
|
|
@@ -80,7 +78,6 @@ homepage: http://github.com/ubpb/metacrunch-file
|
|
|
80
78
|
licenses:
|
|
81
79
|
- MIT
|
|
82
80
|
metadata: {}
|
|
83
|
-
post_install_message:
|
|
84
81
|
rdoc_options: []
|
|
85
82
|
require_paths:
|
|
86
83
|
- lib
|
|
@@ -95,8 +92,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
95
92
|
- !ruby/object:Gem::Version
|
|
96
93
|
version: '0'
|
|
97
94
|
requirements: []
|
|
98
|
-
rubygems_version: 3.
|
|
99
|
-
signing_key:
|
|
95
|
+
rubygems_version: 3.6.9
|
|
100
96
|
specification_version: 4
|
|
101
97
|
summary: File package for the metacrunch ETL toolkit.
|
|
102
98
|
test_files: []
|