metacrunch-file 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Readme.md +12 -13
- data/lib/metacrunch/file/csv_destination.rb +6 -23
- data/lib/metacrunch/file/csv_source.rb +8 -12
- data/lib/metacrunch/file/file_destination.rb +1 -1
- data/lib/metacrunch/file/version.rb +1 -1
- data/metacrunch-file.gemspec +1 -1
- metadata +5 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6fbb7eafc182c80e2bbb2a1c63b57e02824df6739a0f123c32d609997ceaaf9c
|
|
4
|
+
data.tar.gz: ee41b433ea1ea30109074c2043213ff928baa2fd516c735545392294dc3fa800
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7e26d47271466d6df0ec80c80903741895529a037e1bf1d8b03d751effba20a7f292cea7cc704856b7d5cb4bc1eabe2f0f493bde32eca07e22595bb43c63b495
|
|
7
|
+
data.tar.gz: f9dadce5be892b1f3fec41e3490b06d7b5826964132ff9b2e1c3077f998823e51d8347a426b75c712e434f3ae6a3f92fd29c0db4a66ca3323414c2cdfe3e16e8
|
data/Readme.md
CHANGED
|
@@ -6,7 +6,7 @@ metacrunch-file
|
|
|
6
6
|
[](https://codeclimate.com/github/ubpb/metacrunch-file/coverage)
|
|
7
7
|
[](https://circleci.com/gh/ubpb/metacrunch-file)
|
|
8
8
|
|
|
9
|
-
This is the official file package for the [metacrunch ETL toolkit](https://github.com/ubpb/metacrunch).
|
|
9
|
+
This is the official file package for the [metacrunch ETL toolkit](https://github.com/ubpb/metacrunch).
|
|
10
10
|
|
|
11
11
|
*Note: For working examples on how to use this package check out our [demo repository](https://github.com/ubpb/metacrunch-demo).*
|
|
12
12
|
|
|
@@ -52,7 +52,7 @@ source Metacrunch::File::FileSource.new(["my-data.xml", "my-other-data.xml", "..
|
|
|
52
52
|
|
|
53
53
|
NONE.
|
|
54
54
|
|
|
55
|
-
The source yields objects of type `Metacrunch::File::Entry` for every file it reads.
|
|
55
|
+
The source yields objects of type `Metacrunch::File::Entry` for every file it reads.
|
|
56
56
|
|
|
57
57
|
```ruby
|
|
58
58
|
# my_job.metacrunch
|
|
@@ -82,33 +82,32 @@ destination Metacrunch::File::FileDestination.new("/tmp/my-data.txt" [, OPTIONS]
|
|
|
82
82
|
|
|
83
83
|
## `Metacrunch::File::CSVSource`
|
|
84
84
|
|
|
85
|
-
This class provides a metacrunch `source` for reading CSV files. It is a simple wrapper around [smarter_csv](https://github.com/tilo/smarter_csv) gem.
|
|
85
|
+
This class provides a metacrunch `source` for reading CSV files. It is a simple wrapper around the [smarter_csv](https://github.com/tilo/smarter_csv) gem.
|
|
86
86
|
|
|
87
87
|
```ruby
|
|
88
88
|
# my_job.metacrunch
|
|
89
89
|
|
|
90
|
-
source Metacrunch::File::CSVSource.new(
|
|
90
|
+
source Metacrunch::File::CSVSource.new(
|
|
91
|
+
"source.csv" # filename
|
|
92
|
+
[, OPTIONS]) # options
|
|
91
93
|
```
|
|
92
94
|
|
|
93
95
|
**Options**
|
|
94
96
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
* `
|
|
98
|
-
* `quote_char`: Quotation character. Defaults to `"`.
|
|
99
|
-
* `file_encoding`: Set the file encoding. Defaults to `utf-8`.
|
|
97
|
+
Using the `options` argument you can pass any [CSV reading option supported by smarter_csv](https://github.com/tilo/smarter_csv/blob/main/docs/options.md#csv-reading) using the key `csv_options`.
|
|
98
|
+
|
|
99
|
+
* `csv_options`: Hash with any option supported by smarter_csv for CSV reading. Our defaults are `headers_in_file: true`, `col_sep: ","`, `row_sep: "\n"`, `quote_char: '"'`, `file_encoding: "utf-8"`.
|
|
100
100
|
|
|
101
101
|
## `Metacrunch::File::CSVDestination`
|
|
102
102
|
|
|
103
|
-
This class provides a metacrunch `desination` for writing CSV files.
|
|
103
|
+
This class provides a metacrunch `destination` for writing CSV files. Like `CSVSource`, it uses [smarter_csv](https://github.com/tilo/smarter_csv) under the hood.
|
|
104
104
|
|
|
105
105
|
```ruby
|
|
106
106
|
# my_job.metacrunch
|
|
107
107
|
|
|
108
108
|
destination Metacrunch::File::CSVDestination.new(
|
|
109
|
-
"result.csv"
|
|
110
|
-
[
|
|
111
|
-
[, OPTIONS]
|
|
109
|
+
"result.csv" # filename
|
|
110
|
+
[, OPTIONS] # options
|
|
112
111
|
)
|
|
113
112
|
```
|
|
114
113
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require "metacrunch/file"
|
|
2
|
+
require "smarter_csv"
|
|
2
3
|
|
|
3
4
|
module Metacrunch
|
|
4
5
|
class File::CSVDestination
|
|
@@ -8,41 +9,23 @@ module Metacrunch
|
|
|
8
9
|
csv_options: {}
|
|
9
10
|
}
|
|
10
11
|
|
|
11
|
-
def initialize(filename,
|
|
12
|
+
def initialize(filename, options = {})
|
|
12
13
|
@filename = ::File.expand_path(filename)
|
|
13
|
-
@headers = headers
|
|
14
14
|
@options = DEFAULT_OPTIONS.deep_merge(options)
|
|
15
15
|
|
|
16
|
-
if ::File.
|
|
16
|
+
if ::File.exist?(@filename) && @options[:override_existing_file] == false
|
|
17
17
|
raise "File `#{@filename}` exists but `override_existing_file` option was set to `false`"
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
-
@
|
|
21
|
-
|
|
22
|
-
if @headers.present?
|
|
23
|
-
raise ArgumentError, "Headers must be an Array" unless @headers.is_a?(Array)
|
|
24
|
-
csv_str = CSV.generate_line(@headers, **@options[:csv_options])
|
|
25
|
-
@file.write(csv_str)
|
|
26
|
-
end
|
|
20
|
+
@csv_writer = SmarterCSV::Writer.new(@filename, @options[:csv_options])
|
|
27
21
|
end
|
|
28
22
|
|
|
29
23
|
def write(data)
|
|
30
|
-
|
|
31
|
-
raise ArgumentError, "Data must be an Array" unless data.is_a?(Array)
|
|
32
|
-
|
|
33
|
-
if data.first.is_a?(Array)
|
|
34
|
-
data.each do |d|
|
|
35
|
-
csv_str = CSV.generate_line(d, **@options[:csv_options])
|
|
36
|
-
@file.write(csv_str)
|
|
37
|
-
end
|
|
38
|
-
else
|
|
39
|
-
csv_str = CSV.generate_line(data, **@options[:csv_options])
|
|
40
|
-
@file.write(csv_str)
|
|
41
|
-
end
|
|
24
|
+
@csv_writer << data
|
|
42
25
|
end
|
|
43
26
|
|
|
44
27
|
def close
|
|
45
|
-
@
|
|
28
|
+
@csv_writer.finalize if @csv_writer
|
|
46
29
|
end
|
|
47
30
|
|
|
48
31
|
end
|
|
@@ -5,11 +5,13 @@ module Metacrunch
|
|
|
5
5
|
class File::CSVSource
|
|
6
6
|
|
|
7
7
|
DEFAULT_OPTIONS = {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
8
|
+
csv_options: {
|
|
9
|
+
headers_in_file: true,
|
|
10
|
+
col_sep: ",",
|
|
11
|
+
row_sep: "\n",
|
|
12
|
+
quote_char: '"',
|
|
13
|
+
file_encoding: "utf-8"
|
|
14
|
+
}
|
|
13
15
|
}
|
|
14
16
|
|
|
15
17
|
def initialize(csv_filename, options = {})
|
|
@@ -20,13 +22,7 @@ module Metacrunch
|
|
|
20
22
|
def each(&block)
|
|
21
23
|
return enum_for(__method__) unless block_given?
|
|
22
24
|
|
|
23
|
-
SmarterCSV.process(@filename,
|
|
24
|
-
headers_in_file: @options[:headers],
|
|
25
|
-
col_sep: @options[:col_sep],
|
|
26
|
-
row_sep: @options[:row_sep],
|
|
27
|
-
quote_char: @options[:quote_char],
|
|
28
|
-
file_encoding: @options[:file_encoding]
|
|
29
|
-
}) do |line|
|
|
25
|
+
SmarterCSV.process(@filename, @options[:csv_options]) do |line|
|
|
30
26
|
yield line
|
|
31
27
|
end
|
|
32
28
|
end
|
|
@@ -11,7 +11,7 @@ module Metacrunch
|
|
|
11
11
|
@filename = ::File.expand_path(filename)
|
|
12
12
|
@options = DEFAULT_OPTIONS.deep_merge(options)
|
|
13
13
|
|
|
14
|
-
if ::File.
|
|
14
|
+
if ::File.exist?(@filename) && @options[:override_existing_file] == false
|
|
15
15
|
raise "File `#{@filename}` exists but `override_existing_file` option was set to `false`"
|
|
16
16
|
end
|
|
17
17
|
|
data/metacrunch-file.gemspec
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: metacrunch-file
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- René Sprotte
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: activesupport
|
|
@@ -30,14 +29,14 @@ dependencies:
|
|
|
30
29
|
requirements:
|
|
31
30
|
- - "~>"
|
|
32
31
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: '
|
|
32
|
+
version: '4.0'
|
|
34
33
|
type: :runtime
|
|
35
34
|
prerelease: false
|
|
36
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
36
|
requirements:
|
|
38
37
|
- - "~>"
|
|
39
38
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: '
|
|
39
|
+
version: '4.0'
|
|
41
40
|
- !ruby/object:Gem::Dependency
|
|
42
41
|
name: smarter_csv
|
|
43
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -52,8 +51,6 @@ dependencies:
|
|
|
52
51
|
- - "~>"
|
|
53
52
|
- !ruby/object:Gem::Version
|
|
54
53
|
version: '1.2'
|
|
55
|
-
description:
|
|
56
|
-
email:
|
|
57
54
|
executables: []
|
|
58
55
|
extensions: []
|
|
59
56
|
extra_rdoc_files: []
|
|
@@ -81,7 +78,6 @@ homepage: http://github.com/ubpb/metacrunch-file
|
|
|
81
78
|
licenses:
|
|
82
79
|
- MIT
|
|
83
80
|
metadata: {}
|
|
84
|
-
post_install_message:
|
|
85
81
|
rdoc_options: []
|
|
86
82
|
require_paths:
|
|
87
83
|
- lib
|
|
@@ -96,8 +92,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
96
92
|
- !ruby/object:Gem::Version
|
|
97
93
|
version: '0'
|
|
98
94
|
requirements: []
|
|
99
|
-
rubygems_version: 3.
|
|
100
|
-
signing_key:
|
|
95
|
+
rubygems_version: 3.6.9
|
|
101
96
|
specification_version: 4
|
|
102
97
|
summary: File package for the metacrunch ETL toolkit.
|
|
103
98
|
test_files: []
|