embulk 0.8.3-java → 0.8.4-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +1 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +128 -21
- data/embulk-docs/src/built-in.rst +75 -2
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +16 -15
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.4.rst +18 -0
- data/embulk-standards/build.gradle +1 -0
- data/embulk-standards/src/main/java/org/embulk/standards/Bzip2FileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/Bzip2FileEncoderPlugin.java +67 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +3 -0
- data/lib/embulk/guess/bzip2.rb +23 -0
- data/lib/embulk/runner.rb +2 -2
- data/lib/embulk/version.rb +1 -1
- metadata +39 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3dabb8856cd4e9ad6d545a11567e80f4d9554f0
|
4
|
+
data.tar.gz: 73793784b13f37a9f1f7dd26050e60c7cd1798fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8eaa75bfa389c681008b811b0705e78dfe0d25825e09e88c2dd97f8c4924f5ee894f2c3461dff704ec25aa8e93596015edac642554b2b185c1562a43e5606f84
|
7
|
+
data.tar.gz: 9caa1141f13553a073ac3841a0cd0d463ae4ffc92810c691e5a5990a44d4b0d17b491afe11057369150ba031d8d0d6e4c176e49c2cd4a004b30da284bb38d0e6
|
data/build.gradle
CHANGED
@@ -4,6 +4,8 @@ import java.util.List;
|
|
4
4
|
import java.util.ArrayList;
|
5
5
|
import java.util.Map;
|
6
6
|
import java.util.HashMap;
|
7
|
+
import java.io.InputStream;
|
8
|
+
import java.io.Closeable;
|
7
9
|
import java.io.IOException;
|
8
10
|
import org.msgpack.value.Value;
|
9
11
|
import org.msgpack.value.ValueFactory;
|
@@ -13,6 +15,14 @@ import com.fasterxml.jackson.core.JsonToken;
|
|
13
15
|
|
14
16
|
public class JsonParser
|
15
17
|
{
|
18
|
+
public interface Stream
|
19
|
+
extends Closeable
|
20
|
+
{
|
21
|
+
Value next() throws IOException;
|
22
|
+
|
23
|
+
void close() throws IOException;
|
24
|
+
}
|
25
|
+
|
16
26
|
private final JsonFactory factory;
|
17
27
|
|
18
28
|
public JsonParser()
|
@@ -21,38 +31,136 @@ public class JsonParser
|
|
21
31
|
factory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
|
22
32
|
}
|
23
33
|
|
34
|
+
public Stream open(InputStream in) throws IOException
|
35
|
+
{
|
36
|
+
return new StreamParseContext(factory, in);
|
37
|
+
}
|
38
|
+
|
24
39
|
public Value parse(String json)
|
25
40
|
{
|
26
|
-
return new
|
41
|
+
return new SingleParseContext(factory, json).parse();
|
42
|
+
}
|
43
|
+
|
44
|
+
private static String sampleJsonString(String json)
|
45
|
+
{
|
46
|
+
if (json.length() < 100) {
|
47
|
+
return json;
|
48
|
+
}
|
49
|
+
else {
|
50
|
+
return json.substring(0, 97) + "...";
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
private static class StreamParseContext
|
55
|
+
extends AbstractParseContext
|
56
|
+
implements Stream
|
57
|
+
{
|
58
|
+
public StreamParseContext(JsonFactory factory, InputStream in)
|
59
|
+
throws IOException, JsonParseException
|
60
|
+
{
|
61
|
+
super(createParser(factory, in));
|
62
|
+
}
|
63
|
+
|
64
|
+
private static com.fasterxml.jackson.core.JsonParser createParser(JsonFactory factory, InputStream in)
|
65
|
+
throws IOException
|
66
|
+
{
|
67
|
+
try {
|
68
|
+
return factory.createParser(in);
|
69
|
+
}
|
70
|
+
catch (IOException ex) {
|
71
|
+
throw ex;
|
72
|
+
}
|
73
|
+
catch (Exception ex) {
|
74
|
+
throw new JsonParseException("Failed to parse JSON", ex);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
@Override
|
79
|
+
public void close() throws IOException
|
80
|
+
{
|
81
|
+
parser.close();
|
82
|
+
}
|
83
|
+
|
84
|
+
@Override
|
85
|
+
protected String sampleJsonString()
|
86
|
+
{
|
87
|
+
return "in";
|
88
|
+
}
|
27
89
|
}
|
28
90
|
|
29
|
-
private class
|
91
|
+
private static class SingleParseContext
|
92
|
+
extends AbstractParseContext
|
30
93
|
{
|
31
94
|
private final String json;
|
32
|
-
private final com.fasterxml.jackson.core.JsonParser parser;
|
33
95
|
|
34
|
-
public
|
96
|
+
public SingleParseContext(JsonFactory factory, String json)
|
35
97
|
{
|
98
|
+
super(createParser(factory, json));
|
36
99
|
this.json = json;
|
100
|
+
}
|
101
|
+
|
102
|
+
private static com.fasterxml.jackson.core.JsonParser createParser(JsonFactory factory, String json)
|
103
|
+
{
|
37
104
|
try {
|
38
|
-
|
105
|
+
return factory.createParser(json);
|
39
106
|
}
|
40
107
|
catch (Exception ex) {
|
41
|
-
throw new JsonParseException("Failed to parse
|
108
|
+
throw new JsonParseException("Failed to parse JSON: "+JsonParser.sampleJsonString(json), ex);
|
42
109
|
}
|
43
110
|
}
|
44
111
|
|
45
112
|
public Value parse()
|
113
|
+
{
|
114
|
+
try {
|
115
|
+
Value v = next();
|
116
|
+
if (v == null) {
|
117
|
+
throw new JsonParseException("Unable to parse empty string");
|
118
|
+
}
|
119
|
+
return v;
|
120
|
+
}
|
121
|
+
catch (IOException ex) {
|
122
|
+
throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
@Override
|
127
|
+
protected String sampleJsonString()
|
128
|
+
{
|
129
|
+
return JsonParser.sampleJsonString(json);
|
130
|
+
}
|
131
|
+
}
|
132
|
+
|
133
|
+
private static abstract class AbstractParseContext
|
134
|
+
{
|
135
|
+
protected final com.fasterxml.jackson.core.JsonParser parser;
|
136
|
+
|
137
|
+
public AbstractParseContext(com.fasterxml.jackson.core.JsonParser parser)
|
138
|
+
{
|
139
|
+
this.parser = parser;
|
140
|
+
}
|
141
|
+
|
142
|
+
protected abstract String sampleJsonString();
|
143
|
+
|
144
|
+
public Value next() throws IOException
|
46
145
|
{
|
47
146
|
try {
|
48
147
|
JsonToken token = parser.nextToken();
|
148
|
+
if (token == null) {
|
149
|
+
return null;
|
150
|
+
}
|
49
151
|
return jsonTokenToValue(token);
|
50
152
|
}
|
153
|
+
catch (com.fasterxml.jackson.core.JsonParseException ex) {
|
154
|
+
throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
|
155
|
+
}
|
156
|
+
catch (IOException ex) {
|
157
|
+
throw ex;
|
158
|
+
}
|
51
159
|
catch (JsonParseException ex) {
|
52
160
|
throw ex;
|
53
161
|
}
|
54
|
-
catch (
|
55
|
-
throw new JsonParseException("Failed to parse
|
162
|
+
catch (RuntimeException ex) {
|
163
|
+
throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
|
56
164
|
}
|
57
165
|
}
|
58
166
|
|
@@ -81,9 +189,12 @@ public class JsonParser
|
|
81
189
|
List<Value> list = new ArrayList<>();
|
82
190
|
while (true) {
|
83
191
|
token = parser.nextToken();
|
84
|
-
if(token == JsonToken.END_ARRAY) {
|
192
|
+
if (token == JsonToken.END_ARRAY) {
|
85
193
|
return ValueFactory.newArray(list);
|
86
194
|
}
|
195
|
+
else if (token == null) {
|
196
|
+
throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting an element of an array: " + sampleJsonString());
|
197
|
+
}
|
87
198
|
list.add(jsonTokenToValue(token));
|
88
199
|
}
|
89
200
|
}
|
@@ -94,11 +205,17 @@ public class JsonParser
|
|
94
205
|
if (token == JsonToken.END_OBJECT) {
|
95
206
|
return ValueFactory.newMap(map);
|
96
207
|
}
|
208
|
+
else if (token == null) {
|
209
|
+
throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting a key of object: " + sampleJsonString());
|
210
|
+
}
|
97
211
|
String key = parser.getCurrentName();
|
98
212
|
if (key == null) {
|
99
|
-
throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
|
213
|
+
throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation() + ": " + sampleJsonString());
|
100
214
|
}
|
101
215
|
token = parser.nextToken();
|
216
|
+
if (token == null) {
|
217
|
+
throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting a value of object: " + sampleJsonString());
|
218
|
+
}
|
102
219
|
Value value = jsonTokenToValue(token);
|
103
220
|
map.put(ValueFactory.newString(key), value);
|
104
221
|
}
|
@@ -108,18 +225,8 @@ public class JsonParser
|
|
108
225
|
case END_OBJECT:
|
109
226
|
case NOT_AVAILABLE:
|
110
227
|
default:
|
111
|
-
throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
|
228
|
+
throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation() + ": " + sampleJsonString());
|
112
229
|
}
|
113
230
|
}
|
114
231
|
}
|
115
|
-
|
116
|
-
private static String sampleJsonString(String json)
|
117
|
-
{
|
118
|
-
if (json.length() < 100) {
|
119
|
-
return json;
|
120
|
-
}
|
121
|
-
else {
|
122
|
-
return json.substring(0, 97) + "...";
|
123
|
-
}
|
124
|
-
}
|
125
232
|
}
|
@@ -44,13 +44,13 @@ A configuration file consists of following sections:
|
|
44
44
|
|
45
45
|
* **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
|
46
46
|
|
47
|
-
* **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
|
47
|
+
* **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, bzip2, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
|
48
48
|
|
49
49
|
* **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
|
50
50
|
|
51
51
|
* **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
|
52
52
|
|
53
|
-
* **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
|
53
|
+
* **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip or bzip2)
|
54
54
|
|
55
55
|
* **filters:** Filter plugins options (optional).
|
56
56
|
|
@@ -298,6 +298,27 @@ Example
|
|
298
298
|
- {type: gzip}
|
299
299
|
|
300
300
|
|
301
|
+
BZip2 decoder plugin
|
302
|
+
--------------------
|
303
|
+
|
304
|
+
The ``bzip2`` decoder plugin decompresses bzip2 files before input plugins read them.
|
305
|
+
|
306
|
+
Options
|
307
|
+
~~~~~~~~~~~~~~~~~~
|
308
|
+
|
309
|
+
This plugin doesn't have any options.
|
310
|
+
|
311
|
+
Example
|
312
|
+
~~~~~~~~~~~~~~~~~~
|
313
|
+
|
314
|
+
.. code-block:: yaml
|
315
|
+
|
316
|
+
in:
|
317
|
+
...
|
318
|
+
decoders:
|
319
|
+
- {type: bzip2}
|
320
|
+
|
321
|
+
|
301
322
|
File output plugin
|
302
323
|
------------------
|
303
324
|
|
@@ -448,6 +469,58 @@ Example
|
|
448
469
|
- type: gzip
|
449
470
|
level: 1
|
450
471
|
|
472
|
+
|
473
|
+
Gzip encoder plugin
|
474
|
+
-------------------
|
475
|
+
|
476
|
+
The ``gzip`` encoder plugin compresses output files using gzip.
|
477
|
+
|
478
|
+
Options
|
479
|
+
~~~~~~~~~~~~~~~~~~
|
480
|
+
|
481
|
+
+---------+----------+----------------------------------------------------------------------+--------------------+
|
482
|
+
| name | type | description | required? |
|
483
|
+
+=========+==========+======================================================================+====================+
|
484
|
+
| level | integer | Compression level. From 0 (no compression) to 9 (best compression). | ``6`` by default |
|
485
|
+
+---------+----------+----------------------------------------------------------------------+--------------------+
|
486
|
+
|
487
|
+
Example
|
488
|
+
~~~~~~~~~~~~~~~~~~
|
489
|
+
|
490
|
+
.. code-block:: yaml
|
491
|
+
|
492
|
+
out:
|
493
|
+
...
|
494
|
+
encoders:
|
495
|
+
- type: gzip
|
496
|
+
level: 1
|
497
|
+
|
498
|
+
BZip2 encoder plugin
|
499
|
+
--------------------
|
500
|
+
|
501
|
+
The ``bzip2`` encoder plugin compresses output files using bzip2.
|
502
|
+
|
503
|
+
Options
|
504
|
+
~~~~~~~~~~~~~~~~~~
|
505
|
+
|
506
|
+
+---------+----------+----------------------------------------------------------------------+--------------------+
|
507
|
+
| name | type | description | required? |
|
508
|
+
+=========+==========+======================================================================+====================+
|
509
|
+
| level | integer | Compression level. From 1 to 9 (best compression). | ``9`` by default |
|
510
|
+
+---------+----------+----------------------------------------------------------------------+--------------------+
|
511
|
+
|
512
|
+
Example
|
513
|
+
~~~~~~~~~~~~~~~~~~
|
514
|
+
|
515
|
+
.. code-block:: yaml
|
516
|
+
|
517
|
+
out:
|
518
|
+
...
|
519
|
+
encoders:
|
520
|
+
- type: bzip2
|
521
|
+
level: 6
|
522
|
+
|
523
|
+
|
451
524
|
Rename filter plugin
|
452
525
|
------------------
|
453
526
|
|
@@ -24,9 +24,9 @@ For the smallest setup, you can unzip the package and run `./bin/elasticsearch`
|
|
24
24
|
|
25
25
|
.. code-block:: console
|
26
26
|
|
27
|
-
$ wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-
|
28
|
-
$ unzip elasticsearch-
|
29
|
-
$ cd elasticsearch-
|
27
|
+
$ wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/zip/elasticsearch/2.2.0/elasticsearch-2.2.0.zip
|
28
|
+
$ unzip elasticsearch-2.2.0.zip
|
29
|
+
$ cd elasticsearch-2.2.0
|
30
30
|
$ ./bin/elasticsearch
|
31
31
|
|
32
32
|
Step 2. Download and unzip Kibana:
|
@@ -36,12 +36,12 @@ You can find releases from the `Kibana website <http://www.elasticsearch.org/ove
|
|
36
36
|
|
37
37
|
.. code-block:: console
|
38
38
|
|
39
|
-
$ wget https://download.
|
40
|
-
$ tar zxvf kibana-4.
|
41
|
-
$ cd kibana-4.
|
39
|
+
$ wget https://download.elastic.co/kibana/kibana/kibana-4.4.0-linux-x64.tar.gz
|
40
|
+
$ tar zxvf kibana-4.4.0-linux-x64.tar.gz
|
41
|
+
$ cd kibana-4.4.0-linux-x64
|
42
42
|
$ ./bin/kibana
|
43
43
|
|
44
|
-
Note: If you're using Mac OS X, https://download.
|
44
|
+
Note: If you're using Mac OS X, https://download.elastic.co/kibana/kibana/kibana-4.4.0-darwin-x64.tar.gz is the URL to download.
|
45
45
|
|
46
46
|
Now Elasticsearch and Kibana are running. Open http://localhost:5601/ in your browser to see Kibana's graphical interface.
|
47
47
|
|
@@ -75,7 +75,7 @@ Loading a CSV file
|
|
75
75
|
|
76
76
|
Assume you have CSV files in the ``./mydata/csv/`` directory. If you don't have CSV files, you can create some using the ``embulk example ./mydata`` command.
|
77
77
|
|
78
|
-
Create this configuration file and save as ``
|
78
|
+
Create this configuration file and save as ``seed.yml``:
|
79
79
|
|
80
80
|
.. code-block:: yaml
|
81
81
|
|
@@ -93,9 +93,9 @@ In fact, this configuration lacks some important information. However, embulk gu
|
|
93
93
|
|
94
94
|
.. code-block:: console
|
95
95
|
|
96
|
-
$ embulk guess
|
96
|
+
$ embulk guess ./mydata/seed.yml -o config.yml
|
97
97
|
|
98
|
-
The generated config
|
98
|
+
The generated config.yml file should include complete information as follows:
|
99
99
|
|
100
100
|
.. code-block:: yaml
|
101
101
|
|
@@ -137,24 +137,25 @@ Now, you can run the bulk loading:
|
|
137
137
|
|
138
138
|
.. code-block:: console
|
139
139
|
|
140
|
-
$ embulk run config
|
140
|
+
$ embulk run config.yml -c diff.yml
|
141
141
|
|
142
142
|
Scheduling loading by cron
|
143
143
|
------------------
|
144
144
|
|
145
|
-
At the last step, you ran embulk command with ``-
|
145
|
+
In the last step, you ran the embulk command with the ``-c diff.yml`` option. The ``diff.yml`` file should include a parameter named ``last_path``:
|
146
146
|
|
147
147
|
.. code-block:: yaml
|
148
148
|
|
149
|
-
last_path: mydata/csv/sample_01.csv.gz
|
149
|
+
in: {last_path: mydata/csv/sample_01.csv.gz}
|
150
|
+
out: {}
|
150
151
|
|
151
152
|
With this configuration, embulk loads the files newer than this file in alphabetical order.
|
152
153
|
|
153
|
-
For example, if you create ``./mydata/csv/sample_02.csv.gz`` file, embulk skips ``sample_01.csv.gz`` file and loads ``sample_02.csv.gz`` only next time. And the next
|
154
|
+
For example, if you create the ``./mydata/csv/sample_02.csv.gz`` file, embulk skips the ``sample_01.csv.gz`` file and loads only ``sample_02.csv.gz`` next time. The next ``diff.yml`` file then has ``last_path: mydata/csv/sample_02.csv.gz`` for the execution after that.
|
154
155
|
|
155
156
|
So, if you want to load newly created files every day, you can set up this cron schedule:
|
156
157
|
|
157
158
|
.. code-block:: cron
|
158
159
|
|
159
|
-
0 * * * * embulk run /path/to/
|
160
|
+
0 * * * * embulk run /path/to/config.yml -c /path/to/diff.yml
|
160
161
|
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
Release 0.8.4
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Added ``bzip2`` encoder plugin, decoder plugin, and guess plugin.
|
8
|
+
* Fixed PageBuilder to close the underlying output plugin.
|
9
|
+
* Embulk::Runner accepts .yaml as well as .yml
|
10
|
+
|
11
|
+
Java API
|
12
|
+
------------------
|
13
|
+
|
14
|
+
* Added JsonParser.Stream API for parser plugins to parse a stream of JSON objects.
|
15
|
+
|
16
|
+
Release Date
|
17
|
+
------------------
|
18
|
+
2016-02-16
|
@@ -0,0 +1,55 @@
|
|
1
|
+
package org.embulk.standards;
|
2
|
+
|
3
|
+
import java.io.InputStream;
|
4
|
+
import java.io.IOException;
|
5
|
+
import org.embulk.config.Task;
|
6
|
+
import org.embulk.config.TaskSource;
|
7
|
+
import org.embulk.config.ConfigSource;
|
8
|
+
import org.embulk.config.ConfigInject;
|
9
|
+
import org.embulk.spi.DecoderPlugin;
|
10
|
+
import org.embulk.spi.BufferAllocator;
|
11
|
+
import org.embulk.spi.FileInput;
|
12
|
+
import org.embulk.spi.util.FileInputInputStream;
|
13
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
14
|
+
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
|
15
|
+
|
16
|
+
public class Bzip2FileDecoderPlugin
|
17
|
+
implements DecoderPlugin
|
18
|
+
{
|
19
|
+
public interface PluginTask
|
20
|
+
extends Task
|
21
|
+
{
|
22
|
+
@ConfigInject
|
23
|
+
BufferAllocator getBufferAllocator();
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public void transaction(ConfigSource config, DecoderPlugin.Control control)
|
28
|
+
{
|
29
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
30
|
+
control.run(task.dump());
|
31
|
+
}
|
32
|
+
|
33
|
+
@Override
|
34
|
+
public FileInput open(TaskSource taskSource, FileInput fileInput)
|
35
|
+
{
|
36
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
37
|
+
final FileInputInputStream files = new FileInputInputStream(fileInput);
|
38
|
+
return new InputStreamFileInput(
|
39
|
+
task.getBufferAllocator(),
|
40
|
+
new InputStreamFileInput.Provider() {
|
41
|
+
public InputStream openNext() throws IOException
|
42
|
+
{
|
43
|
+
if (!files.nextFile()) {
|
44
|
+
return null;
|
45
|
+
}
|
46
|
+
return new BZip2CompressorInputStream(files, true);
|
47
|
+
}
|
48
|
+
|
49
|
+
public void close() throws IOException
|
50
|
+
{
|
51
|
+
files.close();
|
52
|
+
}
|
53
|
+
});
|
54
|
+
}
|
55
|
+
}
|
@@ -0,0 +1,67 @@
|
|
1
|
+
package org.embulk.standards;
|
2
|
+
|
3
|
+
import java.io.OutputStream;
|
4
|
+
import java.io.IOException;
|
5
|
+
import javax.validation.constraints.Min;
|
6
|
+
import javax.validation.constraints.Max;
|
7
|
+
import org.embulk.config.Task;
|
8
|
+
import org.embulk.config.Config;
|
9
|
+
import org.embulk.config.ConfigInject;
|
10
|
+
import org.embulk.config.ConfigDefault;
|
11
|
+
import org.embulk.config.TaskSource;
|
12
|
+
import org.embulk.config.ConfigSource;
|
13
|
+
import org.embulk.spi.EncoderPlugin;
|
14
|
+
import org.embulk.spi.FileOutput;
|
15
|
+
import org.embulk.spi.BufferAllocator;
|
16
|
+
import org.embulk.spi.util.FileOutputOutputStream;
|
17
|
+
import org.embulk.spi.util.OutputStreamFileOutput;
|
18
|
+
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
|
19
|
+
|
20
|
+
public class Bzip2FileEncoderPlugin
|
21
|
+
implements EncoderPlugin
|
22
|
+
{
|
23
|
+
public interface PluginTask
|
24
|
+
extends Task
|
25
|
+
{
|
26
|
+
@Config("level")
|
27
|
+
@ConfigDefault("9")
|
28
|
+
@Min(1)
|
29
|
+
@Max(9)
|
30
|
+
int getLevel();
|
31
|
+
|
32
|
+
@ConfigInject
|
33
|
+
BufferAllocator getBufferAllocator();
|
34
|
+
}
|
35
|
+
|
36
|
+
public void transaction(ConfigSource config, EncoderPlugin.Control control)
|
37
|
+
{
|
38
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
39
|
+
control.run(task.dump());
|
40
|
+
}
|
41
|
+
|
42
|
+
@Override
|
43
|
+
public FileOutput open(TaskSource taskSource, final FileOutput fileOutput)
|
44
|
+
{
|
45
|
+
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
46
|
+
|
47
|
+
final FileOutputOutputStream output = new FileOutputOutputStream(fileOutput, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.FLUSH);
|
48
|
+
|
49
|
+
return new OutputStreamFileOutput(new OutputStreamFileOutput.Provider() {
|
50
|
+
public OutputStream openNext() throws IOException
|
51
|
+
{
|
52
|
+
output.nextFile();
|
53
|
+
return new BZip2CompressorOutputStream(output, task.getLevel());
|
54
|
+
}
|
55
|
+
|
56
|
+
public void finish() throws IOException
|
57
|
+
{
|
58
|
+
fileOutput.finish();
|
59
|
+
}
|
60
|
+
|
61
|
+
public void close() throws IOException
|
62
|
+
{
|
63
|
+
fileOutput.close();
|
64
|
+
}
|
65
|
+
});
|
66
|
+
}
|
67
|
+
}
|
@@ -30,6 +30,7 @@ public class StandardPluginModule
|
|
30
30
|
|
31
31
|
// file decoder plugins
|
32
32
|
registerPluginTo(binder, DecoderPlugin.class, "gzip", GzipFileDecoderPlugin.class);
|
33
|
+
registerPluginTo(binder, DecoderPlugin.class, "bzip2", Bzip2FileDecoderPlugin.class);
|
33
34
|
|
34
35
|
// output plugins
|
35
36
|
registerPluginTo(binder, OutputPlugin.class, "file", LocalFileOutputPlugin.class);
|
@@ -41,12 +42,14 @@ public class StandardPluginModule
|
|
41
42
|
|
42
43
|
// file encoder plugins
|
43
44
|
registerPluginTo(binder, EncoderPlugin.class, "gzip", GzipFileEncoderPlugin.class);
|
45
|
+
registerPluginTo(binder, EncoderPlugin.class, "bzip2", Bzip2FileEncoderPlugin.class);
|
44
46
|
|
45
47
|
// filter plugins
|
46
48
|
registerPluginTo(binder, FilterPlugin.class, "rename", RenameFilterPlugin.class);
|
47
49
|
|
48
50
|
// default guess plugins
|
49
51
|
registerDefaultGuessPluginTo(binder, new PluginType("gzip"));
|
52
|
+
registerDefaultGuessPluginTo(binder, new PluginType("bzip2"));
|
50
53
|
registerDefaultGuessPluginTo(binder, new PluginType("csv"));
|
51
54
|
// charset and newline guess plugins are loaded and invoked by CsvGuessPlugin
|
52
55
|
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Guess
|
3
|
+
|
4
|
+
class Bzip2GuessPlugin < GuessPlugin
|
5
|
+
Plugin.register_guess('bzip2', self)
|
6
|
+
|
7
|
+
# magic: BZ
|
8
|
+
# version: 'h' = bzip2
|
9
|
+
# blocksize: 1 .. 9
|
10
|
+
# block magic: 0x314159265359 (6 bytes)
|
11
|
+
block_magic = [0x31, 0x41, 0x59, 0x26, 0x53, 0x59].pack('C*')
|
12
|
+
BZIP2_HEADER_PATTERN = /BZh[1-9]#{Regexp.quote(block_magic)}/n
|
13
|
+
|
14
|
+
def guess(config, sample_buffer)
|
15
|
+
if sample_buffer[0,10] =~ BZIP2_HEADER_PATTERN
|
16
|
+
return {"decoders" => [{"type" => "bzip2"}]}
|
17
|
+
end
|
18
|
+
return {}
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
end
|
data/lib/embulk/runner.rb
CHANGED
@@ -122,12 +122,12 @@ module Embulk
|
|
122
122
|
case config
|
123
123
|
when String
|
124
124
|
case config
|
125
|
-
when /\.
|
125
|
+
when /\.ya?ml\.liquid$/
|
126
126
|
require 'liquid'
|
127
127
|
template_params = options[:template_params] || {}
|
128
128
|
template_include_path = File.expand_path(options[:template_include_path] || File.dirname(config)) unless options[:template_include_path] == false
|
129
129
|
@embed.newConfigLoader.fromYamlString run_liquid(File.read(config), template_params, template_include_path)
|
130
|
-
when /\.
|
130
|
+
when /\.ya?ml$/
|
131
131
|
@embed.newConfigLoader.fromYamlString File.read(config)
|
132
132
|
else
|
133
133
|
raise ConfigError.new("Unsupported file extension. Supported file extensions are .yml and .yml.liquid: #{config}")
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,127 +1,127 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.4
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 1.10.6
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ">="
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: 1.10.6
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :runtime
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: msgpack
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- - "
|
24
|
+
- - ">="
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
26
|
+
version: 1.10.6
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - "~>"
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: 0.7.3
|
33
|
+
name: msgpack
|
39
34
|
prerelease: false
|
40
35
|
type: :runtime
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: liquid
|
43
36
|
version_requirements: !ruby/object:Gem::Requirement
|
44
37
|
requirements:
|
45
38
|
- - "~>"
|
46
39
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
40
|
+
version: 0.7.3
|
41
|
+
- !ruby/object:Gem::Dependency
|
48
42
|
requirement: !ruby/object:Gem::Requirement
|
49
43
|
requirements:
|
50
44
|
- - "~>"
|
51
45
|
- !ruby/object:Gem::Version
|
52
46
|
version: 3.0.6
|
47
|
+
name: liquid
|
53
48
|
prerelease: false
|
54
49
|
type: :runtime
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rjack-icu
|
57
50
|
version_requirements: !ruby/object:Gem::Requirement
|
58
51
|
requirements:
|
59
52
|
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
54
|
+
version: 3.0.6
|
55
|
+
- !ruby/object:Gem::Dependency
|
62
56
|
requirement: !ruby/object:Gem::Requirement
|
63
57
|
requirements:
|
64
58
|
- - "~>"
|
65
59
|
- !ruby/object:Gem::Version
|
66
60
|
version: 4.54.1.1
|
61
|
+
name: rjack-icu
|
67
62
|
prerelease: false
|
68
63
|
type: :runtime
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rake
|
71
64
|
version_requirements: !ruby/object:Gem::Requirement
|
72
65
|
requirements:
|
73
|
-
- - "
|
66
|
+
- - "~>"
|
74
67
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
68
|
+
version: 4.54.1.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
76
70
|
requirement: !ruby/object:Gem::Requirement
|
77
71
|
requirements:
|
78
72
|
- - ">="
|
79
73
|
- !ruby/object:Gem::Version
|
80
74
|
version: 0.10.0
|
75
|
+
name: rake
|
81
76
|
prerelease: false
|
82
77
|
type: :development
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: test-unit
|
85
78
|
version_requirements: !ruby/object:Gem::Requirement
|
86
79
|
requirements:
|
87
|
-
- - "
|
80
|
+
- - ">="
|
88
81
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
82
|
+
version: 0.10.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
90
84
|
requirement: !ruby/object:Gem::Requirement
|
91
85
|
requirements:
|
92
86
|
- - "~>"
|
93
87
|
- !ruby/object:Gem::Version
|
94
88
|
version: 3.0.9
|
89
|
+
name: test-unit
|
95
90
|
prerelease: false
|
96
91
|
type: :development
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: yard
|
99
92
|
version_requirements: !ruby/object:Gem::Requirement
|
100
93
|
requirements:
|
101
94
|
- - "~>"
|
102
95
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.
|
96
|
+
version: 3.0.9
|
97
|
+
- !ruby/object:Gem::Dependency
|
104
98
|
requirement: !ruby/object:Gem::Requirement
|
105
99
|
requirements:
|
106
100
|
- - "~>"
|
107
101
|
- !ruby/object:Gem::Version
|
108
102
|
version: 0.8.7
|
103
|
+
name: yard
|
109
104
|
prerelease: false
|
110
105
|
type: :development
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: kramdown
|
113
106
|
version_requirements: !ruby/object:Gem::Requirement
|
114
107
|
requirements:
|
115
108
|
- - "~>"
|
116
109
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
110
|
+
version: 0.8.7
|
111
|
+
- !ruby/object:Gem::Dependency
|
118
112
|
requirement: !ruby/object:Gem::Requirement
|
119
113
|
requirements:
|
120
114
|
- - "~>"
|
121
115
|
- !ruby/object:Gem::Version
|
122
116
|
version: 1.5.0
|
117
|
+
name: kramdown
|
123
118
|
prerelease: false
|
124
119
|
type: :development
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.5.0
|
125
125
|
description: Embulk is an open-source, plugin-based bulk data loader to scale and simplify data management across heterogeneous data stores. It can collect and ship any kinds of data in high throughput with transaction control.
|
126
126
|
email:
|
127
127
|
- frsyuki@gmail.com
|
@@ -146,9 +146,10 @@ files:
|
|
146
146
|
- classpath/bval-core-0.5.jar
|
147
147
|
- classpath/bval-jsr303-0.5.jar
|
148
148
|
- classpath/commons-beanutils-core-1.8.3.jar
|
149
|
+
- classpath/commons-compress-1.10.jar
|
149
150
|
- classpath/commons-lang3-3.1.jar
|
150
|
-
- classpath/embulk-core-0.8.
|
151
|
-
- classpath/embulk-standards-0.8.
|
151
|
+
- classpath/embulk-core-0.8.4.jar
|
152
|
+
- classpath/embulk-standards-0.8.4.jar
|
152
153
|
- classpath/guava-18.0.jar
|
153
154
|
- classpath/guice-4.0.jar
|
154
155
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -462,7 +463,10 @@ files:
|
|
462
463
|
- embulk-docs/src/release/release-0.8.1.rst
|
463
464
|
- embulk-docs/src/release/release-0.8.2.rst
|
464
465
|
- embulk-docs/src/release/release-0.8.3.rst
|
466
|
+
- embulk-docs/src/release/release-0.8.4.rst
|
465
467
|
- embulk-standards/build.gradle
|
468
|
+
- embulk-standards/src/main/java/org/embulk/standards/Bzip2FileDecoderPlugin.java
|
469
|
+
- embulk-standards/src/main/java/org/embulk/standards/Bzip2FileEncoderPlugin.java
|
466
470
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
467
471
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
468
472
|
- embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java
|
@@ -547,6 +551,7 @@ files:
|
|
547
551
|
- lib/embulk/file_output_plugin.rb
|
548
552
|
- lib/embulk/filter_plugin.rb
|
549
553
|
- lib/embulk/formatter_plugin.rb
|
554
|
+
- lib/embulk/guess/bzip2.rb
|
550
555
|
- lib/embulk/guess/charset.rb
|
551
556
|
- lib/embulk/guess/csv.rb
|
552
557
|
- lib/embulk/guess/gzip.rb
|