embulk 0.8.3-java → 0.8.4-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 956d386bd5ac502aba7c83bc7737405fc79b67d9
4
- data.tar.gz: 1c2ecba6b13fce3d5befbfdd58a0eb354d30eb52
3
+ metadata.gz: e3dabb8856cd4e9ad6d545a11567e80f4d9554f0
4
+ data.tar.gz: 73793784b13f37a9f1f7dd26050e60c7cd1798fe
5
5
  SHA512:
6
- metadata.gz: 354abced4dc4a3e0c99544a0b669e53c37e0e7c3b66065697c4997b9cc9c98e7f355dfd6fe507a3e4a9ae6dccbdeb3ea0ff37e53261589a65576ec1dc710edcd
7
- data.tar.gz: 51c32feb1a486d4873ed4cc5efbbeda83aff46d2f5bdc653ec51aaeba5f876e0b3017fc965daa2234fac404b992bd3cbec3b2c439554ad6fee3bbf81205c1f22
6
+ metadata.gz: 8eaa75bfa389c681008b811b0705e78dfe0d25825e09e88c2dd97f8c4924f5ee894f2c3461dff704ec25aa8e93596015edac642554b2b185c1562a43e5606f84
7
+ data.tar.gz: 9caa1141f13553a073ac3841a0cd0d463ae4ffc92810c691e5a5990a44d4b0d17b491afe11057369150ba031d8d0d6e4c176e49c2cd4a004b30da284bb38d0e6
@@ -16,7 +16,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.8.3'
19
+ version = '0.8.4'
20
20
 
21
21
  ext {
22
22
  jrubyVersion = '9.0.4.0'
@@ -252,6 +252,7 @@ public class PageBuilder
252
252
  buffer = null;
253
253
  bufferSlice = null;
254
254
  }
255
+ output.close();
255
256
  }
256
257
 
257
258
  /* TODO for variable-length types
@@ -4,6 +4,8 @@ import java.util.List;
4
4
  import java.util.ArrayList;
5
5
  import java.util.Map;
6
6
  import java.util.HashMap;
7
+ import java.io.InputStream;
8
+ import java.io.Closeable;
7
9
  import java.io.IOException;
8
10
  import org.msgpack.value.Value;
9
11
  import org.msgpack.value.ValueFactory;
@@ -13,6 +15,14 @@ import com.fasterxml.jackson.core.JsonToken;
13
15
 
14
16
  public class JsonParser
15
17
  {
18
+ public interface Stream
19
+ extends Closeable
20
+ {
21
+ Value next() throws IOException;
22
+
23
+ void close() throws IOException;
24
+ }
25
+
16
26
  private final JsonFactory factory;
17
27
 
18
28
  public JsonParser()
@@ -21,38 +31,136 @@ public class JsonParser
21
31
  factory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
22
32
  }
23
33
 
34
+ public Stream open(InputStream in) throws IOException
35
+ {
36
+ return new StreamParseContext(factory, in);
37
+ }
38
+
24
39
  public Value parse(String json)
25
40
  {
26
- return new ParseContext(json).parse();
41
+ return new SingleParseContext(factory, json).parse();
42
+ }
43
+
44
+ private static String sampleJsonString(String json)
45
+ {
46
+ if (json.length() < 100) {
47
+ return json;
48
+ }
49
+ else {
50
+ return json.substring(0, 97) + "...";
51
+ }
52
+ }
53
+
54
+ private static class StreamParseContext
55
+ extends AbstractParseContext
56
+ implements Stream
57
+ {
58
+ public StreamParseContext(JsonFactory factory, InputStream in)
59
+ throws IOException, JsonParseException
60
+ {
61
+ super(createParser(factory, in));
62
+ }
63
+
64
+ private static com.fasterxml.jackson.core.JsonParser createParser(JsonFactory factory, InputStream in)
65
+ throws IOException
66
+ {
67
+ try {
68
+ return factory.createParser(in);
69
+ }
70
+ catch (IOException ex) {
71
+ throw ex;
72
+ }
73
+ catch (Exception ex) {
74
+ throw new JsonParseException("Failed to parse JSON", ex);
75
+ }
76
+ }
77
+
78
+ @Override
79
+ public void close() throws IOException
80
+ {
81
+ parser.close();
82
+ }
83
+
84
+ @Override
85
+ protected String sampleJsonString()
86
+ {
87
+ return "in";
88
+ }
27
89
  }
28
90
 
29
- private class ParseContext
91
+ private static class SingleParseContext
92
+ extends AbstractParseContext
30
93
  {
31
94
  private final String json;
32
- private final com.fasterxml.jackson.core.JsonParser parser;
33
95
 
34
- public ParseContext(String json)
96
+ public SingleParseContext(JsonFactory factory, String json)
35
97
  {
98
+ super(createParser(factory, json));
36
99
  this.json = json;
100
+ }
101
+
102
+ private static com.fasterxml.jackson.core.JsonParser createParser(JsonFactory factory, String json)
103
+ {
37
104
  try {
38
- this.parser = factory.createParser(json);
105
+ return factory.createParser(json);
39
106
  }
40
107
  catch (Exception ex) {
41
- throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
108
+ throw new JsonParseException("Failed to parse JSON: "+JsonParser.sampleJsonString(json), ex);
42
109
  }
43
110
  }
44
111
 
45
112
  public Value parse()
113
+ {
114
+ try {
115
+ Value v = next();
116
+ if (v == null) {
117
+ throw new JsonParseException("Unable to parse empty string");
118
+ }
119
+ return v;
120
+ }
121
+ catch (IOException ex) {
122
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
123
+ }
124
+ }
125
+
126
+ @Override
127
+ protected String sampleJsonString()
128
+ {
129
+ return JsonParser.sampleJsonString(json);
130
+ }
131
+ }
132
+
133
+ private static abstract class AbstractParseContext
134
+ {
135
+ protected final com.fasterxml.jackson.core.JsonParser parser;
136
+
137
+ public AbstractParseContext(com.fasterxml.jackson.core.JsonParser parser)
138
+ {
139
+ this.parser = parser;
140
+ }
141
+
142
+ protected abstract String sampleJsonString();
143
+
144
+ public Value next() throws IOException
46
145
  {
47
146
  try {
48
147
  JsonToken token = parser.nextToken();
148
+ if (token == null) {
149
+ return null;
150
+ }
49
151
  return jsonTokenToValue(token);
50
152
  }
153
+ catch (com.fasterxml.jackson.core.JsonParseException ex) {
154
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
155
+ }
156
+ catch (IOException ex) {
157
+ throw ex;
158
+ }
51
159
  catch (JsonParseException ex) {
52
160
  throw ex;
53
161
  }
54
- catch (Exception ex) {
55
- throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
162
+ catch (RuntimeException ex) {
163
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
56
164
  }
57
165
  }
58
166
 
@@ -81,9 +189,12 @@ public class JsonParser
81
189
  List<Value> list = new ArrayList<>();
82
190
  while (true) {
83
191
  token = parser.nextToken();
84
- if(token == JsonToken.END_ARRAY) {
192
+ if (token == JsonToken.END_ARRAY) {
85
193
  return ValueFactory.newArray(list);
86
194
  }
195
+ else if (token == null) {
196
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting an element of an array: " + sampleJsonString());
197
+ }
87
198
  list.add(jsonTokenToValue(token));
88
199
  }
89
200
  }
@@ -94,11 +205,17 @@ public class JsonParser
94
205
  if (token == JsonToken.END_OBJECT) {
95
206
  return ValueFactory.newMap(map);
96
207
  }
208
+ else if (token == null) {
209
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting a key of object: " + sampleJsonString());
210
+ }
97
211
  String key = parser.getCurrentName();
98
212
  if (key == null) {
99
- throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
213
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation() + ": " + sampleJsonString());
100
214
  }
101
215
  token = parser.nextToken();
216
+ if (token == null) {
217
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting a value of object: " + sampleJsonString());
218
+ }
102
219
  Value value = jsonTokenToValue(token);
103
220
  map.put(ValueFactory.newString(key), value);
104
221
  }
@@ -108,18 +225,8 @@ public class JsonParser
108
225
  case END_OBJECT:
109
226
  case NOT_AVAILABLE:
110
227
  default:
111
- throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
228
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation() + ": " + sampleJsonString());
112
229
  }
113
230
  }
114
231
  }
115
-
116
- private static String sampleJsonString(String json)
117
- {
118
- if (json.length() < 100) {
119
- return json;
120
- }
121
- else {
122
- return json.substring(0, 97) + "...";
123
- }
124
- }
125
232
  }
@@ -44,13 +44,13 @@ A configuration file consists of following sections:
44
44
 
45
45
  * **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
46
46
 
47
- * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
47
+ * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, bzip2, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
48
48
 
49
49
  * **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
50
50
 
51
51
  * **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
52
52
 
53
- * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
53
+ * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip or bzip2)
54
54
 
55
55
  * **filters:** Filter plugins options (optional).
56
56
 
@@ -298,6 +298,27 @@ Example
298
298
  - {type: gzip}
299
299
 
300
300
 
301
+ BZip2 decoder plugin
302
+ ------------------
303
+
304
+ The ``bzip2`` decoder plugin decompresses bzip2 files before input plugins read them.
305
+
306
+ Options
307
+ ~~~~~~~~~~~~~~~~~~
308
+
309
+ This plugin doesn't have any options.
310
+
311
+ Example
312
+ ~~~~~~~~~~~~~~~~~~
313
+
314
+ .. code-block:: yaml
315
+
316
+ in:
317
+ ...
318
+ decoders:
319
+ - {type: bzip2}
320
+
321
+
301
322
  File output plugin
302
323
  ------------------
303
324
 
@@ -448,6 +469,58 @@ Example
448
469
  - type: gzip
449
470
  level: 1
450
471
 
472
+
473
+ Gzip encoder plugin
474
+ ------------------
475
+
476
+ The ``gzip`` encoder plugin compresses output files using gzip.
477
+
478
+ Options
479
+ ~~~~~~~~~~~~~~~~~~
480
+
481
+ +---------+----------+----------------------------------------------------------------------+--------------------+
482
+ | name | type | description | required? |
483
+ +=========+==========+======================================================================+====================+
484
+ | level | integer | Compression level. From 0 (no compression) to 9 (best compression). | ``6`` by default |
485
+ +---------+----------+----------------------------------------------------------------------+--------------------+
486
+
487
+ Example
488
+ ~~~~~~~~~~~~~~~~~~
489
+
490
+ .. code-block:: yaml
491
+
492
+ out:
493
+ ...
494
+ encoders:
495
+ - type: gzip
496
+ level: 1
497
+
498
+ BZip2 encoder plugin
499
+ ------------------
500
+
501
+ The ``bzip2`` encoder plugin compresses output files using bzip2.
502
+
503
+ Options
504
+ ~~~~~~~~~~~~~~~~~~
505
+
506
+ +---------+----------+----------------------------------------------------------------------+--------------------+
507
+ | name | type | description | required? |
508
+ +=========+==========+======================================================================+====================+
509
+ | level | integer | Compression level. From 1 to 9 (best compression). | ``9`` by default |
510
+ +---------+----------+----------------------------------------------------------------------+--------------------+
511
+
512
+ Example
513
+ ~~~~~~~~~~~~~~~~~~
514
+
515
+ .. code-block:: yaml
516
+
517
+ out:
518
+ ...
519
+ encoders:
520
+ - type: bzip2
521
+ level: 6
522
+
523
+
451
524
  Rename filter plugin
452
525
  ------------------
453
526
 
@@ -24,9 +24,9 @@ For the smallest setup, you can unzip the package and run `./bin/elasticsearch`
24
24
 
25
25
  .. code-block:: console
26
26
 
27
- $ wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.4.4.zip
28
- $ unzip elasticsearch-1.4.4.zip
29
- $ cd elasticsearch-1.4.4
27
+ $ wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/zip/elasticsearch/2.2.0/elasticsearch-2.2.0.zip
28
+ $ unzip elasticsearch-2.2.0.zip
29
+ $ cd elasticsearch-2.2.0
30
30
  $ ./bin/elasticsearch
31
31
 
32
32
  Step 2. Download and unzip Kibana:
@@ -36,12 +36,12 @@ You can find releases from the `Kibana website <http://www.elasticsearch.org/ove
36
36
 
37
37
  .. code-block:: console
38
38
 
39
- $ wget https://download.elasticsearch.org/kibana/kibana/kibana-4.0.0-linux-x64.tar.gz
40
- $ tar zxvf kibana-4.0.0-linux-x64.tar.gz
41
- $ cd kibana-4.0.0-linux-x64
39
+ $ wget https://download.elastic.co/kibana/kibana/kibana-4.4.0-linux-x64.tar.gz
40
+ $ tar zxvf kibana-4.4.0-linux-x64.tar.gz
41
+ $ cd kibana-4.4.0-linux-x64
42
42
  $ ./bin/kibana
43
43
 
44
- Note: If you're using Mac OS X, https://download.elasticsearch.org/kibana/kibana/kibana-4.0.0-darwin-x64.tar.gz is the URL to download.
44
+ Note: If you're using Mac OS X, https://download.elastic.co/kibana/kibana/kibana-4.4.0-darwin-x64.tar.gz is the URL to download.
45
45
 
46
46
  Now Elasticsearch and Kibana are running. Open http://localhost:5601/ in your browser to see Kibana's graphical interface.
47
47
 
@@ -75,7 +75,7 @@ Loading a CSV file
75
75
 
76
76
  Assuming you have CSV files in the ``./mydata/csv/`` directory. If you don't have CSV files, you can create some using the ``embulk example ./mydata`` command.
77
77
 
78
- Create this configuration file and save as ``config.yml``:
78
+ Create this configuration file and save as ``seed.yml``:
79
79
 
80
80
  .. code-block:: yaml
81
81
 
@@ -93,9 +93,9 @@ In fact, this configuration lacks some important information. However, embulk gu
93
93
 
94
94
  .. code-block:: console
95
95
 
96
- $ embulk guess config.yml -o config-complete.yml
96
+ $ embulk guess ./mydata/seed.yml -o config.yml
97
97
 
98
- The generated config-complete.yml file should include complete information as following:
98
+ The generated config.yml file should include complete information as follows:
99
99
 
100
100
  .. code-block:: yaml
101
101
 
@@ -137,24 +137,25 @@ Now, you can run the bulk loading:
137
137
 
138
138
  .. code-block:: console
139
139
 
140
- $ embulk run config-complete.yml -o next-config.yml
140
+ $ embulk run config.yml -c diff.yml
141
141
 
142
142
  Scheduling loading by cron
143
143
  ------------------
144
144
 
145
- At the last step, you ran embulk command with ``-o next-config.yml`` file. The ``next-config.yml`` file should include a parameter named ``last_path``:
145
+ At the last step, you ran embulk command with ``-c diff.yml`` file. The ``diff.yml`` file should include a parameter named ``last_path``:
146
146
 
147
147
  .. code-block:: yaml
148
148
 
149
- last_path: mydata/csv/sample_01.csv.gz
149
+ in: {last_path: mydata/csv/sample_01.csv.gz}
150
+ out: {}
150
151
 
151
152
  With this configuration, embulk loads the files newer than this file in alphabetical order.
152
153
 
153
- For example, if you create ``./mydata/csv/sample_02.csv.gz`` file, embulk skips ``sample_01.csv.gz`` file and loads ``sample_02.csv.gz`` only next time. And the next next-config.yml file has ``last_path: mydata/csv/sample_02.csv.gz`` for the next next execution.
154
+ For example, if you create ``./mydata/csv/sample_02.csv.gz`` file, embulk skips ``sample_01.csv.gz`` file and loads ``sample_02.csv.gz`` only next time. And the next ``diff.yml`` file has ``last_path: mydata/csv/sample_02.csv.gz`` for the next next execution.
154
155
 
155
156
  So, if you want to load newly created files every day, you can set up this cron schedule:
156
157
 
157
158
  .. code-block:: cron
158
159
 
159
- 0 * * * * embulk run /path/to/next-config.yml -o /path/to/next-config.yml
160
+ 0 * * * * embulk run /path/to/config.yml -c /path/to/diff.yml
160
161
 
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.4
7
8
  release/release-0.8.3
8
9
  release/release-0.8.2
9
10
  release/release-0.8.1
@@ -0,0 +1,18 @@
1
+ Release 0.8.4
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Added ``bzip2`` encoder plugin, decoder plugin, and guess plugin.
8
+ * Fixed PageBuilder to close underlying output plugin.
9
+ * Embulk::Runner accepts .yaml as well as .yml
10
+
11
+ Java API
12
+ ------------------
13
+
14
+ * Added JsonParser.Stream API for parser plugins to parse a stream of json objects.
15
+
16
+ Release Date
17
+ ------------------
18
+ 2016-02-16
@@ -1,5 +1,6 @@
1
1
  dependencies {
2
2
  compile project(':embulk-core')
3
+ compile 'org.apache.commons:commons-compress:1.10'
3
4
 
4
5
  testCompile project(':embulk-core').sourceSets.test.output
5
6
  }
@@ -0,0 +1,55 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.io.InputStream;
4
+ import java.io.IOException;
5
+ import org.embulk.config.Task;
6
+ import org.embulk.config.TaskSource;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.ConfigInject;
9
+ import org.embulk.spi.DecoderPlugin;
10
+ import org.embulk.spi.BufferAllocator;
11
+ import org.embulk.spi.FileInput;
12
+ import org.embulk.spi.util.FileInputInputStream;
13
+ import org.embulk.spi.util.InputStreamFileInput;
14
+ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
15
+
16
+ public class Bzip2FileDecoderPlugin
17
+ implements DecoderPlugin
18
+ {
19
+ public interface PluginTask
20
+ extends Task
21
+ {
22
+ @ConfigInject
23
+ BufferAllocator getBufferAllocator();
24
+ }
25
+
26
+ @Override
27
+ public void transaction(ConfigSource config, DecoderPlugin.Control control)
28
+ {
29
+ PluginTask task = config.loadConfig(PluginTask.class);
30
+ control.run(task.dump());
31
+ }
32
+
33
+ @Override
34
+ public FileInput open(TaskSource taskSource, FileInput fileInput)
35
+ {
36
+ PluginTask task = taskSource.loadTask(PluginTask.class);
37
+ final FileInputInputStream files = new FileInputInputStream(fileInput);
38
+ return new InputStreamFileInput(
39
+ task.getBufferAllocator(),
40
+ new InputStreamFileInput.Provider() {
41
+ public InputStream openNext() throws IOException
42
+ {
43
+ if (!files.nextFile()) {
44
+ return null;
45
+ }
46
+ return new BZip2CompressorInputStream(files, true);
47
+ }
48
+
49
+ public void close() throws IOException
50
+ {
51
+ files.close();
52
+ }
53
+ });
54
+ }
55
+ }
@@ -0,0 +1,67 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.io.OutputStream;
4
+ import java.io.IOException;
5
+ import javax.validation.constraints.Min;
6
+ import javax.validation.constraints.Max;
7
+ import org.embulk.config.Task;
8
+ import org.embulk.config.Config;
9
+ import org.embulk.config.ConfigInject;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.config.TaskSource;
12
+ import org.embulk.config.ConfigSource;
13
+ import org.embulk.spi.EncoderPlugin;
14
+ import org.embulk.spi.FileOutput;
15
+ import org.embulk.spi.BufferAllocator;
16
+ import org.embulk.spi.util.FileOutputOutputStream;
17
+ import org.embulk.spi.util.OutputStreamFileOutput;
18
+ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
19
+
20
+ public class Bzip2FileEncoderPlugin
21
+ implements EncoderPlugin
22
+ {
23
+ public interface PluginTask
24
+ extends Task
25
+ {
26
+ @Config("level")
27
+ @ConfigDefault("9")
28
+ @Min(1)
29
+ @Max(9)
30
+ int getLevel();
31
+
32
+ @ConfigInject
33
+ BufferAllocator getBufferAllocator();
34
+ }
35
+
36
+ public void transaction(ConfigSource config, EncoderPlugin.Control control)
37
+ {
38
+ PluginTask task = config.loadConfig(PluginTask.class);
39
+ control.run(task.dump());
40
+ }
41
+
42
+ @Override
43
+ public FileOutput open(TaskSource taskSource, final FileOutput fileOutput)
44
+ {
45
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
46
+
47
+ final FileOutputOutputStream output = new FileOutputOutputStream(fileOutput, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.FLUSH);
48
+
49
+ return new OutputStreamFileOutput(new OutputStreamFileOutput.Provider() {
50
+ public OutputStream openNext() throws IOException
51
+ {
52
+ output.nextFile();
53
+ return new BZip2CompressorOutputStream(output, task.getLevel());
54
+ }
55
+
56
+ public void finish() throws IOException
57
+ {
58
+ fileOutput.finish();
59
+ }
60
+
61
+ public void close() throws IOException
62
+ {
63
+ fileOutput.close();
64
+ }
65
+ });
66
+ }
67
+ }
@@ -30,6 +30,7 @@ public class StandardPluginModule
30
30
 
31
31
  // file decoder plugins
32
32
  registerPluginTo(binder, DecoderPlugin.class, "gzip", GzipFileDecoderPlugin.class);
33
+ registerPluginTo(binder, DecoderPlugin.class, "bzip2", Bzip2FileDecoderPlugin.class);
33
34
 
34
35
  // output plugins
35
36
  registerPluginTo(binder, OutputPlugin.class, "file", LocalFileOutputPlugin.class);
@@ -41,12 +42,14 @@ public class StandardPluginModule
41
42
 
42
43
  // file encoder plugins
43
44
  registerPluginTo(binder, EncoderPlugin.class, "gzip", GzipFileEncoderPlugin.class);
45
+ registerPluginTo(binder, EncoderPlugin.class, "bzip2", Bzip2FileEncoderPlugin.class);
44
46
 
45
47
  // filter plugins
46
48
  registerPluginTo(binder, FilterPlugin.class, "rename", RenameFilterPlugin.class);
47
49
 
48
50
  // default guess plugins
49
51
  registerDefaultGuessPluginTo(binder, new PluginType("gzip"));
52
+ registerDefaultGuessPluginTo(binder, new PluginType("bzip2"));
50
53
  registerDefaultGuessPluginTo(binder, new PluginType("csv"));
51
54
  // charset and newline guess plugins are loaded and invoked by CsvGuessPlugin
52
55
  }
@@ -0,0 +1,23 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ class Bzip2GuessPlugin < GuessPlugin
5
+ Plugin.register_guess('bzip2', self)
6
+
7
+ # magic: BZ
8
+ # version: 'h' = bzip2
9
+ # blocksize: 1 .. 9
10
+ # block magic: 0x314159265359 (6 bytes)
11
+ block_magic = [0x31, 0x41, 0x59, 0x26, 0x53, 0x59].pack('C*')
12
+ BZIP2_HEADER_PATTERN = /BZh[1-9]#{Regexp.quote(block_magic)}/n
13
+
14
+ def guess(config, sample_buffer)
15
+ if sample_buffer[0,10] =~ BZIP2_HEADER_PATTERN
16
+ return {"decoders" => [{"type" => "bzip2"}]}
17
+ end
18
+ return {}
19
+ end
20
+ end
21
+
22
+ end
23
+ end
@@ -122,12 +122,12 @@ module Embulk
122
122
  case config
123
123
  when String
124
124
  case config
125
- when /\.yml\.liquid$/
125
+ when /\.ya?ml\.liquid$/
126
126
  require 'liquid'
127
127
  template_params = options[:template_params] || {}
128
128
  template_include_path = File.expand_path(options[:template_include_path] || File.dirname(config)) unless options[:template_include_path] == false
129
129
  @embed.newConfigLoader.fromYamlString run_liquid(File.read(config), template_params, template_include_path)
130
- when /\.yml$/
130
+ when /\.ya?ml$/
131
131
  @embed.newConfigLoader.fromYamlString File.read(config)
132
132
  else
133
133
  raise ConfigError.new("Unsupported file extension. Supported file extensions are .yml and .yml.liquid: #{config}")
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.8.3'
2
+ VERSION = '0.8.4'
3
3
  end
metadata CHANGED
@@ -1,127 +1,127 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.8.4
5
5
  platform: java
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-09 00:00:00.000000000 Z
11
+ date: 2016-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 1.10.6
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ">="
23
17
  - !ruby/object:Gem::Version
24
18
  version: 1.10.6
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :runtime
27
- - !ruby/object:Gem::Dependency
28
- name: msgpack
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - "~>"
24
+ - - ">="
32
25
  - !ruby/object:Gem::Version
33
- version: 0.7.3
26
+ version: 1.10.6
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - "~>"
37
31
  - !ruby/object:Gem::Version
38
32
  version: 0.7.3
33
+ name: msgpack
39
34
  prerelease: false
40
35
  type: :runtime
41
- - !ruby/object:Gem::Dependency
42
- name: liquid
43
36
  version_requirements: !ruby/object:Gem::Requirement
44
37
  requirements:
45
38
  - - "~>"
46
39
  - !ruby/object:Gem::Version
47
- version: 3.0.6
40
+ version: 0.7.3
41
+ - !ruby/object:Gem::Dependency
48
42
  requirement: !ruby/object:Gem::Requirement
49
43
  requirements:
50
44
  - - "~>"
51
45
  - !ruby/object:Gem::Version
52
46
  version: 3.0.6
47
+ name: liquid
53
48
  prerelease: false
54
49
  type: :runtime
55
- - !ruby/object:Gem::Dependency
56
- name: rjack-icu
57
50
  version_requirements: !ruby/object:Gem::Requirement
58
51
  requirements:
59
52
  - - "~>"
60
53
  - !ruby/object:Gem::Version
61
- version: 4.54.1.1
54
+ version: 3.0.6
55
+ - !ruby/object:Gem::Dependency
62
56
  requirement: !ruby/object:Gem::Requirement
63
57
  requirements:
64
58
  - - "~>"
65
59
  - !ruby/object:Gem::Version
66
60
  version: 4.54.1.1
61
+ name: rjack-icu
67
62
  prerelease: false
68
63
  type: :runtime
69
- - !ruby/object:Gem::Dependency
70
- name: rake
71
64
  version_requirements: !ruby/object:Gem::Requirement
72
65
  requirements:
73
- - - ">="
66
+ - - "~>"
74
67
  - !ruby/object:Gem::Version
75
- version: 0.10.0
68
+ version: 4.54.1.1
69
+ - !ruby/object:Gem::Dependency
76
70
  requirement: !ruby/object:Gem::Requirement
77
71
  requirements:
78
72
  - - ">="
79
73
  - !ruby/object:Gem::Version
80
74
  version: 0.10.0
75
+ name: rake
81
76
  prerelease: false
82
77
  type: :development
83
- - !ruby/object:Gem::Dependency
84
- name: test-unit
85
78
  version_requirements: !ruby/object:Gem::Requirement
86
79
  requirements:
87
- - - "~>"
80
+ - - ">="
88
81
  - !ruby/object:Gem::Version
89
- version: 3.0.9
82
+ version: 0.10.0
83
+ - !ruby/object:Gem::Dependency
90
84
  requirement: !ruby/object:Gem::Requirement
91
85
  requirements:
92
86
  - - "~>"
93
87
  - !ruby/object:Gem::Version
94
88
  version: 3.0.9
89
+ name: test-unit
95
90
  prerelease: false
96
91
  type: :development
97
- - !ruby/object:Gem::Dependency
98
- name: yard
99
92
  version_requirements: !ruby/object:Gem::Requirement
100
93
  requirements:
101
94
  - - "~>"
102
95
  - !ruby/object:Gem::Version
103
- version: 0.8.7
96
+ version: 3.0.9
97
+ - !ruby/object:Gem::Dependency
104
98
  requirement: !ruby/object:Gem::Requirement
105
99
  requirements:
106
100
  - - "~>"
107
101
  - !ruby/object:Gem::Version
108
102
  version: 0.8.7
103
+ name: yard
109
104
  prerelease: false
110
105
  type: :development
111
- - !ruby/object:Gem::Dependency
112
- name: kramdown
113
106
  version_requirements: !ruby/object:Gem::Requirement
114
107
  requirements:
115
108
  - - "~>"
116
109
  - !ruby/object:Gem::Version
117
- version: 1.5.0
110
+ version: 0.8.7
111
+ - !ruby/object:Gem::Dependency
118
112
  requirement: !ruby/object:Gem::Requirement
119
113
  requirements:
120
114
  - - "~>"
121
115
  - !ruby/object:Gem::Version
122
116
  version: 1.5.0
117
+ name: kramdown
123
118
  prerelease: false
124
119
  type: :development
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 1.5.0
125
125
  description: Embulk is an open-source, plugin-based bulk data loader to scale and simplify data management across heterogeneous data stores. It can collect and ship any kinds of data in high throughput with transaction control.
126
126
  email:
127
127
  - frsyuki@gmail.com
@@ -146,9 +146,10 @@ files:
146
146
  - classpath/bval-core-0.5.jar
147
147
  - classpath/bval-jsr303-0.5.jar
148
148
  - classpath/commons-beanutils-core-1.8.3.jar
149
+ - classpath/commons-compress-1.10.jar
149
150
  - classpath/commons-lang3-3.1.jar
150
- - classpath/embulk-core-0.8.3.jar
151
- - classpath/embulk-standards-0.8.3.jar
151
+ - classpath/embulk-core-0.8.4.jar
152
+ - classpath/embulk-standards-0.8.4.jar
152
153
  - classpath/guava-18.0.jar
153
154
  - classpath/guice-4.0.jar
154
155
  - classpath/guice-bootstrap-0.1.1.jar
@@ -462,7 +463,10 @@ files:
462
463
  - embulk-docs/src/release/release-0.8.1.rst
463
464
  - embulk-docs/src/release/release-0.8.2.rst
464
465
  - embulk-docs/src/release/release-0.8.3.rst
466
+ - embulk-docs/src/release/release-0.8.4.rst
465
467
  - embulk-standards/build.gradle
468
+ - embulk-standards/src/main/java/org/embulk/standards/Bzip2FileDecoderPlugin.java
469
+ - embulk-standards/src/main/java/org/embulk/standards/Bzip2FileEncoderPlugin.java
466
470
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
467
471
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
468
472
  - embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java
@@ -547,6 +551,7 @@ files:
547
551
  - lib/embulk/file_output_plugin.rb
548
552
  - lib/embulk/filter_plugin.rb
549
553
  - lib/embulk/formatter_plugin.rb
554
+ - lib/embulk/guess/bzip2.rb
550
555
  - lib/embulk/guess/charset.rb
551
556
  - lib/embulk/guess/csv.rb
552
557
  - lib/embulk/guess/gzip.rb