embulk 0.8.3 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4548bdb9a7472a0a1f86ef346f85a0229a433111
4
- data.tar.gz: bc5e863271a2fb217584a76618a7c9eac6706c05
3
+ metadata.gz: 671d7e093dbc15c2d87cf48cc69d6c46db01906f
4
+ data.tar.gz: 6fea3fc181559ca1821ca48f462cd2944994b741
5
5
  SHA512:
6
- metadata.gz: b47331b6a147ac624727a0e3da414c8e95cc7d74a022bdd174426572386349ecd76c2ef38ad8bdb3bf1e51e6c58967a87e4e67bb7db4f498bc4143f39799f6ee
7
- data.tar.gz: 98ce9c0581f425d516d057124ee1ea435726a1447e37e8416f36635801cca3fef32d50f719cbe1e95803e7229fd950b835290dace22cbcdb6f87b3327c332cd7
6
+ metadata.gz: b4c6d18798c4cdc272348c5febdcc875478a2d21165fe37fd9b5c1f55fdd3c97b57aa8260f840cd7e213c11cb65ec1768c6df9c1413f16ea25435b2a50b2bce6
7
+ data.tar.gz: cee70f19578b92763838f3f404af6cd562aa566ab31548c0d9197c54e27f74bd11fd7ae49c2ed0ed94dd76e25ae20bb7add76e595f20cea0a76995e77ffe895b
@@ -16,7 +16,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.8.3'
19
+ version = '0.8.4'
20
20
 
21
21
  ext {
22
22
  jrubyVersion = '9.0.4.0'
@@ -252,6 +252,7 @@ public class PageBuilder
252
252
  buffer = null;
253
253
  bufferSlice = null;
254
254
  }
255
+ output.close();
255
256
  }
256
257
 
257
258
  /* TODO for variable-length types
@@ -4,6 +4,8 @@ import java.util.List;
4
4
  import java.util.ArrayList;
5
5
  import java.util.Map;
6
6
  import java.util.HashMap;
7
+ import java.io.InputStream;
8
+ import java.io.Closeable;
7
9
  import java.io.IOException;
8
10
  import org.msgpack.value.Value;
9
11
  import org.msgpack.value.ValueFactory;
@@ -13,6 +15,14 @@ import com.fasterxml.jackson.core.JsonToken;
13
15
 
14
16
  public class JsonParser
15
17
  {
18
+ public interface Stream
19
+ extends Closeable
20
+ {
21
+ Value next() throws IOException;
22
+
23
+ void close() throws IOException;
24
+ }
25
+
16
26
  private final JsonFactory factory;
17
27
 
18
28
  public JsonParser()
@@ -21,38 +31,136 @@ public class JsonParser
21
31
  factory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
22
32
  }
23
33
 
34
+ public Stream open(InputStream in) throws IOException
35
+ {
36
+ return new StreamParseContext(factory, in);
37
+ }
38
+
24
39
  public Value parse(String json)
25
40
  {
26
- return new ParseContext(json).parse();
41
+ return new SingleParseContext(factory, json).parse();
42
+ }
43
+
44
+ private static String sampleJsonString(String json)
45
+ {
46
+ if (json.length() < 100) {
47
+ return json;
48
+ }
49
+ else {
50
+ return json.substring(0, 97) + "...";
51
+ }
52
+ }
53
+
54
+ private static class StreamParseContext
55
+ extends AbstractParseContext
56
+ implements Stream
57
+ {
58
+ public StreamParseContext(JsonFactory factory, InputStream in)
59
+ throws IOException, JsonParseException
60
+ {
61
+ super(createParser(factory, in));
62
+ }
63
+
64
+ private static com.fasterxml.jackson.core.JsonParser createParser(JsonFactory factory, InputStream in)
65
+ throws IOException
66
+ {
67
+ try {
68
+ return factory.createParser(in);
69
+ }
70
+ catch (IOException ex) {
71
+ throw ex;
72
+ }
73
+ catch (Exception ex) {
74
+ throw new JsonParseException("Failed to parse JSON", ex);
75
+ }
76
+ }
77
+
78
+ @Override
79
+ public void close() throws IOException
80
+ {
81
+ parser.close();
82
+ }
83
+
84
+ @Override
85
+ protected String sampleJsonString()
86
+ {
87
+ return "in";
88
+ }
27
89
  }
28
90
 
29
- private class ParseContext
91
+ private static class SingleParseContext
92
+ extends AbstractParseContext
30
93
  {
31
94
  private final String json;
32
- private final com.fasterxml.jackson.core.JsonParser parser;
33
95
 
34
- public ParseContext(String json)
96
+ public SingleParseContext(JsonFactory factory, String json)
35
97
  {
98
+ super(createParser(factory, json));
36
99
  this.json = json;
100
+ }
101
+
102
+ private static com.fasterxml.jackson.core.JsonParser createParser(JsonFactory factory, String json)
103
+ {
37
104
  try {
38
- this.parser = factory.createParser(json);
105
+ return factory.createParser(json);
39
106
  }
40
107
  catch (Exception ex) {
41
- throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
108
+ throw new JsonParseException("Failed to parse JSON: "+JsonParser.sampleJsonString(json), ex);
42
109
  }
43
110
  }
44
111
 
45
112
  public Value parse()
113
+ {
114
+ try {
115
+ Value v = next();
116
+ if (v == null) {
117
+ throw new JsonParseException("Unable to parse empty string");
118
+ }
119
+ return v;
120
+ }
121
+ catch (IOException ex) {
122
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
123
+ }
124
+ }
125
+
126
+ @Override
127
+ protected String sampleJsonString()
128
+ {
129
+ return JsonParser.sampleJsonString(json);
130
+ }
131
+ }
132
+
133
+ private static abstract class AbstractParseContext
134
+ {
135
+ protected final com.fasterxml.jackson.core.JsonParser parser;
136
+
137
+ public AbstractParseContext(com.fasterxml.jackson.core.JsonParser parser)
138
+ {
139
+ this.parser = parser;
140
+ }
141
+
142
+ protected abstract String sampleJsonString();
143
+
144
+ public Value next() throws IOException
46
145
  {
47
146
  try {
48
147
  JsonToken token = parser.nextToken();
148
+ if (token == null) {
149
+ return null;
150
+ }
49
151
  return jsonTokenToValue(token);
50
152
  }
153
+ catch (com.fasterxml.jackson.core.JsonParseException ex) {
154
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
155
+ }
156
+ catch (IOException ex) {
157
+ throw ex;
158
+ }
51
159
  catch (JsonParseException ex) {
52
160
  throw ex;
53
161
  }
54
- catch (Exception ex) {
55
- throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
162
+ catch (RuntimeException ex) {
163
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
56
164
  }
57
165
  }
58
166
 
@@ -81,9 +189,12 @@ public class JsonParser
81
189
  List<Value> list = new ArrayList<>();
82
190
  while (true) {
83
191
  token = parser.nextToken();
84
- if(token == JsonToken.END_ARRAY) {
192
+ if (token == JsonToken.END_ARRAY) {
85
193
  return ValueFactory.newArray(list);
86
194
  }
195
+ else if (token == null) {
196
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting an element of an array: " + sampleJsonString());
197
+ }
87
198
  list.add(jsonTokenToValue(token));
88
199
  }
89
200
  }
@@ -94,11 +205,17 @@ public class JsonParser
94
205
  if (token == JsonToken.END_OBJECT) {
95
206
  return ValueFactory.newMap(map);
96
207
  }
208
+ else if (token == null) {
209
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting a key of object: " + sampleJsonString());
210
+ }
97
211
  String key = parser.getCurrentName();
98
212
  if (key == null) {
99
- throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
213
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation() + ": " + sampleJsonString());
100
214
  }
101
215
  token = parser.nextToken();
216
+ if (token == null) {
217
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting a value of object: " + sampleJsonString());
218
+ }
102
219
  Value value = jsonTokenToValue(token);
103
220
  map.put(ValueFactory.newString(key), value);
104
221
  }
@@ -108,18 +225,8 @@ public class JsonParser
108
225
  case END_OBJECT:
109
226
  case NOT_AVAILABLE:
110
227
  default:
111
- throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
228
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation() + ": " + sampleJsonString());
112
229
  }
113
230
  }
114
231
  }
115
-
116
- private static String sampleJsonString(String json)
117
- {
118
- if (json.length() < 100) {
119
- return json;
120
- }
121
- else {
122
- return json.substring(0, 97) + "...";
123
- }
124
- }
125
232
  }
@@ -44,13 +44,13 @@ A configuration file consists of following sections:
44
44
 
45
45
  * **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
46
46
 
47
- * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
47
+ * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, bzip2, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
48
48
 
49
49
  * **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
50
50
 
51
51
  * **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
52
52
 
53
- * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
53
+ * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip or bzip2)
54
54
 
55
55
  * **filters:** Filter plugins options (optional).
56
56
 
@@ -298,6 +298,27 @@ Example
298
298
  - {type: gzip}
299
299
 
300
300
 
301
+ BZip2 decoder plugin
302
+ --------------------
303
+
304
+ The ``bzip2`` decoder plugin decompresses bzip2 files before input plugins read them.
305
+
306
+ Options
307
+ ~~~~~~~~~~~~~~~~~~
308
+
309
+ This plugin doesn't have any options.
310
+
311
+ Example
312
+ ~~~~~~~~~~~~~~~~~~
313
+
314
+ .. code-block:: yaml
315
+
316
+ in:
317
+ ...
318
+ decoders:
319
+ - {type: bzip2}
320
+
321
+
301
322
  File output plugin
302
323
  ------------------
303
324
 
@@ -448,6 +469,58 @@ Example
448
469
  - type: gzip
449
470
  level: 1
450
471
 
472
+
473
+ Gzip encoder plugin
474
+ ------------------
475
+
476
+ The ``gzip`` encoder plugin compresses output files using gzip.
477
+
478
+ Options
479
+ ~~~~~~~~~~~~~~~~~~
480
+
481
+ +---------+----------+----------------------------------------------------------------------+--------------------+
482
+ | name | type | description | required? |
483
+ +=========+==========+======================================================================+====================+
484
+ | level | integer | Compression level. From 0 (no compression) to 9 (best compression). | ``6`` by default |
485
+ +---------+----------+----------------------------------------------------------------------+--------------------+
486
+
487
+ Example
488
+ ~~~~~~~~~~~~~~~~~~
489
+
490
+ .. code-block:: yaml
491
+
492
+ out:
493
+ ...
494
+ encoders:
495
+ - type: gzip
496
+ level: 1
497
+
498
+ BZip2 encoder plugin
499
+ --------------------
500
+
501
+ The ``bzip2`` encoder plugin compresses output files using bzip2.
502
+
503
+ Options
504
+ ~~~~~~~~~~~~~~~~~~
505
+
506
+ +---------+----------+----------------------------------------------------------------------+--------------------+
507
+ | name | type | description | required? |
508
+ +=========+==========+======================================================================+====================+
509
+ | level | integer | Compression level. From 1 to 9 (best compression). | ``9`` by default |
510
+ +---------+----------+----------------------------------------------------------------------+--------------------+
511
+
512
+ Example
513
+ ~~~~~~~~~~~~~~~~~~
514
+
515
+ .. code-block:: yaml
516
+
517
+ out:
518
+ ...
519
+ encoders:
520
+ - type: bzip2
521
+ level: 6
522
+
523
+
451
524
  Rename filter plugin
452
525
  ------------------
453
526
 
@@ -24,9 +24,9 @@ For the smallest setup, you can unzip the package and run `./bin/elasticsearch`
24
24
 
25
25
  .. code-block:: console
26
26
 
27
- $ wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.4.4.zip
28
- $ unzip elasticsearch-1.4.4.zip
29
- $ cd elasticsearch-1.4.4
27
+ $ wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/zip/elasticsearch/2.2.0/elasticsearch-2.2.0.zip
28
+ $ unzip elasticsearch-2.2.0.zip
29
+ $ cd elasticsearch-2.2.0
30
30
  $ ./bin/elasticsearch
31
31
 
32
32
  Step 2. Download and unzip Kibana:
@@ -36,12 +36,12 @@ You can find releases from the `Kibana website <http://www.elasticsearch.org/ove
36
36
 
37
37
  .. code-block:: console
38
38
 
39
- $ wget https://download.elasticsearch.org/kibana/kibana/kibana-4.0.0-linux-x64.tar.gz
40
- $ tar zxvf kibana-4.0.0-linux-x64.tar.gz
41
- $ cd kibana-4.0.0-linux-x64
39
+ $ wget https://download.elastic.co/kibana/kibana/kibana-4.4.0-linux-x64.tar.gz
40
+ $ tar zxvf kibana-4.4.0-linux-x64.tar.gz
41
+ $ cd kibana-4.4.0-linux-x64
42
42
  $ ./bin/kibana
43
43
 
44
- Note: If you're using Mac OS X, https://download.elasticsearch.org/kibana/kibana/kibana-4.0.0-darwin-x64.tar.gz is the URL to download.
44
+ Note: If you're using Mac OS X, https://download.elastic.co/kibana/kibana/kibana-4.4.0-darwin-x64.tar.gz is the URL to download.
45
45
 
46
46
  Now Elasticsearch and Kibana are running. Open http://localhost:5601/ in your browser to see Kibana's graphical interface.
47
47
 
@@ -75,7 +75,7 @@ Loading a CSV file
75
75
 
76
76
  Assuming you have CSV files in the ``./mydata/csv/`` directory. If you don't have CSV files, you can create some using the ``embulk example ./mydata`` command.
77
77
 
78
- Create this configuration file and save as ``config.yml``:
78
+ Create this configuration file and save as ``seed.yml``:
79
79
 
80
80
  .. code-block:: yaml
81
81
 
@@ -93,9 +93,9 @@ In fact, this configuration lacks some important information. However, embulk gu
93
93
 
94
94
  .. code-block:: console
95
95
 
96
- $ embulk guess config.yml -o config-complete.yml
96
+ $ embulk guess ./mydata/seed.yml -o config.yml
97
97
 
98
- The generated config-complete.yml file should include complete information as following:
98
+ The generated config.yml file should include complete information as follows:
99
99
 
100
100
  .. code-block:: yaml
101
101
 
@@ -137,24 +137,25 @@ Now, you can run the bulk loading:
137
137
 
138
138
  .. code-block:: console
139
139
 
140
- $ embulk run config-complete.yml -o next-config.yml
140
+ $ embulk run config.yml -c diff.yml
141
141
 
142
142
  Scheduling loading by cron
143
143
  ------------------
144
144
 
145
- At the last step, you ran embulk command with ``-o next-config.yml`` file. The ``next-config.yml`` file should include a parameter named ``last_path``:
145
+ In the last step, you ran the embulk command with the ``-c diff.yml`` option. The ``diff.yml`` file should include a parameter named ``last_path``:
146
146
 
147
147
  .. code-block:: yaml
148
148
 
149
- last_path: mydata/csv/sample_01.csv.gz
149
+ in: {last_path: mydata/csv/sample_01.csv.gz}
150
+ out: {}
150
151
 
151
152
  With this configuration, embulk loads the files newer than this file in alphabetical order.
152
153
 
153
- For example, if you create ``./mydata/csv/sample_02.csv.gz`` file, embulk skips ``sample_01.csv.gz`` file and loads ``sample_02.csv.gz`` only next time. And the next next-config.yml file has ``last_path: mydata/csv/sample_02.csv.gz`` for the next next execution.
154
+ For example, if you create the ``./mydata/csv/sample_02.csv.gz`` file, embulk skips the ``sample_01.csv.gz`` file and loads only ``sample_02.csv.gz`` next time. The next ``diff.yml`` file then has ``last_path: mydata/csv/sample_02.csv.gz`` for the execution after that.
154
155
 
155
156
  So, if you want to load newly created files every day, you can set up this cron schedule:
156
157
 
157
158
  .. code-block:: cron
158
159
 
159
- 0 * * * * embulk run /path/to/next-config.yml -o /path/to/next-config.yml
160
+ 0 * * * * embulk run /path/to/config.yml -c /path/to/diff.yml
160
161
 
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.4
7
8
  release/release-0.8.3
8
9
  release/release-0.8.2
9
10
  release/release-0.8.1
@@ -0,0 +1,18 @@
1
+ Release 0.8.4
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Added ``bzip2`` encoder plugin, decoder plugin, and guess plugin.
8
+ * Fixed PageBuilder to close the underlying output plugin.
9
+ * Embulk::Runner accepts .yaml as well as .yml
10
+
11
+ Java API
12
+ ------------------
13
+
14
+ * Added JsonParser.Stream API for parser plugins to parse a stream of JSON objects.
15
+
16
+ Release Date
17
+ ------------------
18
+ 2016-02-16
@@ -1,5 +1,6 @@
1
1
  dependencies {
2
2
  compile project(':embulk-core')
3
+ compile 'org.apache.commons:commons-compress:1.10'
3
4
 
4
5
  testCompile project(':embulk-core').sourceSets.test.output
5
6
  }
@@ -0,0 +1,55 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.io.InputStream;
4
+ import java.io.IOException;
5
+ import org.embulk.config.Task;
6
+ import org.embulk.config.TaskSource;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.ConfigInject;
9
+ import org.embulk.spi.DecoderPlugin;
10
+ import org.embulk.spi.BufferAllocator;
11
+ import org.embulk.spi.FileInput;
12
+ import org.embulk.spi.util.FileInputInputStream;
13
+ import org.embulk.spi.util.InputStreamFileInput;
14
+ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
15
+
16
+ public class Bzip2FileDecoderPlugin
17
+ implements DecoderPlugin
18
+ {
19
+ public interface PluginTask
20
+ extends Task
21
+ {
22
+ @ConfigInject
23
+ BufferAllocator getBufferAllocator();
24
+ }
25
+
26
+ @Override
27
+ public void transaction(ConfigSource config, DecoderPlugin.Control control)
28
+ {
29
+ PluginTask task = config.loadConfig(PluginTask.class);
30
+ control.run(task.dump());
31
+ }
32
+
33
+ @Override
34
+ public FileInput open(TaskSource taskSource, FileInput fileInput)
35
+ {
36
+ PluginTask task = taskSource.loadTask(PluginTask.class);
37
+ final FileInputInputStream files = new FileInputInputStream(fileInput);
38
+ return new InputStreamFileInput(
39
+ task.getBufferAllocator(),
40
+ new InputStreamFileInput.Provider() {
41
+ public InputStream openNext() throws IOException
42
+ {
43
+ if (!files.nextFile()) {
44
+ return null;
45
+ }
46
+ return new BZip2CompressorInputStream(files, true);
47
+ }
48
+
49
+ public void close() throws IOException
50
+ {
51
+ files.close();
52
+ }
53
+ });
54
+ }
55
+ }
@@ -0,0 +1,67 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.io.OutputStream;
4
+ import java.io.IOException;
5
+ import javax.validation.constraints.Min;
6
+ import javax.validation.constraints.Max;
7
+ import org.embulk.config.Task;
8
+ import org.embulk.config.Config;
9
+ import org.embulk.config.ConfigInject;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.config.TaskSource;
12
+ import org.embulk.config.ConfigSource;
13
+ import org.embulk.spi.EncoderPlugin;
14
+ import org.embulk.spi.FileOutput;
15
+ import org.embulk.spi.BufferAllocator;
16
+ import org.embulk.spi.util.FileOutputOutputStream;
17
+ import org.embulk.spi.util.OutputStreamFileOutput;
18
+ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
19
+
20
+ public class Bzip2FileEncoderPlugin
21
+ implements EncoderPlugin
22
+ {
23
+ public interface PluginTask
24
+ extends Task
25
+ {
26
+ @Config("level")
27
+ @ConfigDefault("9")
28
+ @Min(1)
29
+ @Max(9)
30
+ int getLevel();
31
+
32
+ @ConfigInject
33
+ BufferAllocator getBufferAllocator();
34
+ }
35
+
36
+ public void transaction(ConfigSource config, EncoderPlugin.Control control)
37
+ {
38
+ PluginTask task = config.loadConfig(PluginTask.class);
39
+ control.run(task.dump());
40
+ }
41
+
42
+ @Override
43
+ public FileOutput open(TaskSource taskSource, final FileOutput fileOutput)
44
+ {
45
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
46
+
47
+ final FileOutputOutputStream output = new FileOutputOutputStream(fileOutput, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.FLUSH);
48
+
49
+ return new OutputStreamFileOutput(new OutputStreamFileOutput.Provider() {
50
+ public OutputStream openNext() throws IOException
51
+ {
52
+ output.nextFile();
53
+ return new BZip2CompressorOutputStream(output, task.getLevel());
54
+ }
55
+
56
+ public void finish() throws IOException
57
+ {
58
+ fileOutput.finish();
59
+ }
60
+
61
+ public void close() throws IOException
62
+ {
63
+ fileOutput.close();
64
+ }
65
+ });
66
+ }
67
+ }
@@ -30,6 +30,7 @@ public class StandardPluginModule
30
30
 
31
31
  // file decoder plugins
32
32
  registerPluginTo(binder, DecoderPlugin.class, "gzip", GzipFileDecoderPlugin.class);
33
+ registerPluginTo(binder, DecoderPlugin.class, "bzip2", Bzip2FileDecoderPlugin.class);
33
34
 
34
35
  // output plugins
35
36
  registerPluginTo(binder, OutputPlugin.class, "file", LocalFileOutputPlugin.class);
@@ -41,12 +42,14 @@ public class StandardPluginModule
41
42
 
42
43
  // file encoder plugins
43
44
  registerPluginTo(binder, EncoderPlugin.class, "gzip", GzipFileEncoderPlugin.class);
45
+ registerPluginTo(binder, EncoderPlugin.class, "bzip2", Bzip2FileEncoderPlugin.class);
44
46
 
45
47
  // filter plugins
46
48
  registerPluginTo(binder, FilterPlugin.class, "rename", RenameFilterPlugin.class);
47
49
 
48
50
  // default guess plugins
49
51
  registerDefaultGuessPluginTo(binder, new PluginType("gzip"));
52
+ registerDefaultGuessPluginTo(binder, new PluginType("bzip2"));
50
53
  registerDefaultGuessPluginTo(binder, new PluginType("csv"));
51
54
  // charset and newline guess plugins are loaded and invoked by CsvGuessPlugin
52
55
  }
@@ -0,0 +1,23 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ class Bzip2GuessPlugin < GuessPlugin
5
+ Plugin.register_guess('bzip2', self)
6
+
7
+ # magic: BZ
8
+ # version: 'h' = bzip2
9
+ # blocksize: 1 .. 9
10
+ # block magic: 0x314159265359 (6 bytes)
11
+ block_magic = [0x31, 0x41, 0x59, 0x26, 0x53, 0x59].pack('C*')
12
+ BZIP2_HEADER_PATTERN = /BZh[1-9]#{Regexp.quote(block_magic)}/n
13
+
14
+ def guess(config, sample_buffer)
15
+ if sample_buffer[0,10] =~ BZIP2_HEADER_PATTERN
16
+ return {"decoders" => [{"type" => "bzip2"}]}
17
+ end
18
+ return {}
19
+ end
20
+ end
21
+
22
+ end
23
+ end
@@ -122,12 +122,12 @@ module Embulk
122
122
  case config
123
123
  when String
124
124
  case config
125
- when /\.yml\.liquid$/
125
+ when /\.ya?ml\.liquid$/
126
126
  require 'liquid'
127
127
  template_params = options[:template_params] || {}
128
128
  template_include_path = File.expand_path(options[:template_include_path] || File.dirname(config)) unless options[:template_include_path] == false
129
129
  @embed.newConfigLoader.fromYamlString run_liquid(File.read(config), template_params, template_include_path)
130
- when /\.yml$/
130
+ when /\.ya?ml$/
131
131
  @embed.newConfigLoader.fromYamlString File.read(config)
132
132
  else
133
133
  raise ConfigError.new("Unsupported file extension. Supported file extensions are .yml and .yml.liquid: #{config}")
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.8.3'
2
+ VERSION = '0.8.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.8.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-09 00:00:00.000000000 Z
11
+ date: 2016-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jruby-jars
@@ -106,9 +106,10 @@ files:
106
106
  - classpath/bval-core-0.5.jar
107
107
  - classpath/bval-jsr303-0.5.jar
108
108
  - classpath/commons-beanutils-core-1.8.3.jar
109
+ - classpath/commons-compress-1.10.jar
109
110
  - classpath/commons-lang3-3.1.jar
110
- - classpath/embulk-core-0.8.3.jar
111
- - classpath/embulk-standards-0.8.3.jar
111
+ - classpath/embulk-core-0.8.4.jar
112
+ - classpath/embulk-standards-0.8.4.jar
112
113
  - classpath/guava-18.0.jar
113
114
  - classpath/guice-4.0.jar
114
115
  - classpath/guice-bootstrap-0.1.1.jar
@@ -422,7 +423,10 @@ files:
422
423
  - embulk-docs/src/release/release-0.8.1.rst
423
424
  - embulk-docs/src/release/release-0.8.2.rst
424
425
  - embulk-docs/src/release/release-0.8.3.rst
426
+ - embulk-docs/src/release/release-0.8.4.rst
425
427
  - embulk-standards/build.gradle
428
+ - embulk-standards/src/main/java/org/embulk/standards/Bzip2FileDecoderPlugin.java
429
+ - embulk-standards/src/main/java/org/embulk/standards/Bzip2FileEncoderPlugin.java
426
430
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
427
431
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
428
432
  - embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java
@@ -507,6 +511,7 @@ files:
507
511
  - lib/embulk/file_output_plugin.rb
508
512
  - lib/embulk/filter_plugin.rb
509
513
  - lib/embulk/formatter_plugin.rb
514
+ - lib/embulk/guess/bzip2.rb
510
515
  - lib/embulk/guess/charset.rb
511
516
  - lib/embulk/guess/csv.rb
512
517
  - lib/embulk/guess/gzip.rb