embulk 0.8.3 → 0.8.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4548bdb9a7472a0a1f86ef346f85a0229a433111
4
- data.tar.gz: bc5e863271a2fb217584a76618a7c9eac6706c05
3
+ metadata.gz: 671d7e093dbc15c2d87cf48cc69d6c46db01906f
4
+ data.tar.gz: 6fea3fc181559ca1821ca48f462cd2944994b741
5
5
  SHA512:
6
- metadata.gz: b47331b6a147ac624727a0e3da414c8e95cc7d74a022bdd174426572386349ecd76c2ef38ad8bdb3bf1e51e6c58967a87e4e67bb7db4f498bc4143f39799f6ee
7
- data.tar.gz: 98ce9c0581f425d516d057124ee1ea435726a1447e37e8416f36635801cca3fef32d50f719cbe1e95803e7229fd950b835290dace22cbcdb6f87b3327c332cd7
6
+ metadata.gz: b4c6d18798c4cdc272348c5febdcc875478a2d21165fe37fd9b5c1f55fdd3c97b57aa8260f840cd7e213c11cb65ec1768c6df9c1413f16ea25435b2a50b2bce6
7
+ data.tar.gz: cee70f19578b92763838f3f404af6cd562aa566ab31548c0d9197c54e27f74bd11fd7ae49c2ed0ed94dd76e25ae20bb7add76e595f20cea0a76995e77ffe895b
@@ -16,7 +16,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.8.3'
19
+ version = '0.8.4'
20
20
 
21
21
  ext {
22
22
  jrubyVersion = '9.0.4.0'
@@ -252,6 +252,7 @@ public class PageBuilder
252
252
  buffer = null;
253
253
  bufferSlice = null;
254
254
  }
255
+ output.close();
255
256
  }
256
257
 
257
258
  /* TODO for variable-length types
@@ -4,6 +4,8 @@ import java.util.List;
4
4
  import java.util.ArrayList;
5
5
  import java.util.Map;
6
6
  import java.util.HashMap;
7
+ import java.io.InputStream;
8
+ import java.io.Closeable;
7
9
  import java.io.IOException;
8
10
  import org.msgpack.value.Value;
9
11
  import org.msgpack.value.ValueFactory;
@@ -13,6 +15,14 @@ import com.fasterxml.jackson.core.JsonToken;
13
15
 
14
16
  public class JsonParser
15
17
  {
18
+ public interface Stream
19
+ extends Closeable
20
+ {
21
+ Value next() throws IOException;
22
+
23
+ void close() throws IOException;
24
+ }
25
+
16
26
  private final JsonFactory factory;
17
27
 
18
28
  public JsonParser()
@@ -21,38 +31,136 @@ public class JsonParser
21
31
  factory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
22
32
  }
23
33
 
34
+ public Stream open(InputStream in) throws IOException
35
+ {
36
+ return new StreamParseContext(factory, in);
37
+ }
38
+
24
39
  public Value parse(String json)
25
40
  {
26
- return new ParseContext(json).parse();
41
+ return new SingleParseContext(factory, json).parse();
42
+ }
43
+
44
+ private static String sampleJsonString(String json)
45
+ {
46
+ if (json.length() < 100) {
47
+ return json;
48
+ }
49
+ else {
50
+ return json.substring(0, 97) + "...";
51
+ }
52
+ }
53
+
54
+ private static class StreamParseContext
55
+ extends AbstractParseContext
56
+ implements Stream
57
+ {
58
+ public StreamParseContext(JsonFactory factory, InputStream in)
59
+ throws IOException, JsonParseException
60
+ {
61
+ super(createParser(factory, in));
62
+ }
63
+
64
+ private static com.fasterxml.jackson.core.JsonParser createParser(JsonFactory factory, InputStream in)
65
+ throws IOException
66
+ {
67
+ try {
68
+ return factory.createParser(in);
69
+ }
70
+ catch (IOException ex) {
71
+ throw ex;
72
+ }
73
+ catch (Exception ex) {
74
+ throw new JsonParseException("Failed to parse JSON", ex);
75
+ }
76
+ }
77
+
78
+ @Override
79
+ public void close() throws IOException
80
+ {
81
+ parser.close();
82
+ }
83
+
84
+ @Override
85
+ protected String sampleJsonString()
86
+ {
87
+ return "in";
88
+ }
27
89
  }
28
90
 
29
- private class ParseContext
91
+ private static class SingleParseContext
92
+ extends AbstractParseContext
30
93
  {
31
94
  private final String json;
32
- private final com.fasterxml.jackson.core.JsonParser parser;
33
95
 
34
- public ParseContext(String json)
96
+ public SingleParseContext(JsonFactory factory, String json)
35
97
  {
98
+ super(createParser(factory, json));
36
99
  this.json = json;
100
+ }
101
+
102
+ private static com.fasterxml.jackson.core.JsonParser createParser(JsonFactory factory, String json)
103
+ {
37
104
  try {
38
- this.parser = factory.createParser(json);
105
+ return factory.createParser(json);
39
106
  }
40
107
  catch (Exception ex) {
41
- throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
108
+ throw new JsonParseException("Failed to parse JSON: "+JsonParser.sampleJsonString(json), ex);
42
109
  }
43
110
  }
44
111
 
45
112
  public Value parse()
113
+ {
114
+ try {
115
+ Value v = next();
116
+ if (v == null) {
117
+ throw new JsonParseException("Unable to parse empty string");
118
+ }
119
+ return v;
120
+ }
121
+ catch (IOException ex) {
122
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
123
+ }
124
+ }
125
+
126
+ @Override
127
+ protected String sampleJsonString()
128
+ {
129
+ return JsonParser.sampleJsonString(json);
130
+ }
131
+ }
132
+
133
+ private static abstract class AbstractParseContext
134
+ {
135
+ protected final com.fasterxml.jackson.core.JsonParser parser;
136
+
137
+ public AbstractParseContext(com.fasterxml.jackson.core.JsonParser parser)
138
+ {
139
+ this.parser = parser;
140
+ }
141
+
142
+ protected abstract String sampleJsonString();
143
+
144
+ public Value next() throws IOException
46
145
  {
47
146
  try {
48
147
  JsonToken token = parser.nextToken();
148
+ if (token == null) {
149
+ return null;
150
+ }
49
151
  return jsonTokenToValue(token);
50
152
  }
153
+ catch (com.fasterxml.jackson.core.JsonParseException ex) {
154
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
155
+ }
156
+ catch (IOException ex) {
157
+ throw ex;
158
+ }
51
159
  catch (JsonParseException ex) {
52
160
  throw ex;
53
161
  }
54
- catch (Exception ex) {
55
- throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
162
+ catch (RuntimeException ex) {
163
+ throw new JsonParseException("Failed to parse JSON: "+sampleJsonString(), ex);
56
164
  }
57
165
  }
58
166
 
@@ -81,9 +189,12 @@ public class JsonParser
81
189
  List<Value> list = new ArrayList<>();
82
190
  while (true) {
83
191
  token = parser.nextToken();
84
- if(token == JsonToken.END_ARRAY) {
192
+ if (token == JsonToken.END_ARRAY) {
85
193
  return ValueFactory.newArray(list);
86
194
  }
195
+ else if (token == null) {
196
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting an element of an array: " + sampleJsonString());
197
+ }
87
198
  list.add(jsonTokenToValue(token));
88
199
  }
89
200
  }
@@ -94,11 +205,17 @@ public class JsonParser
94
205
  if (token == JsonToken.END_OBJECT) {
95
206
  return ValueFactory.newMap(map);
96
207
  }
208
+ else if (token == null) {
209
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting a key of object: " + sampleJsonString());
210
+ }
97
211
  String key = parser.getCurrentName();
98
212
  if (key == null) {
99
- throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
213
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation() + ": " + sampleJsonString());
100
214
  }
101
215
  token = parser.nextToken();
216
+ if (token == null) {
217
+ throw new JsonParseException("Unexpected end of JSON at "+parser.getTokenLocation() + " while expecting a value of object: " + sampleJsonString());
218
+ }
102
219
  Value value = jsonTokenToValue(token);
103
220
  map.put(ValueFactory.newString(key), value);
104
221
  }
@@ -108,18 +225,8 @@ public class JsonParser
108
225
  case END_OBJECT:
109
226
  case NOT_AVAILABLE:
110
227
  default:
111
- throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
228
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation() + ": " + sampleJsonString());
112
229
  }
113
230
  }
114
231
  }
115
-
116
- private static String sampleJsonString(String json)
117
- {
118
- if (json.length() < 100) {
119
- return json;
120
- }
121
- else {
122
- return json.substring(0, 97) + "...";
123
- }
124
- }
125
232
  }
@@ -44,13 +44,13 @@ A configuration file consists of following sections:
44
44
 
45
45
  * **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
46
46
 
47
- * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
47
+ * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, bzip2, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
48
48
 
49
49
  * **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
50
50
 
51
51
  * **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
52
52
 
53
- * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
53
+ * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip or bzip2)
54
54
 
55
55
  * **filters:** Filter plugins options (optional).
56
56
 
@@ -298,6 +298,27 @@ Example
298
298
  - {type: gzip}
299
299
 
300
300
 
301
+ BZip2 decoder plugin
302
+ ------------------
303
+
304
+ The ``bzip2`` decoder plugin decompresses bzip2 files before input plugins read them.
305
+
306
+ Options
307
+ ~~~~~~~~~~~~~~~~~~
308
+
309
+ This plugin doesn't have any options.
310
+
311
+ Example
312
+ ~~~~~~~~~~~~~~~~~~
313
+
314
+ .. code-block:: yaml
315
+
316
+ in:
317
+ ...
318
+ decoders:
319
+ - {type: bzip2}
320
+
321
+
301
322
  File output plugin
302
323
  ------------------
303
324
 
@@ -448,6 +469,58 @@ Example
448
469
  - type: gzip
449
470
  level: 1
450
471
 
472
+
473
+ Gzip encoder plugin
474
+ -------------------
475
+
476
+ The ``gzip`` encoder plugin compresses output files using gzip.
477
+
478
+ Options
479
+ ~~~~~~~~~~~~~~~~~~
480
+
481
+ +---------+----------+----------------------------------------------------------------------+--------------------+
482
+ | name | type | description | required? |
483
+ +=========+==========+======================================================================+====================+
484
+ | level | integer | Compression level. From 0 (no compression) to 9 (best compression). | ``6`` by default |
485
+ +---------+----------+----------------------------------------------------------------------+--------------------+
486
+
487
+ Example
488
+ ~~~~~~~~~~~~~~~~~~
489
+
490
+ .. code-block:: yaml
491
+
492
+ out:
493
+ ...
494
+ encoders:
495
+ - type: gzip
496
+ level: 1
497
+
498
+ BZip2 encoder plugin
499
+ --------------------
500
+
501
+ The ``bzip2`` encoder plugin compresses output files using bzip2.
502
+
503
+ Options
504
+ ~~~~~~~~~~~~~~~~~~
505
+
506
+ +---------+----------+----------------------------------------------------------------------+--------------------+
507
+ | name | type | description | required? |
508
+ +=========+==========+======================================================================+====================+
509
+ | level | integer | Compression level. From 1 to 9 (best compression). | ``9`` by default |
510
+ +---------+----------+----------------------------------------------------------------------+--------------------+
511
+
512
+ Example
513
+ ~~~~~~~~~~~~~~~~~~
514
+
515
+ .. code-block:: yaml
516
+
517
+ out:
518
+ ...
519
+ encoders:
520
+ - type: bzip2
521
+ level: 6
522
+
523
+
451
524
  Rename filter plugin
452
525
  ------------------
453
526
 
@@ -24,9 +24,9 @@ For the smallest setup, you can unzip the package and run `./bin/elasticsearch`
24
24
 
25
25
  .. code-block:: console
26
26
 
27
- $ wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.4.4.zip
28
- $ unzip elasticsearch-1.4.4.zip
29
- $ cd elasticsearch-1.4.4
27
+ $ wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/zip/elasticsearch/2.2.0/elasticsearch-2.2.0.zip
28
+ $ unzip elasticsearch-2.2.0.zip
29
+ $ cd elasticsearch-2.2.0
30
30
  $ ./bin/elasticsearch
31
31
 
32
32
  Step 2. Download and unzip Kibana:
@@ -36,12 +36,12 @@ You can find releases from the `Kibana website <http://www.elasticsearch.org/ove
36
36
 
37
37
  .. code-block:: console
38
38
 
39
- $ wget https://download.elasticsearch.org/kibana/kibana/kibana-4.0.0-linux-x64.tar.gz
40
- $ tar zxvf kibana-4.0.0-linux-x64.tar.gz
41
- $ cd kibana-4.0.0-linux-x64
39
+ $ wget https://download.elastic.co/kibana/kibana/kibana-4.4.0-linux-x64.tar.gz
40
+ $ tar zxvf kibana-4.4.0-linux-x64.tar.gz
41
+ $ cd kibana-4.4.0-linux-x64
42
42
  $ ./bin/kibana
43
43
 
44
- Note: If you're using Mac OS X, https://download.elasticsearch.org/kibana/kibana/kibana-4.0.0-darwin-x64.tar.gz is the URL to download.
44
+ Note: If you're using Mac OS X, https://download.elastic.co/kibana/kibana/kibana-4.4.0-darwin-x64.tar.gz is the URL to download.
45
45
 
46
46
  Now Elasticsearch and Kibana are running. Open http://localhost:5601/ in your browser to see Kibana's graphical interface.
47
47
 
@@ -75,7 +75,7 @@ Loading a CSV file
75
75
 
76
76
  Assuming you have CSV files in the ``./mydata/csv/`` directory. If you don't have any CSV files, you can create some using the ``embulk example ./mydata`` command.
77
77
 
78
- Create this configuration file and save as ``config.yml``:
78
+ Create this configuration file and save as ``seed.yml``:
79
79
 
80
80
  .. code-block:: yaml
81
81
 
@@ -93,9 +93,9 @@ In fact, this configuration lacks some important information. However, embulk gu
93
93
 
94
94
  .. code-block:: console
95
95
 
96
- $ embulk guess config.yml -o config-complete.yml
96
+ $ embulk guess ./mydata/seed.yml -o config.yml
97
97
 
98
- The generated config-complete.yml file should include complete information as following:
98
+ The generated config.yml file should include complete information as follows:
99
99
 
100
100
  .. code-block:: yaml
101
101
 
@@ -137,24 +137,25 @@ Now, you can run the bulk loading:
137
137
 
138
138
  .. code-block:: console
139
139
 
140
- $ embulk run config-complete.yml -o next-config.yml
140
+ $ embulk run config.yml -c diff.yml
141
141
 
142
142
  Scheduling loading by cron
143
143
  ------------------
144
144
 
145
- At the last step, you ran embulk command with ``-o next-config.yml`` file. The ``next-config.yml`` file should include a parameter named ``last_path``:
145
+ In the last step, you ran the embulk command with the ``-c diff.yml`` option. The ``diff.yml`` file should include a parameter named ``last_path``:
146
146
 
147
147
  .. code-block:: yaml
148
148
 
149
- last_path: mydata/csv/sample_01.csv.gz
149
+ in: {last_path: mydata/csv/sample_01.csv.gz}
150
+ out: {}
150
151
 
151
152
  With this configuration, embulk loads the files newer than this file in alphabetical order.
152
153
 
153
- For example, if you create ``./mydata/csv/sample_02.csv.gz`` file, embulk skips ``sample_01.csv.gz`` file and loads ``sample_02.csv.gz`` only next time. And the next next-config.yml file has ``last_path: mydata/csv/sample_02.csv.gz`` for the next next execution.
154
+ For example, if you create a ``./mydata/csv/sample_02.csv.gz`` file, embulk skips the ``sample_01.csv.gz`` file and loads only ``sample_02.csv.gz`` next time. The updated ``diff.yml`` file then has ``last_path: mydata/csv/sample_02.csv.gz`` for the execution after that.
154
155
 
155
156
  So, if you want to load newly created files every hour, you can set up this cron schedule:
156
157
 
157
158
  .. code-block:: cron
158
159
 
159
- 0 * * * * embulk run /path/to/next-config.yml -o /path/to/next-config.yml
160
+ 0 * * * * embulk run /path/to/config.yml -c /path/to/diff.yml
160
161
 
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.4
7
8
  release/release-0.8.3
8
9
  release/release-0.8.2
9
10
  release/release-0.8.1
@@ -0,0 +1,18 @@
1
+ Release 0.8.4
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Added ``bzip2`` encoder plugin, decoder plugin, and guess plugin.
8
+ * Fixed PageBuilder to close underlying output plugin.
9
+ * Embulk::Runner accepts .yaml as well as .yml
10
+
11
+ Java API
12
+ ------------------
13
+
14
+ * Added JsonParser.Stream API for parser plugins to parse a stream of JSON objects.
15
+
16
+ Release Date
17
+ ------------------
18
+ 2016-02-16
@@ -1,5 +1,6 @@
1
1
  dependencies {
2
2
  compile project(':embulk-core')
3
+ compile 'org.apache.commons:commons-compress:1.10'
3
4
 
4
5
  testCompile project(':embulk-core').sourceSets.test.output
5
6
  }
@@ -0,0 +1,55 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.io.InputStream;
4
+ import java.io.IOException;
5
+ import org.embulk.config.Task;
6
+ import org.embulk.config.TaskSource;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.ConfigInject;
9
+ import org.embulk.spi.DecoderPlugin;
10
+ import org.embulk.spi.BufferAllocator;
11
+ import org.embulk.spi.FileInput;
12
+ import org.embulk.spi.util.FileInputInputStream;
13
+ import org.embulk.spi.util.InputStreamFileInput;
14
+ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
15
+
16
+ public class Bzip2FileDecoderPlugin
17
+ implements DecoderPlugin
18
+ {
19
+ public interface PluginTask
20
+ extends Task
21
+ {
22
+ @ConfigInject
23
+ BufferAllocator getBufferAllocator();
24
+ }
25
+
26
+ @Override
27
+ public void transaction(ConfigSource config, DecoderPlugin.Control control)
28
+ {
29
+ PluginTask task = config.loadConfig(PluginTask.class);
30
+ control.run(task.dump());
31
+ }
32
+
33
+ @Override
34
+ public FileInput open(TaskSource taskSource, FileInput fileInput)
35
+ {
36
+ PluginTask task = taskSource.loadTask(PluginTask.class);
37
+ final FileInputInputStream files = new FileInputInputStream(fileInput);
38
+ return new InputStreamFileInput(
39
+ task.getBufferAllocator(),
40
+ new InputStreamFileInput.Provider() {
41
+ public InputStream openNext() throws IOException
42
+ {
43
+ if (!files.nextFile()) {
44
+ return null;
45
+ }
46
+ return new BZip2CompressorInputStream(files, true);
47
+ }
48
+
49
+ public void close() throws IOException
50
+ {
51
+ files.close();
52
+ }
53
+ });
54
+ }
55
+ }
@@ -0,0 +1,67 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.io.OutputStream;
4
+ import java.io.IOException;
5
+ import javax.validation.constraints.Min;
6
+ import javax.validation.constraints.Max;
7
+ import org.embulk.config.Task;
8
+ import org.embulk.config.Config;
9
+ import org.embulk.config.ConfigInject;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.config.TaskSource;
12
+ import org.embulk.config.ConfigSource;
13
+ import org.embulk.spi.EncoderPlugin;
14
+ import org.embulk.spi.FileOutput;
15
+ import org.embulk.spi.BufferAllocator;
16
+ import org.embulk.spi.util.FileOutputOutputStream;
17
+ import org.embulk.spi.util.OutputStreamFileOutput;
18
+ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
19
+
20
+ public class Bzip2FileEncoderPlugin
21
+ implements EncoderPlugin
22
+ {
23
+ public interface PluginTask
24
+ extends Task
25
+ {
26
+ @Config("level")
27
+ @ConfigDefault("9")
28
+ @Min(1)
29
+ @Max(9)
30
+ int getLevel();
31
+
32
+ @ConfigInject
33
+ BufferAllocator getBufferAllocator();
34
+ }
35
+
36
+ public void transaction(ConfigSource config, EncoderPlugin.Control control)
37
+ {
38
+ PluginTask task = config.loadConfig(PluginTask.class);
39
+ control.run(task.dump());
40
+ }
41
+
42
+ @Override
43
+ public FileOutput open(TaskSource taskSource, final FileOutput fileOutput)
44
+ {
45
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
46
+
47
+ final FileOutputOutputStream output = new FileOutputOutputStream(fileOutput, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.FLUSH);
48
+
49
+ return new OutputStreamFileOutput(new OutputStreamFileOutput.Provider() {
50
+ public OutputStream openNext() throws IOException
51
+ {
52
+ output.nextFile();
53
+ return new BZip2CompressorOutputStream(output, task.getLevel());
54
+ }
55
+
56
+ public void finish() throws IOException
57
+ {
58
+ fileOutput.finish();
59
+ }
60
+
61
+ public void close() throws IOException
62
+ {
63
+ fileOutput.close();
64
+ }
65
+ });
66
+ }
67
+ }
@@ -30,6 +30,7 @@ public class StandardPluginModule
30
30
 
31
31
  // file decoder plugins
32
32
  registerPluginTo(binder, DecoderPlugin.class, "gzip", GzipFileDecoderPlugin.class);
33
+ registerPluginTo(binder, DecoderPlugin.class, "bzip2", Bzip2FileDecoderPlugin.class);
33
34
 
34
35
  // output plugins
35
36
  registerPluginTo(binder, OutputPlugin.class, "file", LocalFileOutputPlugin.class);
@@ -41,12 +42,14 @@ public class StandardPluginModule
41
42
 
42
43
  // file encoder plugins
43
44
  registerPluginTo(binder, EncoderPlugin.class, "gzip", GzipFileEncoderPlugin.class);
45
+ registerPluginTo(binder, EncoderPlugin.class, "bzip2", Bzip2FileEncoderPlugin.class);
44
46
 
45
47
  // filter plugins
46
48
  registerPluginTo(binder, FilterPlugin.class, "rename", RenameFilterPlugin.class);
47
49
 
48
50
  // default guess plugins
49
51
  registerDefaultGuessPluginTo(binder, new PluginType("gzip"));
52
+ registerDefaultGuessPluginTo(binder, new PluginType("bzip2"));
50
53
  registerDefaultGuessPluginTo(binder, new PluginType("csv"));
51
54
  // charset and newline guess plugins are loaded and invoked by CsvGuessPlugin
52
55
  }
@@ -0,0 +1,23 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ class Bzip2GuessPlugin < GuessPlugin
5
+ Plugin.register_guess('bzip2', self)
6
+
7
+ # magic: BZ
8
+ # version: 'h' = bzip2
9
+ # blocksize: 1 .. 9
10
+ # block magic: 0x314159265359 (6 bytes)
11
+ block_magic = [0x31, 0x41, 0x59, 0x26, 0x53, 0x59].pack('C*')
12
+ BZIP2_HEADER_PATTERN = /BZh[1-9]#{Regexp.quote(block_magic)}/n
13
+
14
+ def guess(config, sample_buffer)
15
+ if sample_buffer[0,10] =~ BZIP2_HEADER_PATTERN
16
+ return {"decoders" => [{"type" => "bzip2"}]}
17
+ end
18
+ return {}
19
+ end
20
+ end
21
+
22
+ end
23
+ end
@@ -122,12 +122,12 @@ module Embulk
122
122
  case config
123
123
  when String
124
124
  case config
125
- when /\.yml\.liquid$/
125
+ when /\.ya?ml\.liquid$/
126
126
  require 'liquid'
127
127
  template_params = options[:template_params] || {}
128
128
  template_include_path = File.expand_path(options[:template_include_path] || File.dirname(config)) unless options[:template_include_path] == false
129
129
  @embed.newConfigLoader.fromYamlString run_liquid(File.read(config), template_params, template_include_path)
130
- when /\.yml$/
130
+ when /\.ya?ml$/
131
131
  @embed.newConfigLoader.fromYamlString File.read(config)
132
132
  else
133
133
  raise ConfigError.new("Unsupported file extension. Supported file extensions are .yml and .yml.liquid: #{config}")
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.8.3'
2
+ VERSION = '0.8.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.8.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-09 00:00:00.000000000 Z
11
+ date: 2016-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jruby-jars
@@ -106,9 +106,10 @@ files:
106
106
  - classpath/bval-core-0.5.jar
107
107
  - classpath/bval-jsr303-0.5.jar
108
108
  - classpath/commons-beanutils-core-1.8.3.jar
109
+ - classpath/commons-compress-1.10.jar
109
110
  - classpath/commons-lang3-3.1.jar
110
- - classpath/embulk-core-0.8.3.jar
111
- - classpath/embulk-standards-0.8.3.jar
111
+ - classpath/embulk-core-0.8.4.jar
112
+ - classpath/embulk-standards-0.8.4.jar
112
113
  - classpath/guava-18.0.jar
113
114
  - classpath/guice-4.0.jar
114
115
  - classpath/guice-bootstrap-0.1.1.jar
@@ -422,7 +423,10 @@ files:
422
423
  - embulk-docs/src/release/release-0.8.1.rst
423
424
  - embulk-docs/src/release/release-0.8.2.rst
424
425
  - embulk-docs/src/release/release-0.8.3.rst
426
+ - embulk-docs/src/release/release-0.8.4.rst
425
427
  - embulk-standards/build.gradle
428
+ - embulk-standards/src/main/java/org/embulk/standards/Bzip2FileDecoderPlugin.java
429
+ - embulk-standards/src/main/java/org/embulk/standards/Bzip2FileEncoderPlugin.java
426
430
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
427
431
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
428
432
  - embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java
@@ -507,6 +511,7 @@ files:
507
511
  - lib/embulk/file_output_plugin.rb
508
512
  - lib/embulk/filter_plugin.rb
509
513
  - lib/embulk/formatter_plugin.rb
514
+ - lib/embulk/guess/bzip2.rb
510
515
  - lib/embulk/guess/charset.rb
511
516
  - lib/embulk/guess/csv.rb
512
517
  - lib/embulk/guess/gzip.rb