embulk 0.8.18-java → 0.8.19-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -0
- data/build.gradle +10 -3
- data/embulk-cli/build.gradle +2 -0
- data/embulk-cli/src/main/bat/selfrun.bat +98 -0
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkExample.java +82 -0
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkMigrate.java +458 -0
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkNew.java +419 -0
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkSelfUpdate.java +248 -0
- data/embulk-cli/src/main/sh/selfrun.sh +0 -103
- data/embulk-cli/src/test/java/org/embulk/cli/SelfrunTest.java +158 -143
- data/embulk-core/build.gradle +2 -2
- data/embulk-core/src/main/java/org/embulk/EmbulkVersion.java +109 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +11 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +29 -3
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +47 -13
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +6 -3
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +385 -64
- data/embulk-core/src/main/java/org/embulk/spi/TempFileSpace.java +2 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +62 -0
- data/embulk-docs/src/built-in.rst +59 -21
- data/embulk-docs/src/customization.rst +8 -8
- data/embulk-docs/src/developers/index.rst +45 -0
- data/embulk-docs/src/index.rst +11 -7
- data/embulk-docs/src/recipe.rst +1 -1
- data/embulk-docs/src/recipe/{scheduled-csv-load-to-elasticsearch-kibana4.rst → scheduled-csv-load-to-elasticsearch-kibana5.rst} +26 -24
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.4.0.rst +1 -1
- data/embulk-docs/src/release/release-0.5.0.rst +1 -1
- data/embulk-docs/src/release/release-0.6.0.rst +1 -1
- data/embulk-docs/src/release/release-0.6.20.rst +1 -1
- data/embulk-docs/src/release/release-0.8.19.rst +43 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +30 -1
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +10 -0
- data/embulk-standards/src/test/java/org/embulk/standards/preview/TestFilePreview.java +73 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records_guessed.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_exec.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_load.yml +19 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_previewed.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_load.yml +19 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_previewed.csv +4 -0
- data/embulk-test/src/main/java/org/embulk/test/PreviewResultInputPlugin.java +65 -0
- data/embulk-test/src/main/java/org/embulk/test/TestingBulkLoader.java +5 -0
- data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +59 -2
- data/embulk.gemspec +2 -1
- data/lib/embulk/command/embulk_run.rb +11 -49
- data/lib/embulk/data/new/README.md.vm +106 -0
- data/lib/embulk/data/new/{gitignore.erb → gitignore.vm} +3 -3
- data/lib/embulk/data/new/java/{build.gradle.erb → build.gradle.vm} +8 -8
- data/lib/embulk/data/new/java/{decoder.java.erb → decoder.java.vm} +6 -4
- data/lib/embulk/data/new/java/{encoder.java.erb → encoder.java.vm} +7 -5
- data/lib/embulk/data/new/java/{file_input.java.erb → file_input.java.vm} +9 -7
- data/lib/embulk/data/new/java/{file_output.java.erb → file_output.java.vm} +7 -5
- data/lib/embulk/data/new/java/{filter.java.erb → filter.java.vm} +4 -3
- data/lib/embulk/data/new/java/{formatter.java.erb → formatter.java.vm} +5 -4
- data/lib/embulk/data/new/java/{input.java.erb → input.java.vm} +6 -4
- data/lib/embulk/data/new/java/{output.java.erb → output.java.vm} +7 -5
- data/lib/embulk/data/new/java/{parser.java.erb → parser.java.vm} +5 -4
- data/lib/embulk/data/new/java/plugin_loader.rb.vm +3 -0
- data/lib/embulk/data/new/java/test.java.vm +5 -0
- data/lib/embulk/data/new/ruby/decoder_guess.rb.vm +25 -0
- data/lib/embulk/data/new/ruby/{filter.rb.erb → filter.rb.vm} +2 -2
- data/lib/embulk/data/new/ruby/{formatter.rb.erb → formatter.rb.vm} +2 -2
- data/lib/embulk/data/new/ruby/gemspec.vm +20 -0
- data/lib/embulk/data/new/ruby/{input.rb.erb → input.rb.vm} +10 -10
- data/lib/embulk/data/new/ruby/{output.rb.erb → output.rb.vm} +7 -7
- data/lib/embulk/data/new/ruby/{parser.rb.erb → parser.rb.vm} +2 -2
- data/lib/embulk/data/new/ruby/parser_guess.rb.vm +65 -0
- data/lib/embulk/guess/csv.rb +5 -0
- data/lib/embulk/version.rb +22 -1
- metadata +55 -35
- data/lib/embulk/command/embulk_example.rb +0 -33
- data/lib/embulk/command/embulk_generate_bin.rb +0 -62
- data/lib/embulk/command/embulk_migrate_plugin.rb +0 -244
- data/lib/embulk/command/embulk_new_plugin.rb +0 -126
- data/lib/embulk/command/embulk_selfupdate.rb +0 -121
- data/lib/embulk/data/new/README.md.erb +0 -111
- data/lib/embulk/data/new/java/plugin_loader.rb.erb +0 -3
- data/lib/embulk/data/new/java/test.java.erb +0 -5
- data/lib/embulk/data/new/ruby/decoder_guess.rb.erb +0 -25
- data/lib/embulk/data/new/ruby/gemspec.erb +0 -20
- data/lib/embulk/data/new/ruby/parser_guess.rb.erb +0 -65
@@ -27,7 +27,8 @@ public class TempFileSpace
|
|
27
27
|
|
28
28
|
public File createTempFile(String fileExt)
|
29
29
|
{
|
30
|
-
|
30
|
+
// Thread names contain ':' which is not valid as file names in Windows.
|
31
|
+
return createTempFile(Thread.currentThread().getName().replaceAll(":", "_") + "_", fileExt);
|
31
32
|
}
|
32
33
|
|
33
34
|
public File createTempFile(String prefix, String fileExt)
|
@@ -327,6 +327,68 @@ public class TestPageBuilderReader
|
|
327
327
|
).size());
|
328
328
|
}
|
329
329
|
|
330
|
+
@Test
|
331
|
+
public void testDoubleWriteStringsToRow()
|
332
|
+
{
|
333
|
+
MockPageOutput output = new MockPageOutput();
|
334
|
+
Schema schema = Schema.builder()
|
335
|
+
.add("col0", STRING)
|
336
|
+
.add("col1", STRING)
|
337
|
+
.add("col2", STRING)
|
338
|
+
.build();
|
339
|
+
|
340
|
+
builder = new PageBuilder(bufferAllocator, schema, output);
|
341
|
+
builder.setString(0, "v0");
|
342
|
+
builder.setString(1, "v1");
|
343
|
+
builder.setNull(2);
|
344
|
+
builder.setString(0, "v2"); // stored to page for col0
|
345
|
+
builder.setNull(1); // null is stored to page for col1
|
346
|
+
builder.setString(2, "v3"); // stored to page for col2
|
347
|
+
builder.addRecord();
|
348
|
+
builder.finish();
|
349
|
+
builder.close();
|
350
|
+
|
351
|
+
reader = new PageReader(schema);
|
352
|
+
reader.setPage(output.pages.get(0));
|
353
|
+
assertTrue(reader.nextRecord());
|
354
|
+
assertEquals(reader.getString(0), "v2");
|
355
|
+
assertTrue(reader.isNull(1));
|
356
|
+
assertEquals(reader.getString(2), "v3");
|
357
|
+
assertFalse(reader.nextRecord());
|
358
|
+
reader.close();
|
359
|
+
}
|
360
|
+
|
361
|
+
@Test
|
362
|
+
public void testDoubleWriteJsonsToRow()
|
363
|
+
{
|
364
|
+
MockPageOutput output = new MockPageOutput();
|
365
|
+
Schema schema = Schema.builder()
|
366
|
+
.add("col0", JSON)
|
367
|
+
.add("col1", JSON)
|
368
|
+
.add("col2", JSON)
|
369
|
+
.build();
|
370
|
+
|
371
|
+
builder = new PageBuilder(bufferAllocator, schema, output);
|
372
|
+
builder.setJson(0, newString("v0"));
|
373
|
+
builder.setJson(1, newString("v1"));
|
374
|
+
builder.setNull(2);
|
375
|
+
builder.setJson(0, newString("v2")); // store to page for col0
|
376
|
+
builder.setNull(1); // null is stored to page for col1
|
377
|
+
builder.setJson(2, newString("v3")); // store to page for col2
|
378
|
+
builder.addRecord();
|
379
|
+
builder.finish();
|
380
|
+
builder.close();
|
381
|
+
|
382
|
+
reader = new PageReader(schema);
|
383
|
+
reader.setPage(output.pages.get(0));
|
384
|
+
assertTrue(reader.nextRecord());
|
385
|
+
assertEquals(reader.getJson(0), newString("v2"));
|
386
|
+
assertTrue(reader.isNull(1));
|
387
|
+
assertEquals(reader.getJson(2), newString("v3"));
|
388
|
+
assertFalse(reader.nextRecord());
|
389
|
+
reader.close();
|
390
|
+
}
|
391
|
+
|
330
392
|
@Test
|
331
393
|
public void testRepeatableClose()
|
332
394
|
{
|
@@ -48,7 +48,7 @@ A configuration file consists of following sections:
|
|
48
48
|
|
49
49
|
* **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
|
50
50
|
|
51
|
-
* **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `
|
51
|
+
* **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `jsonl <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
|
52
52
|
|
53
53
|
* **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip or bzip2)
|
54
54
|
|
@@ -95,7 +95,7 @@ Configuration file can include another configuration file. To use it, configurat
|
|
95
95
|
|
96
96
|
File will be searched from the relative path of the input configuration file. And file name will be ``_<name>.yml.liquid``. For example, if you add ``{% include 'subdir/inc' %}`` tag to ``myconfig/config.yml.liquid`` file, it includes ``myconfig/subdir/_inc.yml.liquid`` file.
|
97
97
|
|
98
|
-
.. code-block::
|
98
|
+
.. code-block:: liquid
|
99
99
|
|
100
100
|
# config.yml.liquid
|
101
101
|
{% include 'in_mysql' %}
|
@@ -128,17 +128,19 @@ The ``file`` input plugin reads files from local file system.
|
|
128
128
|
Options
|
129
129
|
~~~~~~~~
|
130
130
|
|
131
|
-
|
132
|
-
| name
|
133
|
-
|
134
|
-
| path\_prefix
|
135
|
-
|
136
|
-
| parsers
|
137
|
-
|
138
|
-
| decoders
|
139
|
-
|
140
|
-
| last\_path
|
141
|
-
|
131
|
+
+------------------+----------+------------------------------------------------+-----------------------+
|
132
|
+
| name | type | description | required? |
|
133
|
+
+==================+==========+================================================+=======================+
|
134
|
+
| path\_prefix | string | Path prefix of input files | required |
|
135
|
+
+------------------+----------+------------------------------------------------+-----------------------+
|
136
|
+
| parsers | hash | Parsers configurations (see below) | required |
|
137
|
+
+------------------+----------+------------------------------------------------+-----------------------+
|
138
|
+
| decoders | array | Decoder configuration (see below) | |
|
139
|
+
+------------------+----------+------------------------------------------------+-----------------------+
|
140
|
+
| last\_path | string | Name of last read file in previous operation | |
|
141
|
+
+------------------+----------+------------------------------------------------+-----------------------+
|
142
|
+
| follow\_symlinks | boolean | If `true`, follow symbolic link directories | ``false`` by default |
|
143
|
+
+------------------+----------+------------------------------------------------+-----------------------+
|
142
144
|
|
143
145
|
The ``path_prefix`` option is required. If you have files as following, you may set ``path_prefix: /path/to/files/sample_``:
|
144
146
|
|
@@ -322,7 +324,7 @@ JSON parser plugin
|
|
322
324
|
|
323
325
|
The ``json`` parser plugin parses a JSON file that contains a sequence of JSON objects. Example:
|
324
326
|
|
325
|
-
.. code-block::
|
327
|
+
.. code-block:: javascript
|
326
328
|
|
327
329
|
{"time":1455829282,"ip":"93.184.216.34","name":"frsyuki"}
|
328
330
|
{"time":1455829282,"ip":"172.36.8.109","name":"sadayuki"}
|
@@ -913,18 +915,22 @@ The guess executor is called by ``guess`` command. It executes default guess plu
|
|
913
915
|
Options
|
914
916
|
~~~~~~~~
|
915
917
|
|
916
|
-
|
917
|
-
| name
|
918
|
-
|
919
|
-
| guess_plugins
|
920
|
-
|
921
|
-
| exclude_guess_plugins
|
922
|
-
|
918
|
+
+---------------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
919
|
+
| name | type | description | required? |
|
920
|
+
+===========================+==========+======================================================================+======================================+
|
921
|
+
| guess_plugins | array | ``guess`` command uses specified guess plugins. | ``[]`` by default |
|
922
|
+
+---------------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
923
|
+
| exclude_guess_plugins | array | ``guess`` command doesn't use specified plugins. | ``[]`` by default |
|
924
|
+
+---------------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
925
|
+
| guess_sample_buffer_bytes | int | Bytes of sample buffer that it tries to read from input source. | 32768 (32KB) by default |
|
926
|
+
+---------------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
923
927
|
|
924
928
|
The ``guess_plugins`` option adds the specified guess plugins to the bottom of the list of default guess plugins.
|
925
929
|
|
926
930
|
The ``exclude_guess_plugins`` option excludes the specified guess plugins from the list of default guess plugins that the guess executor uses.
|
927
931
|
|
932
|
+
The ``guess_sample_buffer_bytes`` option controls the bytes of sample buffer that GuessExecutor tries to read from specified input source.
|
933
|
+
|
928
934
|
This example shows how to use ``csv_all_strings`` guess plugin, which suggests column types within CSV files as string types. It needs to be explicitly specified by users when it's used instead of ``csv`` guess plugin because the plugin is not included in default guess plugins. We also can exclude default ``csv`` guess plugin.
|
929
935
|
|
930
936
|
Example
|
@@ -941,3 +947,35 @@ Example
|
|
941
947
|
out:
|
942
948
|
type: ...
|
943
949
|
...
|
950
|
+
|
951
|
+
Preview executor
|
952
|
+
----------------
|
953
|
+
|
954
|
+
The preview executor is called by ``preview`` command. It tries to read sample buffer from a specified input source and writes them to Page objects. ``preview`` outputs the Page objects to console.
|
955
|
+
|
956
|
+
Options
|
957
|
+
~~~~~~~~
|
958
|
+
|
959
|
+
+-------------------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
960
|
+
| name | type | description | required? |
|
961
|
+
+===============================+==========+======================================================================+======================================+
|
962
|
+
| preview_sample_buffer_bytes | int | Bytes of sample buffer that it tries to read from input source. | 32768 (32KB) by default |
|
963
|
+
+-------------------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
964
|
+
|
965
|
+
The ``preview_sample_buffer_bytes`` option controls the bytes of sample buffer that PreviewExecutor tries to read from specified input source.
|
966
|
+
|
967
|
+
This example shows how to change the bytes of sample buffer.
|
968
|
+
|
969
|
+
Example
|
970
|
+
~~~~~~~~
|
971
|
+
|
972
|
+
.. code-block:: yaml
|
973
|
+
|
974
|
+
exec:
|
975
|
+
preview_sample_buffer_bytes: 65536 # 64KB
|
976
|
+
in:
|
977
|
+
type: ...
|
978
|
+
...
|
979
|
+
out:
|
980
|
+
type: ...
|
981
|
+
...
|
@@ -19,7 +19,7 @@ Creating a new plugin is 4 steps:
|
|
19
19
|
This article describes how to create a plugin step by step.
|
20
20
|
|
21
21
|
Step 1: Creating a new project
|
22
|
-
|
22
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
23
23
|
|
24
24
|
Embulk comes with a number of templates that generate a new project so that you can start development instantly. Because the generated project contains completely working code without additional code, you can focus on the necessary coding.
|
25
25
|
|
@@ -59,7 +59,7 @@ For example, if you want to parse a new file format using Java, type this comman
|
|
59
59
|
This will create a Java-based parser plugin called ``myformat`` in ``embulk-parser-myformat`` directory.
|
60
60
|
|
61
61
|
Step 2: Building the project
|
62
|
-
|
62
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
63
63
|
|
64
64
|
If the plugin is Java-based, you need to build the project. To build, type this command:
|
65
65
|
|
@@ -71,7 +71,7 @@ If the plugin is Java-based, you need to build the project. To build, type this
|
|
71
71
|
Now, the plugin is ready to use.
|
72
72
|
|
73
73
|
Step 3: Confirm it works
|
74
|
-
|
74
|
+
~~~~~~~~~~~~~~~~~~~~~~~~
|
75
75
|
|
76
76
|
The next step is to actually use the plugin.
|
77
77
|
|
@@ -82,7 +82,7 @@ Let's suppose you have a configuration file named ``your-config.yml``. You can u
|
|
82
82
|
$ embulk run -L ./embulk-parser-myformat/ your-config.yml
|
83
83
|
|
84
84
|
Step 4: Modifying the code
|
85
|
-
|
85
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
86
86
|
|
87
87
|
The final step is to modify code as you want!
|
88
88
|
|
@@ -104,7 +104,7 @@ Releasing plugins
|
|
104
104
|
You can release publicly so that all people can use your awesome plugins.
|
105
105
|
|
106
106
|
Checking plugin description
|
107
|
-
|
107
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
108
108
|
|
109
109
|
To prepare the plugin ready to release, you need to include some additional information. The plugin information is written in this file:
|
110
110
|
|
@@ -136,14 +136,14 @@ You will find following section in the file.
|
|
136
136
|
The items in above example are important. Please make sure that they are good.
|
137
137
|
|
138
138
|
Creating account on RubyGems.org
|
139
|
-
|
139
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
140
140
|
|
141
141
|
Embulk uses `RubyGems.org <https://rubygems.org/>`_ as a package distribution service. Please create an account there to release plugins at `Sign Up <https://rubygems.org/sign_up>`_ page.
|
142
142
|
|
143
143
|
Don't forget the password! It will be necessary at the next step.
|
144
144
|
|
145
145
|
Releasing the plugin to RubyGems.org
|
146
|
-
|
146
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
147
147
|
|
148
148
|
Now, you're ready to release the plugin. To release, type the following command:
|
149
149
|
|
@@ -158,7 +158,7 @@ Now, you're ready to release the plugin. To release, type following command:
|
|
158
158
|
If everything is good, you can find your plugin at https://rubygems.org/. Congratulations!
|
159
159
|
|
160
160
|
Installing your plugin
|
161
|
-
|
161
|
+
~~~~~~~~~~~~~~~~~~~~~~
|
162
162
|
|
163
163
|
Usage of plugin installer is:
|
164
164
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
For core/plugin developers
|
2
|
+
===========================
|
3
|
+
|
4
|
+
Plugin API documents
|
5
|
+
---------------------
|
6
|
+
|
7
|
+
* `JavaDoc <javadoc/index.html>`_
|
8
|
+
* `RDoc <rdoc/_index.html>`_
|
9
|
+
|
10
|
+
Developer guidelines
|
11
|
+
---------------------
|
12
|
+
|
13
|
+
* Guidelines for core developers (TBA)
|
14
|
+
* Guidelines for plugin developers (TBA)
|
15
|
+
|
16
|
+
"Embulk v2"
|
17
|
+
-----------------------
|
18
|
+
|
19
|
+
The Embulk project plans a set of big updates in the Embulk core so that Embulk, its community and the eco-system remain sustainable. Backward compatibility in plugins and configurations is considered especially carefully. Other parts, for example command line options, may have some incompatibility. Join discussions if you have concerns or requests!
|
20
|
+
|
21
|
+
JFYI: "Embulk v2" is just a code name. The next versions will still be ``0.?``.
|
22
|
+
|
23
|
+
Summary for plugin developers
|
24
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
25
|
+
|
26
|
+
(TBA)
|
27
|
+
|
28
|
+
Design docs for core developers
|
29
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
30
|
+
|
31
|
+
* `Summary <https://docs.google.com/document/d/1-8jDAisymt9SwnJpQiEnACIKC-OGMt7Ygv5FpkCeApU/edit>`_
|
32
|
+
|
33
|
+
* `Un-JRuby in the boot process <https://docs.google.com/document/d/1f-ziuHY4fXXmSLNHCXP3AAXUajEyPMLQAvH4jgTjpGg/edit>`_
|
34
|
+
|
35
|
+
* [TBD] Update plugin loading: non-Gem plugins, namespaces, and versions
|
36
|
+
|
37
|
+
* [TBD] Un-JRuby in the plugin load process
|
38
|
+
|
39
|
+
* [TBD] Provide better OAuth support in plugins
|
40
|
+
|
41
|
+
* [TBD] Buffer in output plugins
|
42
|
+
|
43
|
+
* [TBD] Structure documents: configs and errors
|
44
|
+
|
45
|
+
* [TBD] Deliver errors and notifications more flexibly
|
data/embulk-docs/src/index.rst
CHANGED
@@ -29,7 +29,7 @@ You can define a bulk data loading using combination of input and output plugins
|
|
29
29
|
:width: 640px
|
30
30
|
:target: https://github.com/embulk/embulk#quick-start
|
31
31
|
|
32
|
-
For example, `this tutorial <recipe/scheduled-csv-load-to-elasticsearch-
|
32
|
+
For example, `this tutorial <recipe/scheduled-csv-load-to-elasticsearch-kibana5.html>`_ describes how to use **file** input plugin with **csv** parser plugin and **gzip** decoder plugin to read CSV files, and **elasticsearch** output plugin to load the records to Elasticsearch.
|
33
33
|
|
34
34
|
Documents
|
35
35
|
------------------
|
@@ -71,18 +71,22 @@ Documents
|
|
71
71
|
built-in
|
72
72
|
|
73
73
|
.. toctree::
|
74
|
-
:maxdepth:
|
74
|
+
:maxdepth: 2
|
75
75
|
|
76
|
-
|
76
|
+
release
|
77
|
+
|
78
|
+
For developers
|
79
|
+
---------------
|
80
|
+
|
81
|
+
* `GitHub <https://github.com/embulk/embulk>`_
|
77
82
|
|
78
83
|
* `JavaDoc <javadoc/index.html>`_
|
79
84
|
|
80
85
|
* `RDoc <rdoc/_index.html>`_
|
81
86
|
|
82
|
-
* `
|
87
|
+
* `Other resources for developers <developers/index.html>`_
|
83
88
|
|
84
89
|
.. toctree::
|
85
|
-
:maxdepth:
|
86
|
-
|
87
|
-
release
|
90
|
+
:maxdepth: 3
|
88
91
|
|
92
|
+
customization
|
data/embulk-docs/src/recipe.rst
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
Scheduled bulk data loading to Elasticsearch + Kibana
|
2
|
-
|
1
|
+
Scheduled bulk data loading to Elasticsearch + Kibana 5 from CSV files
|
2
|
+
======================================================================
|
3
3
|
|
4
4
|
.. contents::
|
5
5
|
:local:
|
@@ -11,56 +11,58 @@ This article shows how to:
|
|
11
11
|
* Visualize the data with Kibana interactively.
|
12
12
|
* Schedule the data loading every hour using cron.
|
13
13
|
|
14
|
-
This guide assumes you are using Ubuntu
|
14
|
+
This guide assumes you are using Ubuntu 16.10 (Yakkety Yak) or macOS.
|
15
15
|
|
16
|
-
Setup Elasticsearch and Kibana
|
17
|
-
|
16
|
+
Setup Elasticsearch and Kibana 5
|
17
|
+
--------------------------------
|
18
18
|
|
19
19
|
Step 1. Download and start Elasticsearch.
|
20
|
-
|
20
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
21
21
|
|
22
|
-
You can find releases from the `Elasticsearch website <
|
22
|
+
You can find releases from the `Elasticsearch website <https://www.elastic.co/downloads/elasticsearch>`_.
|
23
23
|
For the smallest setup, you can unzip the package and run the ``./bin/elasticsearch`` command:
|
24
24
|
|
25
25
|
.. code-block:: console
|
26
26
|
|
27
|
-
$ wget https://
|
28
|
-
$ unzip elasticsearch-
|
29
|
-
$ cd elasticsearch-
|
27
|
+
$ wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.3.0.zip
|
28
|
+
$ unzip elasticsearch-5.3.0.zip
|
29
|
+
$ cd elasticsearch-5.3.0
|
30
30
|
$ ./bin/elasticsearch
|
31
31
|
|
32
32
|
Step 2. Download and unzip Kibana:
|
33
|
-
|
33
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
34
34
|
|
35
|
-
You can find releases from the `Kibana website <
|
35
|
+
You can find releases from the `Kibana website <https://www.elastic.co/downloads/kibana>`_. Open a new console and run the following commands:
|
36
36
|
|
37
37
|
.. code-block:: console
|
38
38
|
|
39
|
-
$ wget https://
|
40
|
-
$ tar zxvf kibana-
|
41
|
-
$ cd kibana-
|
39
|
+
$ wget https://artifacts.elastic.co/downloads/kibana/kibana-5.3.0-linux-x86_64.tar.gz
|
40
|
+
$ tar zxvf kibana-5.3.0-linux-x86_64.tar.gz
|
41
|
+
$ cd kibana-5.3.0-linux-x86_64
|
42
42
|
$ ./bin/kibana
|
43
43
|
|
44
|
-
Note: If you're using
|
44
|
+
Note: If you're using macOS, https://artifacts.elastic.co/downloads/kibana/kibana-5.3.0-darwin-x86_64.tar.gz is the URL to download.
|
45
45
|
|
46
46
|
Now Elasticsearch and Kibana are running. Open http://localhost:5601/ in your browser to see Kibana's graphical interface.
|
47
47
|
|
48
48
|
|
49
49
|
Setup Embulk
|
50
|
-
|
50
|
+
------------
|
51
51
|
|
52
52
|
Step 1. Download Embulk binary:
|
53
|
-
|
53
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
54
54
|
|
55
|
-
You can find the latest embulk binary from the `releases <https://bintray.com/embulk/maven/embulk/view#files>`_. Because Embulk is a single executable binary, you can simply download it to /
|
55
|
+
You can find the latest embulk binary from the `releases <https://bintray.com/embulk/maven/embulk/view#files>`_. Because Embulk is a single executable binary, you can simply download it to the ~/.embulk/bin directory and set the executable flag as follows:
|
56
56
|
|
57
57
|
.. code-block:: console
|
58
58
|
|
59
|
-
$
|
60
|
-
$
|
59
|
+
$ curl --create-dirs -o ~/.embulk/bin/embulk -L "https://dl.embulk.org/embulk-latest.jar"
|
60
|
+
$ chmod +x ~/.embulk/bin/embulk
|
61
|
+
$ echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
62
|
+
$ source ~/.bashrc
|
61
63
|
|
62
64
|
Step 2. Install Elasticsearch plugin
|
63
|
-
|
65
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
64
66
|
|
65
67
|
You also need Elasticsearch plugin for Embulk. You can install the plugin with this command:
|
66
68
|
|
@@ -140,7 +142,7 @@ Now, you can run the bulk loading:
|
|
140
142
|
$ embulk run config.yml -c diff.yml
|
141
143
|
|
142
144
|
Scheduling loading by cron
|
143
|
-
|
145
|
+
--------------------------
|
144
146
|
|
145
147
|
At the last step, you ran embulk command with ``-c diff.yml`` file. The ``diff.yml`` file should include a parameter named ``last_path``:
|
146
148
|
|
@@ -155,7 +157,7 @@ For example, if you create ``./mydata/csv/sample_02.csv.gz`` file, embulk skips
|
|
155
157
|
|
156
158
|
So, if you want to load newly created files every hour, you can set up this cron schedule:
|
157
159
|
|
158
|
-
.. code-block::
|
160
|
+
.. code-block:: text
|
159
161
|
|
160
162
|
0 * * * * embulk run /path/to/config.yml -c /path/to/diff.yml
|
161
163
|
|