embulk 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +5 -0
- data/embulk-docs/plugins/index.html.erb +1 -1
- data/embulk-docs/src/_static/embulk-architecture.png +0 -0
- data/embulk-docs/src/_static/embulk-logo.png +0 -0
- data/embulk-docs/src/built-in.rst +335 -0
- data/embulk-docs/src/index.rst +49 -4
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +2 -2
- data/embulk-docs/src/release.rst +22 -21
- data/embulk-docs/src/release/release-0.5.5.rst +18 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +23 -23
- data/lib/embulk/data/new/java/build.gradle.erb +1 -1
- data/lib/embulk/guess/csv.rb +2 -0
- data/lib/embulk/version.rb +1 -1
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f78ef9df69c9d408b6a7df450706a54cb646c596
|
4
|
+
data.tar.gz: 79a55069daa0ec3f952fd3e6d66f9536b9b6eac5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51c83cefab8712f70e350ccc446ecab9c11779447c94ddc5c7b4c10597e1c41fe9d6853e7c7b3ff0f7d132082a604901cf7344b0382711c42d101b20f210a359
|
7
|
+
data.tar.gz: 17d4ab5552e98c6bc0fa3368cbf22ab87814e035dbae5d8b6e6c91aa6778f9d89ab2a666935b382e0dd0e7252a68460c10ea54ac9d18bf4664dfc4b00d5cf84e
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
30
30
|
Following 4 commands install embulk to your home directory:
|
31
31
|
|
32
32
|
```
|
33
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.
|
33
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar
|
34
34
|
chmod +x ~/.embulk/bin/embulk
|
35
35
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
36
36
|
source ~/.bashrc
|
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
45
45
|
You can assume the jar file is a .bat file.
|
46
46
|
|
47
47
|
```
|
48
|
-
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.
|
48
|
+
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -OutFile embulk.bat}"
|
49
49
|
```
|
50
50
|
|
51
51
|
Next step: [Trying examples](#trying-examples)
|
data/build.gradle
CHANGED
@@ -2,6 +2,7 @@ package org.embulk.spi;
|
|
2
2
|
|
3
3
|
import org.slf4j.Logger;
|
4
4
|
import org.embulk.config.Task;
|
5
|
+
import org.embulk.config.ModelManager;
|
5
6
|
import org.embulk.config.CommitReport;
|
6
7
|
import org.embulk.config.ConfigDiff;
|
7
8
|
import org.embulk.config.ConfigSource;
|
@@ -49,6 +50,11 @@ public class Exec
|
|
49
50
|
return session().getBufferAllocator();
|
50
51
|
}
|
51
52
|
|
53
|
+
public static ModelManager getModelManager()
|
54
|
+
{
|
55
|
+
return session().getModelManager();
|
56
|
+
}
|
57
|
+
|
52
58
|
public static <T> T newPlugin(Class<T> iface, PluginType type)
|
53
59
|
{
|
54
60
|
return session().newPlugin(iface, type);
|
Binary file
|
Binary file
|
@@ -0,0 +1,335 @@
|
|
1
|
+
Configuration
|
2
|
+
==================================
|
3
|
+
|
4
|
+
.. contents::
|
5
|
+
:local:
|
6
|
+
:depth: 2
|
7
|
+
|
8
|
+
Embulk configuration file format
|
9
|
+
------------------
|
10
|
+
|
11
|
+
Embulk uses a YAML file to define a bulk data loading. Here is an example of the file:
|
12
|
+
|
13
|
+
.. code-block:: yaml
|
14
|
+
|
15
|
+
in:
|
16
|
+
type: file
|
17
|
+
path_prefix: ./mydata/csv/
|
18
|
+
decoders:
|
19
|
+
- {type: gzip}
|
20
|
+
parser:
|
21
|
+
charset: UTF-8
|
22
|
+
newline: CRLF
|
23
|
+
type: csv
|
24
|
+
delimiter: ','
|
25
|
+
quote: '"'
|
26
|
+
escape: ''
|
27
|
+
null_string: 'NULL'
|
28
|
+
skip_header_lines: 1
|
29
|
+
columns:
|
30
|
+
- {name: id, type: long}
|
31
|
+
- {name: account, type: long}
|
32
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
33
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
34
|
+
- {name: comment, type: string}
|
35
|
+
filters:
|
36
|
+
- type: speedometer
|
37
|
+
speed_limit: 250000
|
38
|
+
out:
|
39
|
+
type: stdout
|
40
|
+
|
41
|
+
A configuration file consists of following sections:
|
42
|
+
|
43
|
+
* **in:** Input plugin options. An input plugin is either record-based (`MySQL <https://github.com/embulk/embulk-input-jdbc>`_, `DynamoDB <https://github.com/lulichn/embulk-input-dynamodb>`_, etc) or file-based (`S3 <https://github.com/embulk/embulk-input-s3>`_, `HTTP <https://github.com/takumakanari/embulk-input-http>`_, etc).
|
44
|
+
|
45
|
+
* **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
|
46
|
+
|
47
|
+
* **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
|
48
|
+
|
49
|
+
* **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
|
50
|
+
|
51
|
+
* **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
|
52
|
+
|
53
|
+
* **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
|
54
|
+
|
55
|
+
* **filters:** Filter plugins options (optional).
|
56
|
+
|
57
|
+
* **exec:** Executor plugin options. An executor plugin controls parallel processing (such as built-in thread executor, `Hadoop MapReduce executor <https://github.com/embulk/embulk-executor-mapreduce>`_)
|
58
|
+
|
59
|
+
In many cases, you only need to write the **in:**, **out:** and **formatter:** sections, because the ``guess`` command guesses the **parser:** and **decoder:** options for you. See also the `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
|
60
|
+
|
61
|
+
|
62
|
+
Local file input plugin
|
63
|
+
------------------
|
64
|
+
|
65
|
+
The ``file`` input plugin reads files from local file system.
|
66
|
+
|
67
|
+
Options
|
68
|
+
~~~~~~~~~~~~~~~~~~
|
69
|
+
|
70
|
+
+----------------+----------+------------------------------------------------+-----------+
|
71
|
+
| name | type | description | required? |
|
72
|
+
+================+==========+================================================+===========+
|
73
|
+
| path\_prefix | string | Path prefix of input files | required |
|
74
|
+
+----------------+----------+------------------------------------------------+-----------+
|
75
|
+
| parsers | hash | Parsers configurations (see below) | required |
|
76
|
+
+----------------+----------+------------------------------------------------+-----------+
|
77
|
+
| decoders | array | Decoder configuration (see below) | |
|
78
|
+
+----------------+----------+------------------------------------------------+-----------+
|
79
|
+
| last\_path | string | Name of last read file in previous operation | |
|
80
|
+
+----------------+----------+------------------------------------------------+-----------+
|
81
|
+
|
82
|
+
The ``path_prefix`` option is required. If you have files as following, you may set ``path_prefix: /path/to/files/sample_``:
|
83
|
+
|
84
|
+
::
|
85
|
+
|
86
|
+
.
|
87
|
+
`-- path
|
88
|
+
`-- to
|
89
|
+
`-- files
|
90
|
+
|-- sample_01.csv -> read
|
91
|
+
|-- sample_02.csv -> read
|
92
|
+
|-- sample_03.csv -> read
|
93
|
+
|-- sample_04.csv -> read
|
94
|
+
|
95
|
+
The ``last_path`` option is used to skip files whose paths come before, or are the same as, the given file in dictionary order.
|
96
|
+
For example, if you set ``last_path: /path/to/files/sample_02.csv``, Embulk reads following files:
|
97
|
+
|
98
|
+
::
|
99
|
+
|
100
|
+
.
|
101
|
+
`-- path
|
102
|
+
`-- to
|
103
|
+
`-- files
|
104
|
+
|-- sample_01.csv -> skip
|
105
|
+
|-- sample_02.csv -> skip
|
106
|
+
|-- sample_03.csv -> read
|
107
|
+
|-- sample_04.csv -> read
|
108
|
+
|
109
|
+
Example
|
110
|
+
~~~~~~~~~~~~~~~~~~
|
111
|
+
|
112
|
+
.. code-block:: yaml
|
113
|
+
|
114
|
+
in:
|
115
|
+
type: file
|
116
|
+
path_prefix: /path/to/files/sample_
|
117
|
+
last_path: /path/to/files/sample_02.csv
|
118
|
+
parser:
|
119
|
+
...
|
120
|
+
|
121
|
+
In most cases, you'll use ``guess`` to configure the parsers and decoders. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
|
122
|
+
|
123
|
+
CSV parser plugin
|
124
|
+
------------------
|
125
|
+
|
126
|
+
The ``csv`` parser plugin parses CSV and TSV files.
|
127
|
+
|
128
|
+
Options
|
129
|
+
~~~~~~~~~~~~~~~~~~
|
130
|
+
|
131
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
132
|
+
| name | type | description | required? |
|
133
|
+
+============================+==========+================================================================================================================+========================+
|
134
|
+
| delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
|
135
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
136
|
+
| quote | string | The character surrounding a quoted value | ``\"`` by default |
|
137
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
138
|
+
| escape | string | Escape character to escape a special character | ``\\`` by default |
|
139
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
140
|
+
| skip\_header\_lines | integer | Skip this number of lines first. Set 1 if the file has header line. | ``0`` by default |
|
141
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
142
|
+
| null\_string | string | If a value is this string, converts it to NULL. For example, set ``\N`` for CSV files created by mysqldump | |
|
143
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
144
|
+
| trim\_if\_not\_quoted | boolean | If true, remove spaces of a value if the value is not surrounded by the quote character | ``false`` by default |
|
145
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
146
|
+
| allow\_optional\_columns | boolean | If true, set null to insufficient columns. Otherwise, skip the row in case of insufficient number of columns | ``false`` by default |
|
147
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
148
|
+
| max\_quoted\_size\_limit | integer | Maximum number of bytes of a quoted value. If a value exceeds the limit, the row will be skipped | ``131072`` by default |
|
149
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
150
|
+
| default\_timezone | string | Time zone of timestamp columns if the value itself doesn't include time zone description (eg. Asia/Tokyo) | ``UTC`` by default |
|
151
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
152
|
+
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
153
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
154
|
+
| charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
|
155
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
156
|
+
| columns | hash | Columns (see below) | required |
|
157
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
158
|
+
|
159
|
+
The ``columns`` option declares the list of columns. This CSV parser plugin ignores the header line.
|
160
|
+
|
161
|
+
+----------+-------------------------------------------------+
|
162
|
+
| name | description |
|
163
|
+
+==========+=================================================+
|
164
|
+
| name | Name of the column |
|
165
|
+
+----------+-------------------------------------------------+
|
166
|
+
| type | Type of the column (see below) |
|
167
|
+
+----------+-------------------------------------------------+
|
168
|
+
| format | Format of the timestamp if type is timestamp |
|
169
|
+
+----------+-------------------------------------------------+
|
170
|
+
|
171
|
+
List of types:
|
172
|
+
|
173
|
+
+-------------+----------------------------------------------+
|
174
|
+
| name | description |
|
175
|
+
+=============+==============================================+
|
176
|
+
| boolean | true or false |
|
177
|
+
+-------------+----------------------------------------------+
|
178
|
+
| long | 64-bit signed integers |
|
179
|
+
+-------------+----------------------------------------------+
|
180
|
+
| timestamp | Date and time with nano-seconds precision |
|
181
|
+
+-------------+----------------------------------------------+
|
182
|
+
| double | 64-bit floating point numbers |
|
183
|
+
+-------------+----------------------------------------------+
|
184
|
+
| string | Strings |
|
185
|
+
+-------------+----------------------------------------------+
|
186
|
+
|
187
|
+
You can use ``guess`` to automatically generate the column settings. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
|
188
|
+
|
189
|
+
Example
|
190
|
+
~~~~~~~~~~~~~~~~~~
|
191
|
+
|
192
|
+
.. code-block:: yaml
|
193
|
+
|
194
|
+
in:
|
195
|
+
...
|
196
|
+
parser:
|
197
|
+
type: csv
|
198
|
+
charset: UTF-8
|
199
|
+
newline: CRLF
|
200
|
+
delimiter: "\t"
|
201
|
+
quote: '"'
|
202
|
+
escape: ''
|
203
|
+
null_string: 'NULL'
|
204
|
+
skip_header_lines: 1
|
205
|
+
columns:
|
206
|
+
- {name: id, type: long}
|
207
|
+
- {name: account, type: long}
|
208
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
209
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
210
|
+
- {name: comment, type: string}
|
211
|
+
|
212
|
+
Gzip decoder plugin
|
213
|
+
------------------
|
214
|
+
|
215
|
+
The ``gzip`` decoder plugin decompresses gzip files before input plugins read them.
|
216
|
+
|
217
|
+
Options
|
218
|
+
~~~~~~~~~~~~~~~~~~
|
219
|
+
|
220
|
+
This plugin doesn't have any options.
|
221
|
+
|
222
|
+
Example
|
223
|
+
~~~~~~~~~~~~~~~~~~
|
224
|
+
|
225
|
+
.. code-block:: yaml
|
226
|
+
|
227
|
+
in:
|
228
|
+
...
|
229
|
+
decoders:
|
230
|
+
- {type: gzip}
|
231
|
+
|
232
|
+
|
233
|
+
File output plugin
|
234
|
+
------------------
|
235
|
+
|
236
|
+
The ``file`` output plugin writes records to local file system.
|
237
|
+
|
238
|
+
Options
|
239
|
+
~~~~~~~~~~~~~~~~~~
|
240
|
+
|
241
|
+
+--------------------+----------+---------------------------------------------------+----------------------------+
|
242
|
+
| name | type | description | required? |
|
243
|
+
+====================+==========+===================================================+============================+
|
244
|
+
| path\_prefix | string | Path prefix of the output files | required |
|
245
|
+
+--------------------+----------+---------------------------------------------------+----------------------------+
|
246
|
+
| sequence\_format | string | Format of the sequence number of the output files | ``.%03d.%02d`` by default |
|
247
|
+
+--------------------+----------+---------------------------------------------------+----------------------------+
|
248
|
+
| file\_ext | string | Path suffix of the output files | required |
|
249
|
+
+--------------------+----------+---------------------------------------------------+----------------------------+
|
250
|
+
|
251
|
+
For example, if you set ``path_prefix: /path/to/output/sample``, ``sequence_format: ".%03d.%02d"``, and ``file_ext: .csv``, name of the output files will be as following:
|
252
|
+
|
253
|
+
::
|
254
|
+
|
255
|
+
.
|
256
|
+
`-- path
|
257
|
+
`-- to
|
258
|
+
`-- output
|
259
|
+
|-- sample.01.000.csv
|
260
|
+
|-- sample.02.000.csv
|
261
|
+
|-- sample.03.000.csv
|
262
|
+
|-- sample.04.000.csv
|
263
|
+
|
264
|
+
``sequence_format`` formats task index and sequence number in a task.
|
265
|
+
|
266
|
+
Example
|
267
|
+
~~~~~~~~~~~~~~~~~~
|
268
|
+
|
269
|
+
.. code-block:: yaml
|
270
|
+
|
271
|
+
out:
|
272
|
+
type: file
|
273
|
+
path_prefix: /path/to/output/sample
|
274
|
+
file_ext: .csv
|
275
|
+
formatter:
|
276
|
+
...
|
277
|
+
|
278
|
+
CSV formatter plugin
|
279
|
+
------------------
|
280
|
+
|
281
|
+
The ``csv`` formatter plugin formats records using CSV or TSV format.
|
282
|
+
|
283
|
+
Options
|
284
|
+
~~~~~~~~~~~~~~~~~~
|
285
|
+
|
286
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
287
|
+
| name | type | description | required? |
|
288
|
+
+================+==========+=======================================================================================================+========================+
|
289
|
+
| delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
|
290
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
291
|
+
| header\_line | boolean | If true, write the header line with column name at the first line | |
|
292
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
293
|
+
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
294
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
295
|
+
| charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
|
296
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
297
|
+
|
298
|
+
Example
|
299
|
+
~~~~~~~~~~~~~~~~~~
|
300
|
+
|
301
|
+
.. code-block:: yaml
|
302
|
+
|
303
|
+
out:
|
304
|
+
...
|
305
|
+
formatter:
|
306
|
+
- type: csv
|
307
|
+
delimiter: "\t"
|
308
|
+
newline: LF
|
309
|
+
charset: UTF-8
|
310
|
+
|
311
|
+
Gzip encoder plugin
|
312
|
+
------------------
|
313
|
+
|
314
|
+
The ``gzip`` encoder plugin compresses output files using gzip.
|
315
|
+
|
316
|
+
Options
|
317
|
+
~~~~~~~~~~~~~~~~~~
|
318
|
+
|
319
|
+
+---------+----------+----------------------------------------------------------------------+--------------------+
|
320
|
+
| name | type | description | required? |
|
321
|
+
+=========+==========+======================================================================+====================+
|
322
|
+
| level | integer | Compression level. From 0 (no compression) to 9 (best compression). | ``6`` by default |
|
323
|
+
+---------+----------+----------------------------------------------------------------------+--------------------+
|
324
|
+
|
325
|
+
Example
|
326
|
+
~~~~~~~~~~~~~~~~~~
|
327
|
+
|
328
|
+
.. code-block:: yaml
|
329
|
+
|
330
|
+
out:
|
331
|
+
...
|
332
|
+
encoders:
|
333
|
+
- type: gzip
|
334
|
+
level: 1
|
335
|
+
|
data/embulk-docs/src/index.rst
CHANGED
@@ -3,26 +3,71 @@
|
|
3
3
|
You can adapt this file completely to your liking, but it should at least
|
4
4
|
contain the root `toctree` directive.
|
5
5
|
|
6
|
-
Embulk
|
6
|
+
Embulk
|
7
7
|
==================================
|
8
8
|
|
9
|
-
|
9
|
+
.. image:: _static/embulk-logo.png
|
10
|
+
:width: 512px
|
11
|
+
:target: https://github.com/embulk/embulk
|
12
|
+
|
13
|
+
What's Embulk?
|
14
|
+
------------------
|
15
|
+
|
16
|
+
Embulk is an open-source bulk data loader that helps transfer data between various databases, storages, file formats, and cloud services.
|
17
|
+
|
18
|
+
Embulk supports:
|
19
|
+
|
20
|
+
* Automatic guessing of input file formats
|
21
|
+
* Parallel & distributed execution to deal with big data sets
|
22
|
+
* Transaction control to guarantee All-or-Nothing
|
23
|
+
* Resuming
|
24
|
+
* Plugins released on RubyGems.org
|
25
|
+
|
26
|
+
You can define a bulk data load using a combination of input and output plugins:
|
27
|
+
|
28
|
+
.. image:: _static/embulk-architecture.png
|
29
|
+
:width: 640px
|
30
|
+
:target: https://github.com/embulk/embulk#quick-start
|
31
|
+
|
32
|
+
For example, `this tutorial <recipe/scheduled-csv-load-to-elasticsearch-kibana4.html>`_ describes how to use **file** input plugin with **csv** parser plugin and **gzip** decoder plugin to read CSV files, and **elasticsearch** output plugin to load the records to Elasticsearch.
|
33
|
+
|
34
|
+
Documents
|
35
|
+
------------------
|
10
36
|
|
11
37
|
* `Quick Start <https://github.com/embulk/embulk#quick-start>`_
|
12
38
|
|
13
|
-
* `Linux
|
39
|
+
* `Linux <https://github.com/embulk/embulk#linux--mac--bsd>`_
|
40
|
+
|
41
|
+
* `Mac OS X <https://github.com/embulk/embulk#linux--mac--bsd>`_
|
14
42
|
|
15
43
|
* `Windows <https://github.com/embulk/embulk#windows>`_
|
16
44
|
|
45
|
+
.. toctree::
|
46
|
+
:maxdepth: 2
|
47
|
+
|
48
|
+
recipe
|
49
|
+
|
17
50
|
* `List of Plugins by Category <http://www.embulk.org/plugins/>`_
|
18
51
|
|
52
|
+
* `Input plugins <http://www.embulk.org/plugins/#input>`_
|
53
|
+
|
54
|
+
* `Output plugins <http://www.embulk.org/plugins/#output>`_
|
55
|
+
|
56
|
+
* `File parser plugins <http://www.embulk.org/plugins/#file-parser>`_
|
57
|
+
|
58
|
+
* `File formatter plugins <http://www.embulk.org/plugins/#file-formatter>`_
|
59
|
+
|
60
|
+
* `Filter plugins <http://www.embulk.org/plugins/#filter>`_
|
61
|
+
|
19
62
|
.. toctree::
|
20
63
|
:maxdepth: 2
|
21
64
|
|
22
|
-
|
65
|
+
built-in
|
23
66
|
release
|
24
67
|
|
25
68
|
* `JavaDoc <javadoc/index.html>`_
|
26
69
|
|
27
70
|
* `RDoc <rdoc/_index.html>`_
|
28
71
|
|
72
|
+
* `Github <https://github.com/embulk/embulk>`_
|
73
|
+
|
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
|
|
56
56
|
|
57
57
|
.. code-block:: console
|
58
58
|
|
59
|
-
$ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.
|
59
|
+
$ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -O /usr/local/bin/embulk
|
60
60
|
$ sudo chmod +x /usr/local/bin/embulk
|
61
61
|
|
62
62
|
Step 2. Install Elasticsearch plugin
|
@@ -112,7 +112,7 @@ The generated config-complete.yml file should include complete information as fo
|
|
112
112
|
quote: '"'
|
113
113
|
escape: ''
|
114
114
|
null_string: 'NULL'
|
115
|
-
|
115
|
+
skip_header_lines: 1
|
116
116
|
columns:
|
117
117
|
- {name: id, type: long}
|
118
118
|
- {name: account, type: long}
|
data/embulk-docs/src/release.rst
CHANGED
@@ -4,26 +4,27 @@ Release Notes
|
|
4
4
|
.. toctree::
|
5
5
|
:maxdepth: 1
|
6
6
|
|
7
|
-
release/release-0.
|
8
|
-
release/release-0.2.0
|
9
|
-
release/release-0.2.1
|
10
|
-
release/release-0.3.0
|
11
|
-
release/release-0.3.1
|
12
|
-
release/release-0.3.2
|
13
|
-
release/release-0.4.0
|
14
|
-
release/release-0.4.1
|
15
|
-
release/release-0.4.2
|
16
|
-
release/release-0.4.3
|
17
|
-
release/release-0.4.4
|
18
|
-
release/release-0.4.5
|
19
|
-
release/release-0.4.6
|
20
|
-
release/release-0.4.7
|
21
|
-
release/release-0.4.8
|
22
|
-
release/release-0.4.9
|
23
|
-
release/release-0.4.10
|
24
|
-
release/release-0.5.0
|
25
|
-
release/release-0.5.1
|
26
|
-
release/release-0.5.2
|
27
|
-
release/release-0.5.3
|
7
|
+
release/release-0.5.5
|
28
8
|
release/release-0.5.4
|
9
|
+
release/release-0.5.3
|
10
|
+
release/release-0.5.2
|
11
|
+
release/release-0.5.1
|
12
|
+
release/release-0.5.0
|
13
|
+
release/release-0.4.10
|
14
|
+
release/release-0.4.9
|
15
|
+
release/release-0.4.8
|
16
|
+
release/release-0.4.7
|
17
|
+
release/release-0.4.6
|
18
|
+
release/release-0.4.5
|
19
|
+
release/release-0.4.4
|
20
|
+
release/release-0.4.3
|
21
|
+
release/release-0.4.2
|
22
|
+
release/release-0.4.1
|
23
|
+
release/release-0.4.0
|
24
|
+
release/release-0.3.2
|
25
|
+
release/release-0.3.1
|
26
|
+
release/release-0.3.0
|
27
|
+
release/release-0.2.1
|
28
|
+
release/release-0.2.0
|
29
|
+
release/release-0.1.0
|
29
30
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Release 0.5.5
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Plugin API
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Added ``spi.Exec.getModelManager()`` method.
|
8
|
+
|
9
|
+
General Changes
|
10
|
+
------------------
|
11
|
+
|
12
|
+
* ``guess-csv`` does not add config parameters if ``type`` is already set by other guess plugins (@shun0102++)
|
13
|
+
* Fixed double double-quoting in build.gradle file generated by the plugin template generator
|
14
|
+
|
15
|
+
|
16
|
+
Release Date
|
17
|
+
------------------
|
18
|
+
2015-04-07
|
@@ -150,7 +150,7 @@ public class CsvParserPlugin
|
|
150
150
|
schema.visitColumns(new ColumnVisitor() {
|
151
151
|
public void booleanColumn(Column column)
|
152
152
|
{
|
153
|
-
String v = nextColumn(
|
153
|
+
String v = nextColumn();
|
154
154
|
if (v == null) {
|
155
155
|
pageBuilder.setNull(column);
|
156
156
|
} else {
|
@@ -160,7 +160,7 @@ public class CsvParserPlugin
|
|
160
160
|
|
161
161
|
public void longColumn(Column column)
|
162
162
|
{
|
163
|
-
String v = nextColumn(
|
163
|
+
String v = nextColumn();
|
164
164
|
if (v == null) {
|
165
165
|
pageBuilder.setNull(column);
|
166
166
|
} else {
|
@@ -175,7 +175,7 @@ public class CsvParserPlugin
|
|
175
175
|
|
176
176
|
public void doubleColumn(Column column)
|
177
177
|
{
|
178
|
-
String v = nextColumn(
|
178
|
+
String v = nextColumn();
|
179
179
|
if (v == null) {
|
180
180
|
pageBuilder.setNull(column);
|
181
181
|
} else {
|
@@ -190,7 +190,7 @@ public class CsvParserPlugin
|
|
190
190
|
|
191
191
|
public void stringColumn(Column column)
|
192
192
|
{
|
193
|
-
String v = nextColumn(
|
193
|
+
String v = nextColumn();
|
194
194
|
if (v == null) {
|
195
195
|
pageBuilder.setNull(column);
|
196
196
|
} else {
|
@@ -200,7 +200,7 @@ public class CsvParserPlugin
|
|
200
200
|
|
201
201
|
public void timestampColumn(Column column)
|
202
202
|
{
|
203
|
-
String v = nextColumn(
|
203
|
+
String v = nextColumn();
|
204
204
|
if (v == null) {
|
205
205
|
pageBuilder.setNull(column);
|
206
206
|
} else {
|
@@ -212,6 +212,24 @@ public class CsvParserPlugin
|
|
212
212
|
}
|
213
213
|
}
|
214
214
|
}
|
215
|
+
|
216
|
+
private String nextColumn()
|
217
|
+
{
|
218
|
+
if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
|
219
|
+
return null;
|
220
|
+
}
|
221
|
+
String v = tokenizer.nextColumn();
|
222
|
+
if (!v.isEmpty()) {
|
223
|
+
if (v.equals(nullStringOrNull)) {
|
224
|
+
return null;
|
225
|
+
}
|
226
|
+
return v;
|
227
|
+
} else if (tokenizer.wasQuotedColumn()) {
|
228
|
+
return "";
|
229
|
+
} else {
|
230
|
+
return null;
|
231
|
+
}
|
232
|
+
}
|
215
233
|
});
|
216
234
|
pageBuilder.addRecord();
|
217
235
|
|
@@ -228,24 +246,6 @@ public class CsvParserPlugin
|
|
228
246
|
}
|
229
247
|
}
|
230
248
|
|
231
|
-
private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull, boolean allowOptionalColumns)
|
232
|
-
{
|
233
|
-
if(allowOptionalColumns && !tokenizer.hasNextColumn()) {
|
234
|
-
return null;
|
235
|
-
}
|
236
|
-
String v = tokenizer.nextColumn();
|
237
|
-
if (!v.isEmpty()) {
|
238
|
-
if (v.equals(nullStringOrNull)) {
|
239
|
-
return null;
|
240
|
-
}
|
241
|
-
return v;
|
242
|
-
} else if (tokenizer.wasQuotedColumn()) {
|
243
|
-
return "";
|
244
|
-
} else {
|
245
|
-
return null;
|
246
|
-
}
|
247
|
-
}
|
248
|
-
|
249
249
|
static class CsvRecordValidateException
|
250
250
|
extends RuntimeException
|
251
251
|
{
|
@@ -40,7 +40,7 @@ Gem::Specification.new do |spec|
|
|
40
40
|
spec.version = "${project.version}"
|
41
41
|
spec.authors = [<%= author.dump %>]
|
42
42
|
spec.summary = %[<%= display_name %> <%= display_category %> plugin for Embulk]
|
43
|
-
spec.description = %[<%=
|
43
|
+
spec.description = %[<%= description %>]
|
44
44
|
spec.email = [<%= email.dump %>]
|
45
45
|
spec.licenses = ["MIT"]
|
46
46
|
# TODO set this: spec.homepage = <%= "https://github.com/#{email[/([^@]*)/]}/#{project_name}".dump %>
|
data/lib/embulk/guess/csv.rb
CHANGED
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -249,6 +249,9 @@ files:
|
|
249
249
|
- embulk-docs/plugins/index.html.erb
|
250
250
|
- embulk-docs/plugins/plugins.css
|
251
251
|
- embulk-docs/push-gh-pages.sh
|
252
|
+
- embulk-docs/src/_static/embulk-architecture.png
|
253
|
+
- embulk-docs/src/_static/embulk-logo.png
|
254
|
+
- embulk-docs/src/built-in.rst
|
252
255
|
- embulk-docs/src/conf.py
|
253
256
|
- embulk-docs/src/index.rst
|
254
257
|
- embulk-docs/src/recipe.rst
|
@@ -276,6 +279,7 @@ files:
|
|
276
279
|
- embulk-docs/src/release/release-0.5.2.rst
|
277
280
|
- embulk-docs/src/release/release-0.5.3.rst
|
278
281
|
- embulk-docs/src/release/release-0.5.4.rst
|
282
|
+
- embulk-docs/src/release/release-0.5.5.rst
|
279
283
|
- embulk-standards/build.gradle
|
280
284
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
281
285
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -380,8 +384,8 @@ files:
|
|
380
384
|
- classpath/bval-jsr303-0.5.jar
|
381
385
|
- classpath/commons-beanutils-core-1.8.3.jar
|
382
386
|
- classpath/commons-lang3-3.1.jar
|
383
|
-
- classpath/embulk-core-0.5.
|
384
|
-
- classpath/embulk-standards-0.5.
|
387
|
+
- classpath/embulk-core-0.5.5.jar
|
388
|
+
- classpath/embulk-standards-0.5.5.jar
|
385
389
|
- classpath/guava-18.0.jar
|
386
390
|
- classpath/guice-3.0.jar
|
387
391
|
- classpath/guice-multibindings-3.0.jar
|