embulk 0.5.4 → 0.5.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +5 -0
- data/embulk-docs/plugins/index.html.erb +1 -1
- data/embulk-docs/src/_static/embulk-architecture.png +0 -0
- data/embulk-docs/src/_static/embulk-logo.png +0 -0
- data/embulk-docs/src/built-in.rst +335 -0
- data/embulk-docs/src/index.rst +49 -4
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +2 -2
- data/embulk-docs/src/release.rst +22 -21
- data/embulk-docs/src/release/release-0.5.5.rst +18 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +23 -23
- data/lib/embulk/data/new/java/build.gradle.erb +1 -1
- data/lib/embulk/guess/csv.rb +2 -0
- data/lib/embulk/version.rb +1 -1
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f78ef9df69c9d408b6a7df450706a54cb646c596
|
4
|
+
data.tar.gz: 79a55069daa0ec3f952fd3e6d66f9536b9b6eac5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51c83cefab8712f70e350ccc446ecab9c11779447c94ddc5c7b4c10597e1c41fe9d6853e7c7b3ff0f7d132082a604901cf7344b0382711c42d101b20f210a359
|
7
|
+
data.tar.gz: 17d4ab5552e98c6bc0fa3368cbf22ab87814e035dbae5d8b6e6c91aa6778f9d89ab2a666935b382e0dd0e7252a68460c10ea54ac9d18bf4664dfc4b00d5cf84e
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
30
30
|
Following 4 commands install embulk to your home directory:
|
31
31
|
|
32
32
|
```
|
33
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar
|
33
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar
|
34
34
|
chmod +x ~/.embulk/bin/embulk
|
35
35
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
36
36
|
source ~/.bashrc
|
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
45
45
|
You can assume the jar file is a .bat file.
|
46
46
|
|
47
47
|
```
|
48
|
-
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -OutFile embulk.bat}"
|
48
|
+
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -OutFile embulk.bat}"
|
49
49
|
```
|
50
50
|
|
51
51
|
Next step: [Trying examples](#trying-examples)
|
data/build.gradle
CHANGED
@@ -2,6 +2,7 @@ package org.embulk.spi;
|
|
2
2
|
|
3
3
|
import org.slf4j.Logger;
|
4
4
|
import org.embulk.config.Task;
|
5
|
+
import org.embulk.config.ModelManager;
|
5
6
|
import org.embulk.config.CommitReport;
|
6
7
|
import org.embulk.config.ConfigDiff;
|
7
8
|
import org.embulk.config.ConfigSource;
|
@@ -49,6 +50,11 @@ public class Exec
|
|
49
50
|
return session().getBufferAllocator();
|
50
51
|
}
|
51
52
|
|
53
|
+
public static ModelManager getModelManager()
|
54
|
+
{
|
55
|
+
return session().getModelManager();
|
56
|
+
}
|
57
|
+
|
52
58
|
public static <T> T newPlugin(Class<T> iface, PluginType type)
|
53
59
|
{
|
54
60
|
return session().newPlugin(iface, type);
|
Binary file
|
Binary file
|
@@ -0,0 +1,335 @@
|
|
1
|
+
Configuration
|
2
|
+
==================================
|
3
|
+
|
4
|
+
.. contents::
|
5
|
+
:local:
|
6
|
+
:depth: 2
|
7
|
+
|
8
|
+
Embulk configuration file format
|
9
|
+
------------------
|
10
|
+
|
11
|
+
Embulk uses a YAML file to define a bulk data loading. Here is an example of the file:
|
12
|
+
|
13
|
+
.. code-block:: yaml
|
14
|
+
|
15
|
+
in:
|
16
|
+
type: file
|
17
|
+
path_prefix: ./mydata/csv/
|
18
|
+
decoders:
|
19
|
+
- {type: gzip}
|
20
|
+
parser:
|
21
|
+
charset: UTF-8
|
22
|
+
newline: CRLF
|
23
|
+
type: csv
|
24
|
+
delimiter: ','
|
25
|
+
quote: '"'
|
26
|
+
escape: ''
|
27
|
+
null_string: 'NULL'
|
28
|
+
skip_header_lines: 1
|
29
|
+
columns:
|
30
|
+
- {name: id, type: long}
|
31
|
+
- {name: account, type: long}
|
32
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
33
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
34
|
+
- {name: comment, type: string}
|
35
|
+
filters:
|
36
|
+
- type: speedometer
|
37
|
+
speed_limit: 250000
|
38
|
+
out:
|
39
|
+
type: stdout
|
40
|
+
|
41
|
+
A configuration file consists of the following sections:
|
42
|
+
|
43
|
+
* **in:** Input plugin options. An input plugin is either record-based (`MySQL <https://github.com/embulk/embulk-input-jdbc>`_, `DynamoDB <https://github.com/lulichn/embulk-input-dynamodb>`_, etc) or file-based (`S3 <https://github.com/embulk/embulk-input-s3>`_, `HTTP <https://github.com/takumakanari/embulk-input-http>`_, etc).
|
44
|
+
|
45
|
+
* **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
|
46
|
+
|
47
|
+
* **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
|
48
|
+
|
49
|
+
* **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
|
50
|
+
|
51
|
+
* **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
|
52
|
+
|
53
|
+
* **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
|
54
|
+
|
55
|
+
* **filters:** Filter plugins options (optional).
|
56
|
+
|
57
|
+
* **exec:** Executor plugin options. An executor plugin controls parallel processing (such as built-in thread executor, `Hadoop MapReduce executor <https://github.com/embulk/embulk-executor-mapreduce>`_)
|
58
|
+
|
59
|
+
In many cases, what you need to write is the **in:**, **out:** and **formatter:** sections only, because the ``guess`` command guesses the **parser** and **decoder** options for you. See also the `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
|
60
|
+
|
61
|
+
|
62
|
+
Local file input plugin
|
63
|
+
------------------
|
64
|
+
|
65
|
+
The ``file`` input plugin reads files from local file system.
|
66
|
+
|
67
|
+
Options
|
68
|
+
~~~~~~~~~~~~~~~~~~
|
69
|
+
|
70
|
+
+----------------+----------+------------------------------------------------+-----------+
|
71
|
+
| name | type | description | required? |
|
72
|
+
+================+==========+================================================+===========+
|
73
|
+
| path\_prefix | string | Path prefix of input files | required |
|
74
|
+
+----------------+----------+------------------------------------------------+-----------+
|
75
|
+
| parsers | hash | Parsers configurations (see below) | required |
|
76
|
+
+----------------+----------+------------------------------------------------+-----------+
|
77
|
+
| decoders | array | Decoder configuration (see below) | |
|
78
|
+
+----------------+----------+------------------------------------------------+-----------+
|
79
|
+
| last\_path | string | Name of last read file in previous operation | |
|
80
|
+
+----------------+----------+------------------------------------------------+-----------+
|
81
|
+
|
82
|
+
The ``path_prefix`` option is required. If you have files as following, you may set ``path_prefix: /path/to/files/sample_``:
|
83
|
+
|
84
|
+
::
|
85
|
+
|
86
|
+
.
|
87
|
+
`-- path
|
88
|
+
`-- to
|
89
|
+
`-- files
|
90
|
+
|-- sample_01.csv -> read
|
91
|
+
|-- sample_02.csv -> read
|
92
|
+
|-- sample_03.csv -> read
|
93
|
+
|-- sample_04.csv -> read
|
94
|
+
|
95
|
+
The ``last_path`` option is used to skip files whose paths are the same as or earlier than the given file in dictionary order.
|
96
|
+
For example, if you set ``last_path: /path/to/files/sample_02.csv``, Embulk reads following files:
|
97
|
+
|
98
|
+
::
|
99
|
+
|
100
|
+
.
|
101
|
+
`-- path
|
102
|
+
`-- to
|
103
|
+
`-- files
|
104
|
+
|-- sample_01.csv -> skip
|
105
|
+
|-- sample_02.csv -> skip
|
106
|
+
|-- sample_03.csv -> read
|
107
|
+
|-- sample_04.csv -> read
|
108
|
+
|
109
|
+
Example
|
110
|
+
~~~~~~~~~~~~~~~~~~
|
111
|
+
|
112
|
+
.. code-block:: yaml
|
113
|
+
|
114
|
+
in:
|
115
|
+
type: file
|
116
|
+
path_prefix: /path/to/files/sample_
|
117
|
+
last_path: /path/to/files/sample_02.csv
|
118
|
+
parser:
|
119
|
+
...
|
120
|
+
|
121
|
+
In most cases, you'll use ``guess`` to configure the parsers and decoders. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
|
122
|
+
|
123
|
+
CSV parser plugin
|
124
|
+
------------------
|
125
|
+
|
126
|
+
The ``csv`` parser plugin parses CSV and TSV files.
|
127
|
+
|
128
|
+
Options
|
129
|
+
~~~~~~~~~~~~~~~~~~
|
130
|
+
|
131
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
132
|
+
| name | type | description | required? |
|
133
|
+
+============================+==========+================================================================================================================+========================+
|
134
|
+
| delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
|
135
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
136
|
+
| quote | string | The character surrounding a quoted value | ``\"`` by default |
|
137
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
138
|
+
| escape | string | Escape character to escape a special character | ``\\`` by default |
|
139
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
140
|
+
| skip\_header\_lines | integer | Skip this number of lines first. Set 1 if the file has header line. | ``0`` by default |
|
141
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
142
|
+
| null\_string | string | If a value is this string, converts it to NULL. For example, set ``\N`` for CSV files created by mysqldump | |
|
143
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
144
|
+
| trim\_if\_not\_quoted | boolean | If true, remove spaces of a value if the value is not surrounded by the quote character | ``false`` by default |
|
145
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
146
|
+
| allow\_optional\_columns | boolean | If true, set null to insufficient columns. Otherwise, skip the row in case of insufficient number of columns | ``false`` by default |
|
147
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
148
|
+
| max\_quoted\_size\_limit | integer | Maximum number of bytes of a quoted value. If a value exceeds the limit, the row will be skipped | ``131072`` by default |
|
149
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
150
|
+
| default\_timezone | string | Time zone of timestamp columns if the value itself doesn't include time zone description (eg. Asia/Tokyo) | ``UTC`` by default |
|
151
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
152
|
+
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
153
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
154
|
+
| charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
|
155
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
156
|
+
| columns | hash | Columns (see below) | required |
|
157
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
158
|
+
|
159
|
+
The ``columns`` option declares the list of columns. This CSV parser plugin ignores the header line.
|
160
|
+
|
161
|
+
+----------+-------------------------------------------------+
|
162
|
+
| name | description |
|
163
|
+
+==========+=================================================+
|
164
|
+
| name | Name of the column |
|
165
|
+
+----------+-------------------------------------------------+
|
166
|
+
| type | Type of the column (see below) |
|
167
|
+
+----------+-------------------------------------------------+
|
168
|
+
| format | Format of the timestamp if type is timestamp |
|
169
|
+
+----------+-------------------------------------------------+
|
170
|
+
|
171
|
+
List of types:
|
172
|
+
|
173
|
+
+-------------+----------------------------------------------+
|
174
|
+
| name | description |
|
175
|
+
+=============+==============================================+
|
176
|
+
| boolean | true or false |
|
177
|
+
+-------------+----------------------------------------------+
|
178
|
+
| long | 64-bit signed integers |
|
179
|
+
+-------------+----------------------------------------------+
|
180
|
+
| timestamp | Date and time with nano-seconds precision |
|
181
|
+
+-------------+----------------------------------------------+
|
182
|
+
| double | 64-bit floating point numbers |
|
183
|
+
+-------------+----------------------------------------------+
|
184
|
+
| string | Strings |
|
185
|
+
+-------------+----------------------------------------------+
|
186
|
+
|
187
|
+
You can use ``guess`` to automatically generate the column settings. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
|
188
|
+
|
189
|
+
Example
|
190
|
+
~~~~~~~~~~~~~~~~~~
|
191
|
+
|
192
|
+
.. code-block:: yaml
|
193
|
+
|
194
|
+
in:
|
195
|
+
...
|
196
|
+
parser:
|
197
|
+
type: csv
|
198
|
+
charset: UTF-8
|
199
|
+
newline: CRLF
|
200
|
+
delimiter: "\t"
|
201
|
+
quote: '"'
|
202
|
+
escape: ''
|
203
|
+
null_string: 'NULL'
|
204
|
+
skip_header_lines: 1
|
205
|
+
columns:
|
206
|
+
- {name: id, type: long}
|
207
|
+
- {name: account, type: long}
|
208
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
209
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
210
|
+
- {name: comment, type: string}
|
211
|
+
|
212
|
+
Gzip decoder plugin
|
213
|
+
------------------
|
214
|
+
|
215
|
+
The ``gzip`` decoder plugin decompresses gzip files before input plugins read them.
|
216
|
+
|
217
|
+
Options
|
218
|
+
~~~~~~~~~~~~~~~~~~
|
219
|
+
|
220
|
+
This plugin doesn't have any options.
|
221
|
+
|
222
|
+
Example
|
223
|
+
~~~~~~~~~~~~~~~~~~
|
224
|
+
|
225
|
+
.. code-block:: yaml
|
226
|
+
|
227
|
+
in:
|
228
|
+
...
|
229
|
+
decoders:
|
230
|
+
- {type: gzip}
|
231
|
+
|
232
|
+
|
233
|
+
File output plugin
|
234
|
+
------------------
|
235
|
+
|
236
|
+
The ``file`` output plugin writes records to local file system.
|
237
|
+
|
238
|
+
Options
|
239
|
+
~~~~~~~~~~~~~~~~~~
|
240
|
+
|
241
|
+
+--------------------+----------+---------------------------------------------------+----------------------------+
|
242
|
+
| name | type | description | required? |
|
243
|
+
+====================+==========+===================================================+============================+
|
244
|
+
| path\_prefix | string | Path prefix of the output files | required |
|
245
|
+
+--------------------+----------+---------------------------------------------------+----------------------------+
|
246
|
+
| sequence\_format | string | Format of the sequence number of the output files | ``.%03d.%02d`` by default |
|
247
|
+
+--------------------+----------+---------------------------------------------------+----------------------------+
|
248
|
+
| file\_ext | string | Path suffix of the output files | required |
|
249
|
+
+--------------------+----------+---------------------------------------------------+----------------------------+
|
250
|
+
|
251
|
+
For example, if you set ``path_prefix: /path/to/output/sample``, ``sequence_format: ".%03d.%02d"``, and ``file_ext: .csv``, the output files will be named as follows:
|
252
|
+
|
253
|
+
::
|
254
|
+
|
255
|
+
.
|
256
|
+
`-- path
|
257
|
+
`-- to
|
258
|
+
`-- output
|
259
|
+
|-- sample.01.000.csv
|
260
|
+
|-- sample.02.000.csv
|
261
|
+
|-- sample.03.000.csv
|
262
|
+
|-- sample.04.000.csv
|
263
|
+
|
264
|
+
``sequence_format`` formats task index and sequence number in a task.
|
265
|
+
|
266
|
+
Example
|
267
|
+
~~~~~~~~~~~~~~~~~~
|
268
|
+
|
269
|
+
.. code-block:: yaml
|
270
|
+
|
271
|
+
out:
|
272
|
+
type: file
|
273
|
+
path_prefix: /path/to/output/sample
|
274
|
+
file_ext: .csv
|
275
|
+
formatter:
|
276
|
+
...
|
277
|
+
|
278
|
+
CSV formatter plugin
|
279
|
+
------------------
|
280
|
+
|
281
|
+
The ``csv`` formatter plugin formats records using CSV or TSV format.
|
282
|
+
|
283
|
+
Options
|
284
|
+
~~~~~~~~~~~~~~~~~~
|
285
|
+
|
286
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
287
|
+
| name | type | description | required? |
|
288
|
+
+================+==========+=======================================================================================================+========================+
|
289
|
+
| delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
|
290
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
291
|
+
| header\_line | boolean | If true, write the header line with column name at the first line | |
|
292
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
293
|
+
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
294
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
295
|
+
| charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
|
296
|
+
+----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
|
297
|
+
|
298
|
+
Example
|
299
|
+
~~~~~~~~~~~~~~~~~~
|
300
|
+
|
301
|
+
.. code-block:: yaml
|
302
|
+
|
303
|
+
out:
|
304
|
+
...
|
305
|
+
formatter:
|
306
|
+
- type: csv
|
307
|
+
delimiter: "\t"
|
308
|
+
newline: LF
|
309
|
+
charset: UTF-8
|
310
|
+
|
311
|
+
Gzip encoder plugin
|
312
|
+
------------------
|
313
|
+
|
314
|
+
The ``gzip`` encoder plugin compresses output files using gzip.
|
315
|
+
|
316
|
+
Options
|
317
|
+
~~~~~~~~~~~~~~~~~~
|
318
|
+
|
319
|
+
+---------+----------+----------------------------------------------------------------------+--------------------+
|
320
|
+
| name | type | description | required? |
|
321
|
+
+=========+==========+======================================================================+====================+
|
322
|
+
| level | integer | Compression level. From 0 (no compression) to 9 (best compression). | ``6`` by default |
|
323
|
+
+---------+----------+----------------------------------------------------------------------+--------------------+
|
324
|
+
|
325
|
+
Example
|
326
|
+
~~~~~~~~~~~~~~~~~~
|
327
|
+
|
328
|
+
.. code-block:: yaml
|
329
|
+
|
330
|
+
out:
|
331
|
+
...
|
332
|
+
encoders:
|
333
|
+
- type: gzip
|
334
|
+
level: 1
|
335
|
+
|
data/embulk-docs/src/index.rst
CHANGED
@@ -3,26 +3,71 @@
|
|
3
3
|
You can adapt this file completely to your liking, but it should at least
|
4
4
|
contain the root `toctree` directive.
|
5
5
|
|
6
|
-
Embulk
|
6
|
+
Embulk
|
7
7
|
==================================
|
8
8
|
|
9
|
-
|
9
|
+
.. image:: _static/embulk-logo.png
|
10
|
+
:width: 512px
|
11
|
+
:target: https://github.com/embulk/embulk
|
12
|
+
|
13
|
+
What's Embulk?
|
14
|
+
------------------
|
15
|
+
|
16
|
+
Embulk is an open-source bulk data loader that helps data transfer between various databases, storages, file formats, and cloud services.
|
17
|
+
|
18
|
+
Embulk supports:
|
19
|
+
|
20
|
+
* Automatic guessing of input file formats
|
21
|
+
* Parallel & distributed execution to deal with big data sets
|
22
|
+
* Transaction control to guarantee All-or-Nothing
|
23
|
+
* Resuming
|
24
|
+
* Plugins released on RubyGems.org
|
25
|
+
|
26
|
+
You can define a bulk data loading using a combination of input and output plugins:
|
27
|
+
|
28
|
+
.. image:: _static/embulk-architecture.png
|
29
|
+
:width: 640px
|
30
|
+
:target: https://github.com/embulk/embulk#quick-start
|
31
|
+
|
32
|
+
For example, `this tutorial <recipe/scheduled-csv-load-to-elasticsearch-kibana4.html>`_ describes how to use **file** input plugin with **csv** parser plugin and **gzip** decoder plugin to read CSV files, and **elasticsearch** output plugin to load the records to Elasticsearch.
|
33
|
+
|
34
|
+
Documents
|
35
|
+
------------------
|
10
36
|
|
11
37
|
* `Quick Start <https://github.com/embulk/embulk#quick-start>`_
|
12
38
|
|
13
|
-
* `Linux
|
39
|
+
* `Linux <https://github.com/embulk/embulk#linux--mac--bsd>`_
|
40
|
+
|
41
|
+
* `Mac OS X <https://github.com/embulk/embulk#linux--mac--bsd>`_
|
14
42
|
|
15
43
|
* `Windows <https://github.com/embulk/embulk#windows>`_
|
16
44
|
|
45
|
+
.. toctree::
|
46
|
+
:maxdepth: 2
|
47
|
+
|
48
|
+
recipe
|
49
|
+
|
17
50
|
* `List of Plugins by Category <http://www.embulk.org/plugins/>`_
|
18
51
|
|
52
|
+
* `Input plugins <http://www.embulk.org/plugins/#input>`_
|
53
|
+
|
54
|
+
* `Output plugins <http://www.embulk.org/plugins/#output>`_
|
55
|
+
|
56
|
+
* `File parser plugins <http://www.embulk.org/plugins/#file-parser>`_
|
57
|
+
|
58
|
+
* `File formatter plugins <http://www.embulk.org/plugins/#file-formatter>`_
|
59
|
+
|
60
|
+
* `Filter plugins <http://www.embulk.org/plugins/#filter>`_
|
61
|
+
|
19
62
|
.. toctree::
|
20
63
|
:maxdepth: 2
|
21
64
|
|
22
|
-
|
65
|
+
built-in
|
23
66
|
release
|
24
67
|
|
25
68
|
* `JavaDoc <javadoc/index.html>`_
|
26
69
|
|
27
70
|
* `RDoc <rdoc/_index.html>`_
|
28
71
|
|
72
|
+
* `GitHub <https://github.com/embulk/embulk>`_
|
73
|
+
|
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
|
|
56
56
|
|
57
57
|
.. code-block:: console
|
58
58
|
|
59
|
-
$ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -O /usr/local/bin/embulk
|
59
|
+
$ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -O /usr/local/bin/embulk
|
60
60
|
$ sudo chmod +x /usr/local/bin/embulk
|
61
61
|
|
62
62
|
Step 2. Install Elasticsearch plugin
|
@@ -112,7 +112,7 @@ The generated config-complete.yml file should include complete information as fo
|
|
112
112
|
quote: '"'
|
113
113
|
escape: ''
|
114
114
|
null_string: 'NULL'
|
115
|
-
|
115
|
+
skip_header_lines: 1
|
116
116
|
columns:
|
117
117
|
- {name: id, type: long}
|
118
118
|
- {name: account, type: long}
|
data/embulk-docs/src/release.rst
CHANGED
@@ -4,26 +4,27 @@ Release Notes
|
|
4
4
|
.. toctree::
|
5
5
|
:maxdepth: 1
|
6
6
|
|
7
|
-
release/release-0.1.0
|
8
|
-
release/release-0.2.0
|
9
|
-
release/release-0.2.1
|
10
|
-
release/release-0.3.0
|
11
|
-
release/release-0.3.1
|
12
|
-
release/release-0.3.2
|
13
|
-
release/release-0.4.0
|
14
|
-
release/release-0.4.1
|
15
|
-
release/release-0.4.2
|
16
|
-
release/release-0.4.3
|
17
|
-
release/release-0.4.4
|
18
|
-
release/release-0.4.5
|
19
|
-
release/release-0.4.6
|
20
|
-
release/release-0.4.7
|
21
|
-
release/release-0.4.8
|
22
|
-
release/release-0.4.9
|
23
|
-
release/release-0.4.10
|
24
|
-
release/release-0.5.0
|
25
|
-
release/release-0.5.1
|
26
|
-
release/release-0.5.2
|
27
|
-
release/release-0.5.3
|
7
|
+
release/release-0.5.5
|
28
8
|
release/release-0.5.4
|
9
|
+
release/release-0.5.3
|
10
|
+
release/release-0.5.2
|
11
|
+
release/release-0.5.1
|
12
|
+
release/release-0.5.0
|
13
|
+
release/release-0.4.10
|
14
|
+
release/release-0.4.9
|
15
|
+
release/release-0.4.8
|
16
|
+
release/release-0.4.7
|
17
|
+
release/release-0.4.6
|
18
|
+
release/release-0.4.5
|
19
|
+
release/release-0.4.4
|
20
|
+
release/release-0.4.3
|
21
|
+
release/release-0.4.2
|
22
|
+
release/release-0.4.1
|
23
|
+
release/release-0.4.0
|
24
|
+
release/release-0.3.2
|
25
|
+
release/release-0.3.1
|
26
|
+
release/release-0.3.0
|
27
|
+
release/release-0.2.1
|
28
|
+
release/release-0.2.0
|
29
|
+
release/release-0.1.0
|
29
30
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Release 0.5.5
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Plugin API
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Added ``spi.Exec.getModelManager()`` method.
|
8
|
+
|
9
|
+
General Changes
|
10
|
+
------------------
|
11
|
+
|
12
|
+
* ``guess-csv`` does not add config parameters if ``type`` is already set by other guess plugins (@shun0102++)
|
13
|
+
* Fixed double double-quoting in build.gradle file generated by the plugin template generator
|
14
|
+
|
15
|
+
|
16
|
+
Release Date
|
17
|
+
------------------
|
18
|
+
2015-04-07
|
@@ -150,7 +150,7 @@ public class CsvParserPlugin
|
|
150
150
|
schema.visitColumns(new ColumnVisitor() {
|
151
151
|
public void booleanColumn(Column column)
|
152
152
|
{
|
153
|
-
String v = nextColumn(
|
153
|
+
String v = nextColumn();
|
154
154
|
if (v == null) {
|
155
155
|
pageBuilder.setNull(column);
|
156
156
|
} else {
|
@@ -160,7 +160,7 @@ public class CsvParserPlugin
|
|
160
160
|
|
161
161
|
public void longColumn(Column column)
|
162
162
|
{
|
163
|
-
String v = nextColumn(
|
163
|
+
String v = nextColumn();
|
164
164
|
if (v == null) {
|
165
165
|
pageBuilder.setNull(column);
|
166
166
|
} else {
|
@@ -175,7 +175,7 @@ public class CsvParserPlugin
|
|
175
175
|
|
176
176
|
public void doubleColumn(Column column)
|
177
177
|
{
|
178
|
-
String v = nextColumn(
|
178
|
+
String v = nextColumn();
|
179
179
|
if (v == null) {
|
180
180
|
pageBuilder.setNull(column);
|
181
181
|
} else {
|
@@ -190,7 +190,7 @@ public class CsvParserPlugin
|
|
190
190
|
|
191
191
|
public void stringColumn(Column column)
|
192
192
|
{
|
193
|
-
String v = nextColumn(
|
193
|
+
String v = nextColumn();
|
194
194
|
if (v == null) {
|
195
195
|
pageBuilder.setNull(column);
|
196
196
|
} else {
|
@@ -200,7 +200,7 @@ public class CsvParserPlugin
|
|
200
200
|
|
201
201
|
public void timestampColumn(Column column)
|
202
202
|
{
|
203
|
-
String v = nextColumn(
|
203
|
+
String v = nextColumn();
|
204
204
|
if (v == null) {
|
205
205
|
pageBuilder.setNull(column);
|
206
206
|
} else {
|
@@ -212,6 +212,24 @@ public class CsvParserPlugin
|
|
212
212
|
}
|
213
213
|
}
|
214
214
|
}
|
215
|
+
|
216
|
+
private String nextColumn()
|
217
|
+
{
|
218
|
+
if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
|
219
|
+
return null;
|
220
|
+
}
|
221
|
+
String v = tokenizer.nextColumn();
|
222
|
+
if (!v.isEmpty()) {
|
223
|
+
if (v.equals(nullStringOrNull)) {
|
224
|
+
return null;
|
225
|
+
}
|
226
|
+
return v;
|
227
|
+
} else if (tokenizer.wasQuotedColumn()) {
|
228
|
+
return "";
|
229
|
+
} else {
|
230
|
+
return null;
|
231
|
+
}
|
232
|
+
}
|
215
233
|
});
|
216
234
|
pageBuilder.addRecord();
|
217
235
|
|
@@ -228,24 +246,6 @@ public class CsvParserPlugin
|
|
228
246
|
}
|
229
247
|
}
|
230
248
|
|
231
|
-
private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull, boolean allowOptionalColumns)
|
232
|
-
{
|
233
|
-
if(allowOptionalColumns && !tokenizer.hasNextColumn()) {
|
234
|
-
return null;
|
235
|
-
}
|
236
|
-
String v = tokenizer.nextColumn();
|
237
|
-
if (!v.isEmpty()) {
|
238
|
-
if (v.equals(nullStringOrNull)) {
|
239
|
-
return null;
|
240
|
-
}
|
241
|
-
return v;
|
242
|
-
} else if (tokenizer.wasQuotedColumn()) {
|
243
|
-
return "";
|
244
|
-
} else {
|
245
|
-
return null;
|
246
|
-
}
|
247
|
-
}
|
248
|
-
|
249
249
|
static class CsvRecordValidateException
|
250
250
|
extends RuntimeException
|
251
251
|
{
|
@@ -40,7 +40,7 @@ Gem::Specification.new do |spec|
|
|
40
40
|
spec.version = "${project.version}"
|
41
41
|
spec.authors = [<%= author.dump %>]
|
42
42
|
spec.summary = %[<%= display_name %> <%= display_category %> plugin for Embulk]
|
43
|
-
spec.description = %[<%=
|
43
|
+
spec.description = %[<%= description %>]
|
44
44
|
spec.email = [<%= email.dump %>]
|
45
45
|
spec.licenses = ["MIT"]
|
46
46
|
# TODO set this: spec.homepage = <%= "https://github.com/#{email[/([^@]*)/]}/#{project_name}".dump %>
|
data/lib/embulk/guess/csv.rb
CHANGED
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.4
|
4
|
+
version: 0.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -249,6 +249,9 @@ files:
|
|
249
249
|
- embulk-docs/plugins/index.html.erb
|
250
250
|
- embulk-docs/plugins/plugins.css
|
251
251
|
- embulk-docs/push-gh-pages.sh
|
252
|
+
- embulk-docs/src/_static/embulk-architecture.png
|
253
|
+
- embulk-docs/src/_static/embulk-logo.png
|
254
|
+
- embulk-docs/src/built-in.rst
|
252
255
|
- embulk-docs/src/conf.py
|
253
256
|
- embulk-docs/src/index.rst
|
254
257
|
- embulk-docs/src/recipe.rst
|
@@ -276,6 +279,7 @@ files:
|
|
276
279
|
- embulk-docs/src/release/release-0.5.2.rst
|
277
280
|
- embulk-docs/src/release/release-0.5.3.rst
|
278
281
|
- embulk-docs/src/release/release-0.5.4.rst
|
282
|
+
- embulk-docs/src/release/release-0.5.5.rst
|
279
283
|
- embulk-standards/build.gradle
|
280
284
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
281
285
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -380,8 +384,8 @@ files:
|
|
380
384
|
- classpath/bval-jsr303-0.5.jar
|
381
385
|
- classpath/commons-beanutils-core-1.8.3.jar
|
382
386
|
- classpath/commons-lang3-3.1.jar
|
383
|
-
- classpath/embulk-core-0.5.4.jar
|
384
|
-
- classpath/embulk-standards-0.5.4.jar
|
387
|
+
- classpath/embulk-core-0.5.5.jar
|
388
|
+
- classpath/embulk-standards-0.5.5.jar
|
385
389
|
- classpath/guava-18.0.jar
|
386
390
|
- classpath/guice-3.0.jar
|
387
391
|
- classpath/guice-multibindings-3.0.jar
|