embulk 0.5.4 → 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e0be98e5dbe81e40c6562142d2cdf44cc4f8cf34
4
- data.tar.gz: f907d431af0add753761547f09dec47113b3b236
3
+ metadata.gz: f78ef9df69c9d408b6a7df450706a54cb646c596
4
+ data.tar.gz: 79a55069daa0ec3f952fd3e6d66f9536b9b6eac5
5
5
  SHA512:
6
- metadata.gz: 2a1690e94a7622db588cc6511f1dec583320192960ecdf9851c74ac1f5feaf7bda478b8e6dbf16a7b58f870e38dd464ee949cc20c6f5cadb3473b04bf3cf23db
7
- data.tar.gz: 0eb2a31661f7772cadee71642781d2ef30cfc5015ce5bf549a3cc36310fbe29283f80d272b91ee788a4513c52ae8c09ecad8e4dce28e459ad11c5370f68a6e22
6
+ metadata.gz: 51c83cefab8712f70e350ccc446ecab9c11779447c94ddc5c7b4c10597e1c41fe9d6853e7c7b3ff0f7d132082a604901cf7344b0382711c42d101b20f210a359
7
+ data.tar.gz: 17d4ab5552e98c6bc0fa3368cbf22ab87814e035dbae5d8b6e6c91aa6778f9d89ab2a666935b382e0dd0e7252a68460c10ea54ac9d18bf4664dfc4b00d5cf84e
data/README.md CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
30
30
  Following 4 commands install embulk to your home directory:
31
31
 
32
32
  ```
33
- curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar
33
+ curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar
34
34
  chmod +x ~/.embulk/bin/embulk
35
35
  echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
36
36
  source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
45
45
  You can assume the jar file is a .bat file.
46
46
 
47
47
  ```
48
- PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -OutFile embulk.bat}"
48
+ PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -OutFile embulk.bat}"
49
49
  ```
50
50
 
51
51
  Next step: [Trying examples](#trying-examples)
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
12
12
 
13
13
  allprojects {
14
14
  group = 'org.embulk'
15
- version = '0.5.4'
15
+ version = '0.5.5'
16
16
 
17
17
  apply plugin: 'java'
18
18
  apply plugin: 'maven-publish'
@@ -2,6 +2,7 @@ package org.embulk.spi;
2
2
 
3
3
  import org.slf4j.Logger;
4
4
  import org.embulk.config.Task;
5
+ import org.embulk.config.ModelManager;
5
6
  import org.embulk.config.CommitReport;
6
7
  import org.embulk.config.ConfigDiff;
7
8
  import org.embulk.config.ConfigSource;
@@ -49,6 +50,11 @@ public class Exec
49
50
  return session().getBufferAllocator();
50
51
  }
51
52
 
53
+ public static ModelManager getModelManager()
54
+ {
55
+ return session().getModelManager();
56
+ }
57
+
52
58
  public static <T> T newPlugin(Class<T> iface, PluginType type)
53
59
  {
54
60
  return session().newPlugin(iface, type);
@@ -119,6 +119,11 @@ public class ExecSession
119
119
  return bufferAllocator;
120
120
  }
121
121
 
122
+ public ModelManager getModelManager()
123
+ {
124
+ return modelManager;
125
+ }
126
+
122
127
  public ConfigSource getExecConfig()
123
128
  {
124
129
  return execConfig;
@@ -21,7 +21,7 @@
21
21
 
22
22
  <% categories.each do |category,gems| %>
23
23
  <div class="section">
24
- <h2><%= category.upcase %></h2>
24
+ <h2 id="<%= category.gsub(/[^a-zA-Z0-9]/, '-') %>"><%= category.upcase %></h2>
25
25
  <table class="plugins">
26
26
  <thead>
27
27
  <tr>
@@ -0,0 +1,335 @@
1
+ Configuration
2
+ ==================================
3
+
4
+ .. contents::
5
+ :local:
6
+ :depth: 2
7
+
8
+ Embulk configuration file format
9
+ ------------------
10
+
11
+ Embulk uses a YAML file to define a bulk data loading. Here is an example of the file:
12
+
13
+ .. code-block:: yaml
14
+
15
+ in:
16
+ type: file
17
+ path_prefix: ./mydata/csv/
18
+ decoders:
19
+ - {type: gzip}
20
+ parser:
21
+ charset: UTF-8
22
+ newline: CRLF
23
+ type: csv
24
+ delimiter: ','
25
+ quote: '"'
26
+ escape: ''
27
+ null_string: 'NULL'
28
+ skip_header_lines: 1
29
+ columns:
30
+ - {name: id, type: long}
31
+ - {name: account, type: long}
32
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
33
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
34
+ - {name: comment, type: string}
35
+ filters:
36
+ - type: speedometer
37
+ speed_limit: 250000
38
+ out:
39
+ type: stdout
40
+
41
+ A configuration file consists of following sections:
42
+
43
+ * **in:** Input plugin options. An input plugin is either record-based (`MySQL <https://github.com/embulk/embulk-input-jdbc>`_, `DynamoDB <https://github.com/lulichn/embulk-input-dynamodb>`_, etc) or file-based (`S3 <https://github.com/embulk/embulk-input-s3>`_, `HTTP <https://github.com/takumakanari/embulk-input-http>`_, etc).
44
+
45
+ * **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
46
+
47
+ * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
48
+
49
+ * **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
50
+
51
+ * **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
52
+
53
+ * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
54
+
55
+ * **filters:** Filter plugins options (optional).
56
+
57
+ * **exec:** Executor plugin options. An executor plugin controls parallel processing (such as built-in thread executor, `Hadoop MapReduce executor <https://github.com/embulk/embulk-executor-mapreduce>`_)
58
+
59
+ In many cases, what you need to write is **in:**, **out:** and **formatter** sections only because the ``guess`` command guesses **parser** and **decoder** options for you. See also the `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
60
+
61
+
62
+ Local file input plugin
63
+ ------------------
64
+
65
+ The ``file`` input plugin reads files from local file system.
66
+
67
+ Options
68
+ ~~~~~~~~~~~~~~~~~~
69
+
70
+ +----------------+----------+------------------------------------------------+-----------+
71
+ | name | type | description | required? |
72
+ +================+==========+================================================+===========+
73
+ | path\_prefix | string | Path prefix of input files | required |
74
+ +----------------+----------+------------------------------------------------+-----------+
75
+ | parser | hash | Parser configuration (see below) | required |
76
+ +----------------+----------+------------------------------------------------+-----------+
77
+ | decoders | array | Decoder configuration (see below) | |
78
+ +----------------+----------+------------------------------------------------+-----------+
79
+ | last\_path | string | Name of last read file in previous operation | |
80
+ +----------------+----------+------------------------------------------------+-----------+
81
+
82
+ The ``path_prefix`` option is required. If you have files as following, you may set ``path_prefix: /path/to/files/sample_``:
83
+
84
+ ::
85
+
86
+ .
87
+ `-- path
88
+ `-- to
89
+ `-- files
90
+ |-- sample_01.csv -> read
91
+ |-- sample_02.csv -> read
92
+ |-- sample_03.csv -> read
93
+ |-- sample_04.csv -> read
94
+
95
+ The ``last_path`` option is used to skip files whose names come before, or are the same as, the given file in dictionary order.
96
+ For example, if you set ``last_path: /path/to/files/sample_02.csv``, Embulk reads following files:
97
+
98
+ ::
99
+
100
+ .
101
+ `-- path
102
+ `-- to
103
+ `-- files
104
+ |-- sample_01.csv -> skip
105
+ |-- sample_02.csv -> skip
106
+ |-- sample_03.csv -> read
107
+ |-- sample_04.csv -> read
108
+
109
+ Example
110
+ ~~~~~~~~~~~~~~~~~~
111
+
112
+ .. code-block:: yaml
113
+
114
+ in:
115
+ type: file
116
+ path_prefix: /path/to/files/sample_
117
+ last_path: /path/to/files/sample_02.csv
118
+ parser:
119
+ ...
120
+
121
+ In most cases, you'll use guess to configure the parsers and decoders. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
122
+
123
+ CSV parser plugin
124
+ ------------------
125
+
126
+ The ``csv`` parser plugin parses CSV and TSV files.
127
+
128
+ Options
129
+ ~~~~~~~~~~~~~~~~~~
130
+
131
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
132
+ | name | type | description | required? |
133
+ +============================+==========+================================================================================================================+========================+
134
+ | delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
135
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
136
+ | quote | string | The character surrounding a quoted value | ``\"`` by default |
137
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
138
+ | escape | string | Escape character to escape a special character | ``\\`` by default |
139
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
140
+ | skip\_header\_lines | integer | Skip this number of lines first. Set 1 if the file has header line. | ``0`` by default |
141
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
142
+ | null\_string | string | If a value is this string, converts it to NULL. For example, set ``\N`` for CSV files created by mysqldump | |
143
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
144
+ | trim\_if\_not\_quoted | boolean | If true, remove spaces of a value if the value is not surrounded by the quote character | ``false`` by default |
145
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
146
+ | allow\_optional\_columns | boolean | If true, set null to insufficient columns. Otherwise, skip the row in case of insufficient number of columns | ``false`` by default |
147
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
148
+ | max\_quoted\_size\_limit | integer | Maximum number of bytes of a quoted value. If a value exceeds the limit, the row will be skipped | ``131072`` by default |
149
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
150
+ | default\_timezone | string | Time zone of timestamp columns if the value itself doesn't include time zone description (eg. Asia/Tokyo) | ``UTC`` by default |
151
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
152
+ | newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
153
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
154
+ | charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
155
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
156
+ | columns | hash | Columns (see below) | required |
157
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
158
+
159
+ The ``columns`` option declares the list of columns. This CSV parser plugin ignores the header line.
160
+
161
+ +----------+-------------------------------------------------+
162
+ | name | description |
163
+ +==========+=================================================+
164
+ | name | Name of the column |
165
+ +----------+-------------------------------------------------+
166
+ | type | Type of the column (see below) |
167
+ +----------+-------------------------------------------------+
168
+ | format | Format of the timestamp if type is timestamp |
169
+ +----------+-------------------------------------------------+
170
+
171
+ List of types:
172
+
173
+ +-------------+----------------------------------------------+
174
+ | name | description |
175
+ +=============+==============================================+
176
+ | boolean | true or false |
177
+ +-------------+----------------------------------------------+
178
+ | long | 64-bit signed integers |
179
+ +-------------+----------------------------------------------+
180
+ | timestamp | Date and time with nano-seconds precision |
181
+ +-------------+----------------------------------------------+
182
+ | double | 64-bit floating point numbers |
183
+ +-------------+----------------------------------------------+
184
+ | string | Strings |
185
+ +-------------+----------------------------------------------+
186
+
187
+ You can use ``guess`` to automatically generate the column settings. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
188
+
189
+ Example
190
+ ~~~~~~~~~~~~~~~~~~
191
+
192
+ .. code-block:: yaml
193
+
194
+ in:
195
+ ...
196
+ parser:
197
+ type: csv
198
+ charset: UTF-8
199
+ newline: CRLF
200
+ delimiter: "\t"
201
+ quote: '"'
202
+ escape: ''
203
+ null_string: 'NULL'
204
+ skip_header_lines: 1
205
+ columns:
206
+ - {name: id, type: long}
207
+ - {name: account, type: long}
208
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
209
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
210
+ - {name: comment, type: string}
211
+
212
+ Gzip decoder plugin
213
+ ------------------
214
+
215
+ The ``gzip`` decoder plugin decompresses gzip files before input plugins read them.
216
+
217
+ Options
218
+ ~~~~~~~~~~~~~~~~~~
219
+
220
+ This plugin doesn't have any options.
221
+
222
+ Example
223
+ ~~~~~~~~~~~~~~~~~~
224
+
225
+ .. code-block:: yaml
226
+
227
+ in:
228
+ ...
229
+ decoders:
230
+ - {type: gzip}
231
+
232
+
233
+ File output plugin
234
+ ------------------
235
+
236
+ The ``file`` output plugin writes records to local file system.
237
+
238
+ Options
239
+ ~~~~~~~~~~~~~~~~~~
240
+
241
+ +--------------------+----------+---------------------------------------------------+----------------------------+
242
+ | name | type | description | required? |
243
+ +====================+==========+===================================================+============================+
244
+ | path\_prefix | string | Path prefix of the output files | required |
245
+ +--------------------+----------+---------------------------------------------------+----------------------------+
246
+ | sequence\_format | string | Format of the sequence number of the output files | ``.%03d.%02d`` by default |
247
+ +--------------------+----------+---------------------------------------------------+----------------------------+
248
+ | file\_ext | string | Path suffix of the output files | required |
249
+ +--------------------+----------+---------------------------------------------------+----------------------------+
250
+
251
+ For example, if you set ``path_prefix: /path/to/output/sample``, ``sequence_format: ".%03d.%02d"``, and ``file_ext: .csv``, name of the output files will be as following:
252
+
253
+ ::
254
+
255
+ .
256
+ `-- path
257
+ `-- to
258
+ `-- output
259
+ |-- sample.01.000.csv
260
+ |-- sample.02.000.csv
261
+ |-- sample.03.000.csv
262
+ |-- sample.04.000.csv
263
+
264
+ ``sequence_format`` formats task index and sequence number in a task.
265
+
266
+ Example
267
+ ~~~~~~~~~~~~~~~~~~
268
+
269
+ .. code-block:: yaml
270
+
271
+ out:
272
+ type: file
273
+ path_prefix: /path/to/output/sample
274
+ file_ext: .csv
275
+ formatter:
276
+ ...
277
+
278
+ CSV formatter plugin
279
+ ------------------
280
+
281
+ The ``csv`` formatter plugin formats records using CSV or TSV format.
282
+
283
+ Options
284
+ ~~~~~~~~~~~~~~~~~~
285
+
286
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
287
+ | name | type | description | required? |
288
+ +================+==========+=======================================================================================================+========================+
289
+ | delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
290
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
291
+ | header\_line | boolean | If true, write the header line with column name at the first line | |
292
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
293
+ | newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
294
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
295
+ | charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
296
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
297
+
298
+ Example
299
+ ~~~~~~~~~~~~~~~~~~
300
+
301
+ .. code-block:: yaml
302
+
303
+ out:
304
+ ...
305
+ formatter:
306
+ type: csv
307
+ delimiter: "\t"
308
+ newline: LF
309
+ charset: UTF-8
310
+
311
+ Gzip encoder plugin
312
+ ------------------
313
+
314
+ The ``gzip`` encoder plugin compresses output files using gzip.
315
+
316
+ Options
317
+ ~~~~~~~~~~~~~~~~~~
318
+
319
+ +---------+----------+----------------------------------------------------------------------+--------------------+
320
+ | name | type | description | required? |
321
+ +=========+==========+======================================================================+====================+
322
+ | level | integer | Compression level. From 0 (no compression) to 9 (best compression). | ``6`` by default |
323
+ +---------+----------+----------------------------------------------------------------------+--------------------+
324
+
325
+ Example
326
+ ~~~~~~~~~~~~~~~~~~
327
+
328
+ .. code-block:: yaml
329
+
330
+ out:
331
+ ...
332
+ encoders:
333
+ - type: gzip
334
+ level: 1
335
+
@@ -3,26 +3,71 @@
3
3
  You can adapt this file completely to your liking, but it should at least
4
4
  contain the root `toctree` directive.
5
5
 
6
- Embulk documentation
6
+ Embulk
7
7
  ==================================
8
8
 
9
- https://github.com/embulk/embulk
9
+ .. image:: _static/embulk-logo.png
10
+ :width: 512px
11
+ :target: https://github.com/embulk/embulk
12
+
13
+ What's Embulk?
14
+ ------------------
15
+
16
+ Embulk is an open-source bulk data loader that helps data transfer between various databases, storages, file formats, and cloud services.
17
+
18
+ Embulk supports:
19
+
20
+ * Automatic guessing of input file formats
21
+ * Parallel & distributed execution to deal with big data sets
22
+ * Transaction control to guarantee All-or-Nothing
23
+ * Resuming
24
+ * Plugins released on RubyGems.org
25
+
26
+ You can define a bulk data loading using a combination of input and output plugins:
27
+
28
+ .. image:: _static/embulk-architecture.png
29
+ :width: 640px
30
+ :target: https://github.com/embulk/embulk#quick-start
31
+
32
+ For example, `this tutorial <recipe/scheduled-csv-load-to-elasticsearch-kibana4.html>`_ describes how to use **file** input plugin with **csv** parser plugin and **gzip** decoder plugin to read CSV files, and **elasticsearch** output plugin to load the records to Elasticsearch.
33
+
34
+ Documents
35
+ ------------------
10
36
 
11
37
  * `Quick Start <https://github.com/embulk/embulk#quick-start>`_
12
38
 
13
- * `Linux and Mac OS X <https://github.com/embulk/embulk#linux--mac--bsd>`_
39
+ * `Linux <https://github.com/embulk/embulk#linux--mac--bsd>`_
40
+
41
+ * `Mac OS X <https://github.com/embulk/embulk#linux--mac--bsd>`_
14
42
 
15
43
  * `Windows <https://github.com/embulk/embulk#windows>`_
16
44
 
45
+ .. toctree::
46
+ :maxdepth: 2
47
+
48
+ recipe
49
+
17
50
  * `List of Plugins by Category <http://www.embulk.org/plugins/>`_
18
51
 
52
+ * `Input plugins <http://www.embulk.org/plugins/#input>`_
53
+
54
+ * `Output plugins <http://www.embulk.org/plugins/#output>`_
55
+
56
+ * `File parser plugins <http://www.embulk.org/plugins/#file-parser>`_
57
+
58
+ * `File formatter plugins <http://www.embulk.org/plugins/#file-formatter>`_
59
+
60
+ * `Filter plugins <http://www.embulk.org/plugins/#filter>`_
61
+
19
62
  .. toctree::
20
63
  :maxdepth: 2
21
64
 
22
- recipe
65
+ built-in
23
66
  release
24
67
 
25
68
  * `JavaDoc <javadoc/index.html>`_
26
69
 
27
70
  * `RDoc <rdoc/_index.html>`_
28
71
 
72
+ * `GitHub <https://github.com/embulk/embulk>`_
73
+
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
56
56
 
57
57
  .. code-block:: console
58
58
 
59
- $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -O /usr/local/bin/embulk
59
+ $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -O /usr/local/bin/embulk
60
60
  $ sudo chmod +x /usr/local/bin/embulk
61
61
 
62
62
  Step 2. Install Elasticsearch plugin
@@ -112,7 +112,7 @@ The generated config-complete.yml file should include complete information as fo
112
112
  quote: '"'
113
113
  escape: ''
114
114
  null_string: 'NULL'
115
- header_line: true
115
+ skip_header_lines: 1
116
116
  columns:
117
117
  - {name: id, type: long}
118
118
  - {name: account, type: long}
@@ -4,26 +4,27 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
- release/release-0.1.0
8
- release/release-0.2.0
9
- release/release-0.2.1
10
- release/release-0.3.0
11
- release/release-0.3.1
12
- release/release-0.3.2
13
- release/release-0.4.0
14
- release/release-0.4.1
15
- release/release-0.4.2
16
- release/release-0.4.3
17
- release/release-0.4.4
18
- release/release-0.4.5
19
- release/release-0.4.6
20
- release/release-0.4.7
21
- release/release-0.4.8
22
- release/release-0.4.9
23
- release/release-0.4.10
24
- release/release-0.5.0
25
- release/release-0.5.1
26
- release/release-0.5.2
27
- release/release-0.5.3
7
+ release/release-0.5.5
28
8
  release/release-0.5.4
9
+ release/release-0.5.3
10
+ release/release-0.5.2
11
+ release/release-0.5.1
12
+ release/release-0.5.0
13
+ release/release-0.4.10
14
+ release/release-0.4.9
15
+ release/release-0.4.8
16
+ release/release-0.4.7
17
+ release/release-0.4.6
18
+ release/release-0.4.5
19
+ release/release-0.4.4
20
+ release/release-0.4.3
21
+ release/release-0.4.2
22
+ release/release-0.4.1
23
+ release/release-0.4.0
24
+ release/release-0.3.2
25
+ release/release-0.3.1
26
+ release/release-0.3.0
27
+ release/release-0.2.1
28
+ release/release-0.2.0
29
+ release/release-0.1.0
29
30
 
@@ -0,0 +1,18 @@
1
+ Release 0.5.5
2
+ ==================================
3
+
4
+ Plugin API
5
+ ------------------
6
+
7
+ * Added ``spi.Exec.getModelManager()`` method.
8
+
9
+ General Changes
10
+ ------------------
11
+
12
+ * ``guess-csv`` does not add config parameters if ``type`` is already set by other guess plugins (@shun0102++)
13
+ * Fixed double double-quoting in build.gradle file generated by the plugin template generator
14
+
15
+
16
+ Release Date
17
+ ------------------
18
+ 2015-04-07
@@ -150,7 +150,7 @@ public class CsvParserPlugin
150
150
  schema.visitColumns(new ColumnVisitor() {
151
151
  public void booleanColumn(Column column)
152
152
  {
153
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
153
+ String v = nextColumn();
154
154
  if (v == null) {
155
155
  pageBuilder.setNull(column);
156
156
  } else {
@@ -160,7 +160,7 @@ public class CsvParserPlugin
160
160
 
161
161
  public void longColumn(Column column)
162
162
  {
163
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
163
+ String v = nextColumn();
164
164
  if (v == null) {
165
165
  pageBuilder.setNull(column);
166
166
  } else {
@@ -175,7 +175,7 @@ public class CsvParserPlugin
175
175
 
176
176
  public void doubleColumn(Column column)
177
177
  {
178
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
178
+ String v = nextColumn();
179
179
  if (v == null) {
180
180
  pageBuilder.setNull(column);
181
181
  } else {
@@ -190,7 +190,7 @@ public class CsvParserPlugin
190
190
 
191
191
  public void stringColumn(Column column)
192
192
  {
193
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
193
+ String v = nextColumn();
194
194
  if (v == null) {
195
195
  pageBuilder.setNull(column);
196
196
  } else {
@@ -200,7 +200,7 @@ public class CsvParserPlugin
200
200
 
201
201
  public void timestampColumn(Column column)
202
202
  {
203
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
203
+ String v = nextColumn();
204
204
  if (v == null) {
205
205
  pageBuilder.setNull(column);
206
206
  } else {
@@ -212,6 +212,24 @@ public class CsvParserPlugin
212
212
  }
213
213
  }
214
214
  }
215
+
216
+ private String nextColumn()
217
+ {
218
+ if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
219
+ return null;
220
+ }
221
+ String v = tokenizer.nextColumn();
222
+ if (!v.isEmpty()) {
223
+ if (v.equals(nullStringOrNull)) {
224
+ return null;
225
+ }
226
+ return v;
227
+ } else if (tokenizer.wasQuotedColumn()) {
228
+ return "";
229
+ } else {
230
+ return null;
231
+ }
232
+ }
215
233
  });
216
234
  pageBuilder.addRecord();
217
235
 
@@ -228,24 +246,6 @@ public class CsvParserPlugin
228
246
  }
229
247
  }
230
248
 
231
- private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull, boolean allowOptionalColumns)
232
- {
233
- if(allowOptionalColumns && !tokenizer.hasNextColumn()) {
234
- return null;
235
- }
236
- String v = tokenizer.nextColumn();
237
- if (!v.isEmpty()) {
238
- if (v.equals(nullStringOrNull)) {
239
- return null;
240
- }
241
- return v;
242
- } else if (tokenizer.wasQuotedColumn()) {
243
- return "";
244
- } else {
245
- return null;
246
- }
247
- }
248
-
249
249
  static class CsvRecordValidateException
250
250
  extends RuntimeException
251
251
  {
@@ -40,7 +40,7 @@ Gem::Specification.new do |spec|
40
40
  spec.version = "${project.version}"
41
41
  spec.authors = [<%= author.dump %>]
42
42
  spec.summary = %[<%= display_name %> <%= display_category %> plugin for Embulk]
43
- spec.description = %[<%= "#{description}".dump %>]
43
+ spec.description = %[<%= description %>]
44
44
  spec.email = [<%= email.dump %>]
45
45
  spec.licenses = ["MIT"]
46
46
  # TODO set this: spec.homepage = <%= "https://github.com/#{email[/([^@]*)/]}/#{project_name}".dump %>
@@ -28,6 +28,8 @@ module Embulk
28
28
  NO_SKIP_DETECT_LINES = 10
29
29
 
30
30
  def guess_lines(config, sample_lines)
31
+ return {} unless config.fetch("type", "csv") == "csv"
32
+
31
33
  delim = guess_delimiter(sample_lines)
32
34
  unless delim
33
35
  # not CSV file
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.5.4'
2
+ VERSION = '0.5.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-24 00:00:00.000000000 Z
11
+ date: 2015-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -249,6 +249,9 @@ files:
249
249
  - embulk-docs/plugins/index.html.erb
250
250
  - embulk-docs/plugins/plugins.css
251
251
  - embulk-docs/push-gh-pages.sh
252
+ - embulk-docs/src/_static/embulk-architecture.png
253
+ - embulk-docs/src/_static/embulk-logo.png
254
+ - embulk-docs/src/built-in.rst
252
255
  - embulk-docs/src/conf.py
253
256
  - embulk-docs/src/index.rst
254
257
  - embulk-docs/src/recipe.rst
@@ -276,6 +279,7 @@ files:
276
279
  - embulk-docs/src/release/release-0.5.2.rst
277
280
  - embulk-docs/src/release/release-0.5.3.rst
278
281
  - embulk-docs/src/release/release-0.5.4.rst
282
+ - embulk-docs/src/release/release-0.5.5.rst
279
283
  - embulk-standards/build.gradle
280
284
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
281
285
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -380,8 +384,8 @@ files:
380
384
  - classpath/bval-jsr303-0.5.jar
381
385
  - classpath/commons-beanutils-core-1.8.3.jar
382
386
  - classpath/commons-lang3-3.1.jar
383
- - classpath/embulk-core-0.5.4.jar
384
- - classpath/embulk-standards-0.5.4.jar
387
+ - classpath/embulk-core-0.5.5.jar
388
+ - classpath/embulk-standards-0.5.5.jar
385
389
  - classpath/guava-18.0.jar
386
390
  - classpath/guice-3.0.jar
387
391
  - classpath/guice-multibindings-3.0.jar