embulk 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e0be98e5dbe81e40c6562142d2cdf44cc4f8cf34
4
- data.tar.gz: f907d431af0add753761547f09dec47113b3b236
3
+ metadata.gz: f78ef9df69c9d408b6a7df450706a54cb646c596
4
+ data.tar.gz: 79a55069daa0ec3f952fd3e6d66f9536b9b6eac5
5
5
  SHA512:
6
- metadata.gz: 2a1690e94a7622db588cc6511f1dec583320192960ecdf9851c74ac1f5feaf7bda478b8e6dbf16a7b58f870e38dd464ee949cc20c6f5cadb3473b04bf3cf23db
7
- data.tar.gz: 0eb2a31661f7772cadee71642781d2ef30cfc5015ce5bf549a3cc36310fbe29283f80d272b91ee788a4513c52ae8c09ecad8e4dce28e459ad11c5370f68a6e22
6
+ metadata.gz: 51c83cefab8712f70e350ccc446ecab9c11779447c94ddc5c7b4c10597e1c41fe9d6853e7c7b3ff0f7d132082a604901cf7344b0382711c42d101b20f210a359
7
+ data.tar.gz: 17d4ab5552e98c6bc0fa3368cbf22ab87814e035dbae5d8b6e6c91aa6778f9d89ab2a666935b382e0dd0e7252a68460c10ea54ac9d18bf4664dfc4b00d5cf84e
data/README.md CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
30
30
  Following 4 commands install embulk to your home directory:
31
31
 
32
32
  ```
33
- curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar
33
+ curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar
34
34
  chmod +x ~/.embulk/bin/embulk
35
35
  echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
36
36
  source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
45
45
  You can assume the jar file is a .bat file.
46
46
 
47
47
  ```
48
- PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -OutFile embulk.bat}"
48
+ PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -OutFile embulk.bat}"
49
49
  ```
50
50
 
51
51
  Next step: [Trying examples](#trying-examples)
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
12
12
 
13
13
  allprojects {
14
14
  group = 'org.embulk'
15
- version = '0.5.4'
15
+ version = '0.5.5'
16
16
 
17
17
  apply plugin: 'java'
18
18
  apply plugin: 'maven-publish'
@@ -2,6 +2,7 @@ package org.embulk.spi;
2
2
 
3
3
  import org.slf4j.Logger;
4
4
  import org.embulk.config.Task;
5
+ import org.embulk.config.ModelManager;
5
6
  import org.embulk.config.CommitReport;
6
7
  import org.embulk.config.ConfigDiff;
7
8
  import org.embulk.config.ConfigSource;
@@ -49,6 +50,11 @@ public class Exec
49
50
  return session().getBufferAllocator();
50
51
  }
51
52
 
53
+ public static ModelManager getModelManager()
54
+ {
55
+ return session().getModelManager();
56
+ }
57
+
52
58
  public static <T> T newPlugin(Class<T> iface, PluginType type)
53
59
  {
54
60
  return session().newPlugin(iface, type);
@@ -119,6 +119,11 @@ public class ExecSession
119
119
  return bufferAllocator;
120
120
  }
121
121
 
122
+ public ModelManager getModelManager()
123
+ {
124
+ return modelManager;
125
+ }
126
+
122
127
  public ConfigSource getExecConfig()
123
128
  {
124
129
  return execConfig;
@@ -21,7 +21,7 @@
21
21
 
22
22
  <% categories.each do |category,gems| %>
23
23
  <div class="section">
24
- <h2><%= category.upcase %></h2>
24
+ <h2 id="<%= category.gsub(/[^a-zA-Z0-9]/, '-') %>"><%= category.upcase %></h2>
25
25
  <table class="plugins">
26
26
  <thead>
27
27
  <tr>
@@ -0,0 +1,335 @@
1
+ Configuration
2
+ ==================================
3
+
4
+ .. contents::
5
+ :local:
6
+ :depth: 2
7
+
8
+ Embulk configuration file format
9
+ --------------------------------
10
+
11
+ Embulk uses a YAML file to define a bulk data loading. Here is an example of the file:
12
+
13
+ .. code-block:: yaml
14
+
15
+ in:
16
+ type: file
17
+ path_prefix: ./mydata/csv/
18
+ decoders:
19
+ - {type: gzip}
20
+ parser:
21
+ charset: UTF-8
22
+ newline: CRLF
23
+ type: csv
24
+ delimiter: ','
25
+ quote: '"'
26
+ escape: ''
27
+ null_string: 'NULL'
28
+ skip_header_lines: 1
29
+ columns:
30
+ - {name: id, type: long}
31
+ - {name: account, type: long}
32
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
33
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
34
+ - {name: comment, type: string}
35
+ filters:
36
+ - type: speedometer
37
+ speed_limit: 250000
38
+ out:
39
+ type: stdout
40
+
41
+ A configuration file consists of following sections:
42
+
43
+ * **in:** Input plugin options. An input plugin is either record-based (`MySQL <https://github.com/embulk/embulk-input-jdbc>`_, `DynamoDB <https://github.com/lulichn/embulk-input-dynamodb>`_, etc) or file-based (`S3 <https://github.com/embulk/embulk-input-s3>`_, `HTTP <https://github.com/takumakanari/embulk-input-http>`_, etc).
44
+
45
+ * **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
46
+
47
+ * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
48
+
49
+ * **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
50
+
51
+ * **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
52
+
53
+ * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
54
+
55
+ * **filters:** Filter plugins options (optional).
56
+
57
+ * **exec:** Executor plugin options. An executor plugin controls parallel processing (such as built-in thread executor, `Hadoop MapReduce executor <https://github.com/embulk/embulk-executor-mapreduce>`_)
58
+
59
+ In many cases, you only need to write the **in:**, **out:** and **formatter:** sections, because the ``guess`` command guesses the **parser** and **decoder** options for you. See also the `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
60
+
61
+
62
+ Local file input plugin
63
+ -----------------------
64
+
65
+ The ``file`` input plugin reads files from local file system.
66
+
67
+ Options
68
+ ~~~~~~~~~~~~~~~~~~
69
+
70
+ +----------------+----------+------------------------------------------------+-----------+
71
+ | name | type | description | required? |
72
+ +================+==========+================================================+===========+
73
+ | path\_prefix | string | Path prefix of input files | required |
74
+ +----------------+----------+------------------------------------------------+-----------+
75
+ | parser | hash | Parser configuration (see below) | required |
76
+ +----------------+----------+------------------------------------------------+-----------+
77
+ | decoders | array | Decoder configuration (see below) | |
78
+ +----------------+----------+------------------------------------------------+-----------+
79
+ | last\_path | string | Name of last read file in previous operation | |
80
+ +----------------+----------+------------------------------------------------+-----------+
81
+
82
+ The ``path_prefix`` option is required. If you have files as follows, you may set ``path_prefix: /path/to/files/sample_``:
83
+
84
+ ::
85
+
86
+ .
87
+ `-- path
88
+ `-- to
89
+ `-- files
90
+ |-- sample_01.csv -> read
91
+ |-- sample_02.csv -> read
92
+ |-- sample_03.csv -> read
93
+ |-- sample_04.csv -> read
94
+
95
+ The ``last_path`` option is used to skip files that are older than or the same as the specified file in dictionary order.
96
+ For example, if you set ``last_path: /path/to/files/sample_02.csv``, Embulk reads following files:
97
+
98
+ ::
99
+
100
+ .
101
+ `-- path
102
+ `-- to
103
+ `-- files
104
+ |-- sample_01.csv -> skip
105
+ |-- sample_02.csv -> skip
106
+ |-- sample_03.csv -> read
107
+ |-- sample_04.csv -> read
108
+
109
+ Example
110
+ ~~~~~~~~~~~~~~~~~~
111
+
112
+ .. code-block:: yaml
113
+
114
+ in:
115
+ type: file
116
+ path_prefix: /path/to/files/sample_
117
+ last_path: /path/to/files/sample_02.csv
118
+ parser:
119
+ ...
120
+
121
+ In most cases, you'll use ``guess`` to configure the parsers and decoders. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
122
+
123
+ CSV parser plugin
124
+ ------------------
125
+
126
+ The ``csv`` parser plugin parses CSV and TSV files.
127
+
128
+ Options
129
+ ~~~~~~~~~~~~~~~~~~
130
+
131
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
132
+ | name | type | description | required? |
133
+ +============================+==========+================================================================================================================+========================+
134
+ | delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
135
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
136
+ | quote | string | The character surrounding a quoted value | ``\"`` by default |
137
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
138
+ | escape | string | Escape character to escape a special character | ``\\`` by default |
139
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
140
+ | skip\_header\_lines | integer | Skip this number of lines first. Set 1 if the file has header line. | ``0`` by default |
141
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
142
+ | null\_string | string | If a value is this string, converts it to NULL. For example, set ``\N`` for CSV files created by mysqldump | |
143
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
144
+ | trim\_if\_not\_quoted | boolean | If true, remove spaces of a value if the value is not surrounded by the quote character | ``false`` by default |
145
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
146
+ | allow\_optional\_columns | boolean | If true, set null to insufficient columns. Otherwise, skip the row in case of insufficient number of columns | ``false`` by default |
147
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
148
+ | max\_quoted\_size\_limit | integer | Maximum number of bytes of a quoted value. If a value exceeds the limit, the row will be skipped | ``131072`` by default |
149
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
150
+ | default\_timezone | string | Time zone of timestamp columns if the value itself doesn't include time zone description (eg. Asia/Tokyo) | ``UTC`` by default |
151
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
152
+ | newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
153
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
154
+ | charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
155
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
156
+ | columns | hash | Columns (see below) | required |
157
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
158
+
159
+ The ``columns`` option declares the list of columns. This CSV parser plugin ignores the header line.
160
+
161
+ +----------+-------------------------------------------------+
162
+ | name | description |
163
+ +==========+=================================================+
164
+ | name | Name of the column |
165
+ +----------+-------------------------------------------------+
166
+ | type | Type of the column (see below) |
167
+ +----------+-------------------------------------------------+
168
+ | format | Format of the timestamp if type is timestamp |
169
+ +----------+-------------------------------------------------+
170
+
171
+ List of types:
172
+
173
+ +-------------+----------------------------------------------+
174
+ | name | description |
175
+ +=============+==============================================+
176
+ | boolean | true or false |
177
+ +-------------+----------------------------------------------+
178
+ | long | 64-bit signed integers |
179
+ +-------------+----------------------------------------------+
180
+ | timestamp | Date and time with nano-seconds precision |
181
+ +-------------+----------------------------------------------+
182
+ | double | 64-bit floating point numbers |
183
+ +-------------+----------------------------------------------+
184
+ | string | Strings |
185
+ +-------------+----------------------------------------------+
186
+
187
+ You can use ``guess`` to automatically generate the column settings. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
188
+
189
+ Example
190
+ ~~~~~~~~~~~~~~~~~~
191
+
192
+ .. code-block:: yaml
193
+
194
+ in:
195
+ ...
196
+ parser:
197
+ type: csv
198
+ charset: UTF-8
199
+ newline: CRLF
200
+ delimiter: "\t"
201
+ quote: '"'
202
+ escape: ''
203
+ null_string: 'NULL'
204
+ skip_header_lines: 1
205
+ columns:
206
+ - {name: id, type: long}
207
+ - {name: account, type: long}
208
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
209
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
210
+ - {name: comment, type: string}
211
+
212
+ Gzip decoder plugin
213
+ -------------------
214
+
215
+ The ``gzip`` decoder plugin decompresses gzip files before input plugins read them.
216
+
217
+ Options
218
+ ~~~~~~~~~~~~~~~~~~
219
+
220
+ This plugin doesn't have any options.
221
+
222
+ Example
223
+ ~~~~~~~~~~~~~~~~~~
224
+
225
+ .. code-block:: yaml
226
+
227
+ in:
228
+ ...
229
+ decoders:
230
+ - {type: gzip}
231
+
232
+
233
+ File output plugin
234
+ ------------------
235
+
236
+ The ``file`` output plugin writes records to local file system.
237
+
238
+ Options
239
+ ~~~~~~~~~~~~~~~~~~
240
+
241
+ +--------------------+----------+---------------------------------------------------+----------------------------+
242
+ | name | type | description | required? |
243
+ +====================+==========+===================================================+============================+
244
+ | path\_prefix | string | Path prefix of the output files | required |
245
+ +--------------------+----------+---------------------------------------------------+----------------------------+
246
+ | sequence\_format | string | Format of the sequence number of the output files | ``.%03d.%02d`` by default |
247
+ +--------------------+----------+---------------------------------------------------+----------------------------+
248
+ | file\_ext | string | Path suffix of the output files | required |
249
+ +--------------------+----------+---------------------------------------------------+----------------------------+
250
+
251
+ For example, if you set ``path_prefix: /path/to/output/sample``, ``sequence_format: ".%03d.%02d"``, and ``file_ext: .csv``, the names of the output files will be as follows:
252
+
253
+ ::
254
+
255
+ .
256
+ `-- path
257
+ `-- to
258
+ `-- output
259
+ |-- sample.01.000.csv
260
+ |-- sample.02.000.csv
261
+ |-- sample.03.000.csv
262
+ |-- sample.04.000.csv
263
+
264
+ ``sequence_format`` formats task index and sequence number in a task.
265
+
266
+ Example
267
+ ~~~~~~~~~~~~~~~~~~
268
+
269
+ .. code-block:: yaml
270
+
271
+ out:
272
+ type: file
273
+ path_prefix: /path/to/output/sample
274
+ file_ext: .csv
275
+ formatter:
276
+ ...
277
+
278
+ CSV formatter plugin
279
+ --------------------
280
+
281
+ The ``csv`` formatter plugin formats records using CSV or TSV format.
282
+
283
+ Options
284
+ ~~~~~~~~~~~~~~~~~~
285
+
286
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
287
+ | name | type | description | required? |
288
+ +================+==========+=======================================================================================================+========================+
289
+ | delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
290
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
291
+ | header\_line | boolean | If true, write the header line with column name at the first line | |
292
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
293
+ | newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
294
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
295
+ | charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
296
+ +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
297
+
298
+ Example
299
+ ~~~~~~~~~~~~~~~~~~
300
+
301
+ .. code-block:: yaml
302
+
303
+ out:
304
+ ...
305
+ formatter:
306
+ - type: csv
307
+ delimiter: "\t"
308
+ newline: LF
309
+ charset: UTF-8
310
+
311
+ Gzip encoder plugin
312
+ -------------------
313
+
314
+ The ``gzip`` encoder plugin compresses output files using gzip.
315
+
316
+ Options
317
+ ~~~~~~~~~~~~~~~~~~
318
+
319
+ +---------+----------+----------------------------------------------------------------------+--------------------+
320
+ | name | type | description | required? |
321
+ +=========+==========+======================================================================+====================+
322
+ | level | integer | Compression level. From 0 (no compression) to 9 (best compression). | ``6`` by default |
323
+ +---------+----------+----------------------------------------------------------------------+--------------------+
324
+
325
+ Example
326
+ ~~~~~~~~~~~~~~~~~~
327
+
328
+ .. code-block:: yaml
329
+
330
+ out:
331
+ ...
332
+ encoders:
333
+ - type: gzip
334
+ level: 1
335
+
@@ -3,26 +3,71 @@
3
3
  You can adapt this file completely to your liking, but it should at least
4
4
  contain the root `toctree` directive.
5
5
 
6
- Embulk documentation
6
+ Embulk
7
7
  ==================================
8
8
 
9
- https://github.com/embulk/embulk
9
+ .. image:: _static/embulk-logo.png
10
+ :width: 512px
11
+ :target: https://github.com/embulk/embulk
12
+
13
+ What's Embulk?
14
+ ------------------
15
+
16
+ Embulk is an open-source bulk data loader that helps data transfer between various databases, storages, file formats, and cloud services.
17
+
18
+ Embulk supports:
19
+
20
+ * Automatic guessing of input file formats
21
+ * Parallel & distributed execution to deal with big data sets
22
+ * Transaction control to guarantee All-or-Nothing
23
+ * Resuming
24
+ * Plugins released on RubyGems.org
25
+
26
+ You can define a bulk data loading using a combination of input and output plugins:
27
+
28
+ .. image:: _static/embulk-architecture.png
29
+ :width: 640px
30
+ :target: https://github.com/embulk/embulk#quick-start
31
+
32
+ For example, `this tutorial <recipe/scheduled-csv-load-to-elasticsearch-kibana4.html>`_ describes how to use **file** input plugin with **csv** parser plugin and **gzip** decoder plugin to read CSV files, and **elasticsearch** output plugin to load the records to Elasticsearch.
33
+
34
+ Documents
35
+ ------------------
10
36
 
11
37
  * `Quick Start <https://github.com/embulk/embulk#quick-start>`_
12
38
 
13
- * `Linux and Mac OS X <https://github.com/embulk/embulk#linux--mac--bsd>`_
39
+ * `Linux <https://github.com/embulk/embulk#linux--mac--bsd>`_
40
+
41
+ * `Mac OS X <https://github.com/embulk/embulk#linux--mac--bsd>`_
14
42
 
15
43
  * `Windows <https://github.com/embulk/embulk#windows>`_
16
44
 
45
+ .. toctree::
46
+ :maxdepth: 2
47
+
48
+ recipe
49
+
17
50
  * `List of Plugins by Category <http://www.embulk.org/plugins/>`_
18
51
 
52
+ * `Input plugins <http://www.embulk.org/plugins/#input>`_
53
+
54
+ * `Output plugins <http://www.embulk.org/plugins/#output>`_
55
+
56
+ * `File parser plugins <http://www.embulk.org/plugins/#file-parser>`_
57
+
58
+ * `File formatter plugins <http://www.embulk.org/plugins/#file-formatter>`_
59
+
60
+ * `Filter plugins <http://www.embulk.org/plugins/#filter>`_
61
+
19
62
  .. toctree::
20
63
  :maxdepth: 2
21
64
 
22
- recipe
65
+ built-in
23
66
  release
24
67
 
25
68
  * `JavaDoc <javadoc/index.html>`_
26
69
 
27
70
  * `RDoc <rdoc/_index.html>`_
28
71
 
72
+ * `GitHub <https://github.com/embulk/embulk>`_
73
+
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
56
56
 
57
57
  .. code-block:: console
58
58
 
59
- $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -O /usr/local/bin/embulk
59
+ $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -O /usr/local/bin/embulk
60
60
  $ sudo chmod +x /usr/local/bin/embulk
61
61
 
62
62
  Step 2. Install Elasticsearch plugin
@@ -112,7 +112,7 @@ The generated config-complete.yml file should include complete information as fo
112
112
  quote: '"'
113
113
  escape: ''
114
114
  null_string: 'NULL'
115
- header_line: true
115
+ skip_header_lines: 1
116
116
  columns:
117
117
  - {name: id, type: long}
118
118
  - {name: account, type: long}
@@ -4,26 +4,27 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
- release/release-0.1.0
8
- release/release-0.2.0
9
- release/release-0.2.1
10
- release/release-0.3.0
11
- release/release-0.3.1
12
- release/release-0.3.2
13
- release/release-0.4.0
14
- release/release-0.4.1
15
- release/release-0.4.2
16
- release/release-0.4.3
17
- release/release-0.4.4
18
- release/release-0.4.5
19
- release/release-0.4.6
20
- release/release-0.4.7
21
- release/release-0.4.8
22
- release/release-0.4.9
23
- release/release-0.4.10
24
- release/release-0.5.0
25
- release/release-0.5.1
26
- release/release-0.5.2
27
- release/release-0.5.3
7
+ release/release-0.5.5
28
8
  release/release-0.5.4
9
+ release/release-0.5.3
10
+ release/release-0.5.2
11
+ release/release-0.5.1
12
+ release/release-0.5.0
13
+ release/release-0.4.10
14
+ release/release-0.4.9
15
+ release/release-0.4.8
16
+ release/release-0.4.7
17
+ release/release-0.4.6
18
+ release/release-0.4.5
19
+ release/release-0.4.4
20
+ release/release-0.4.3
21
+ release/release-0.4.2
22
+ release/release-0.4.1
23
+ release/release-0.4.0
24
+ release/release-0.3.2
25
+ release/release-0.3.1
26
+ release/release-0.3.0
27
+ release/release-0.2.1
28
+ release/release-0.2.0
29
+ release/release-0.1.0
29
30
 
@@ -0,0 +1,18 @@
1
+ Release 0.5.5
2
+ ==================================
3
+
4
+ Plugin API
5
+ ------------------
6
+
7
+ * Added ``spi.Exec.getModelManager()`` method.
8
+
9
+ General Changes
10
+ ------------------
11
+
12
+ * ``guess-csv`` does not add config parameters if ``type`` is already set by other guess plugins (@shun0102++)
13
+ * Fixed double double-quoting in build.gradle file generated by the plugin template generator
14
+
15
+
16
+ Release Date
17
+ ------------------
18
+ 2015-04-07
@@ -150,7 +150,7 @@ public class CsvParserPlugin
150
150
  schema.visitColumns(new ColumnVisitor() {
151
151
  public void booleanColumn(Column column)
152
152
  {
153
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
153
+ String v = nextColumn();
154
154
  if (v == null) {
155
155
  pageBuilder.setNull(column);
156
156
  } else {
@@ -160,7 +160,7 @@ public class CsvParserPlugin
160
160
 
161
161
  public void longColumn(Column column)
162
162
  {
163
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
163
+ String v = nextColumn();
164
164
  if (v == null) {
165
165
  pageBuilder.setNull(column);
166
166
  } else {
@@ -175,7 +175,7 @@ public class CsvParserPlugin
175
175
 
176
176
  public void doubleColumn(Column column)
177
177
  {
178
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
178
+ String v = nextColumn();
179
179
  if (v == null) {
180
180
  pageBuilder.setNull(column);
181
181
  } else {
@@ -190,7 +190,7 @@ public class CsvParserPlugin
190
190
 
191
191
  public void stringColumn(Column column)
192
192
  {
193
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
193
+ String v = nextColumn();
194
194
  if (v == null) {
195
195
  pageBuilder.setNull(column);
196
196
  } else {
@@ -200,7 +200,7 @@ public class CsvParserPlugin
200
200
 
201
201
  public void timestampColumn(Column column)
202
202
  {
203
- String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
203
+ String v = nextColumn();
204
204
  if (v == null) {
205
205
  pageBuilder.setNull(column);
206
206
  } else {
@@ -212,6 +212,24 @@ public class CsvParserPlugin
212
212
  }
213
213
  }
214
214
  }
215
+
216
+ private String nextColumn()
217
+ {
218
+ if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
219
+ return null;
220
+ }
221
+ String v = tokenizer.nextColumn();
222
+ if (!v.isEmpty()) {
223
+ if (v.equals(nullStringOrNull)) {
224
+ return null;
225
+ }
226
+ return v;
227
+ } else if (tokenizer.wasQuotedColumn()) {
228
+ return "";
229
+ } else {
230
+ return null;
231
+ }
232
+ }
215
233
  });
216
234
  pageBuilder.addRecord();
217
235
 
@@ -228,24 +246,6 @@ public class CsvParserPlugin
228
246
  }
229
247
  }
230
248
 
231
- private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull, boolean allowOptionalColumns)
232
- {
233
- if(allowOptionalColumns && !tokenizer.hasNextColumn()) {
234
- return null;
235
- }
236
- String v = tokenizer.nextColumn();
237
- if (!v.isEmpty()) {
238
- if (v.equals(nullStringOrNull)) {
239
- return null;
240
- }
241
- return v;
242
- } else if (tokenizer.wasQuotedColumn()) {
243
- return "";
244
- } else {
245
- return null;
246
- }
247
- }
248
-
249
249
  static class CsvRecordValidateException
250
250
  extends RuntimeException
251
251
  {
@@ -40,7 +40,7 @@ Gem::Specification.new do |spec|
40
40
  spec.version = "${project.version}"
41
41
  spec.authors = [<%= author.dump %>]
42
42
  spec.summary = %[<%= display_name %> <%= display_category %> plugin for Embulk]
43
- spec.description = %[<%= "#{description}".dump %>]
43
+ spec.description = %[<%= description %>]
44
44
  spec.email = [<%= email.dump %>]
45
45
  spec.licenses = ["MIT"]
46
46
  # TODO set this: spec.homepage = <%= "https://github.com/#{email[/([^@]*)/]}/#{project_name}".dump %>
@@ -28,6 +28,8 @@ module Embulk
28
28
  NO_SKIP_DETECT_LINES = 10
29
29
 
30
30
  def guess_lines(config, sample_lines)
31
+ return {} unless config.fetch("type", "csv") == "csv"
32
+
31
33
  delim = guess_delimiter(sample_lines)
32
34
  unless delim
33
35
  # not CSV file
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.5.4'
2
+ VERSION = '0.5.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-24 00:00:00.000000000 Z
11
+ date: 2015-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -249,6 +249,9 @@ files:
249
249
  - embulk-docs/plugins/index.html.erb
250
250
  - embulk-docs/plugins/plugins.css
251
251
  - embulk-docs/push-gh-pages.sh
252
+ - embulk-docs/src/_static/embulk-architecture.png
253
+ - embulk-docs/src/_static/embulk-logo.png
254
+ - embulk-docs/src/built-in.rst
252
255
  - embulk-docs/src/conf.py
253
256
  - embulk-docs/src/index.rst
254
257
  - embulk-docs/src/recipe.rst
@@ -276,6 +279,7 @@ files:
276
279
  - embulk-docs/src/release/release-0.5.2.rst
277
280
  - embulk-docs/src/release/release-0.5.3.rst
278
281
  - embulk-docs/src/release/release-0.5.4.rst
282
+ - embulk-docs/src/release/release-0.5.5.rst
279
283
  - embulk-standards/build.gradle
280
284
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
281
285
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -380,8 +384,8 @@ files:
380
384
  - classpath/bval-jsr303-0.5.jar
381
385
  - classpath/commons-beanutils-core-1.8.3.jar
382
386
  - classpath/commons-lang3-3.1.jar
383
- - classpath/embulk-core-0.5.4.jar
384
- - classpath/embulk-standards-0.5.4.jar
387
+ - classpath/embulk-core-0.5.5.jar
388
+ - classpath/embulk-standards-0.5.5.jar
385
389
  - classpath/guava-18.0.jar
386
390
  - classpath/guice-3.0.jar
387
391
  - classpath/guice-multibindings-3.0.jar