RubyGems - embulk - Versions diffs - 0.5.4 → 0.5.5 - Mend

embulk 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +4 -4
data/README.md +2 -2
data/build.gradle +1 -1
data/embulk-core/src/main/java/org/embulk/spi/Exec.java +6 -0
data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +5 -0
data/embulk-docs/plugins/index.html.erb +1 -1
data/embulk-docs/src/_static/embulk-architecture.png +0 -0
data/embulk-docs/src/_static/embulk-logo.png +0 -0
data/embulk-docs/src/built-in.rst +335 -0
data/embulk-docs/src/index.rst +49 -4
data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +2 -2
data/embulk-docs/src/release.rst +22 -21
data/embulk-docs/src/release/release-0.5.5.rst +18 -0
data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +23 -23
data/lib/embulk/data/new/java/build.gradle.erb +1 -1
data/lib/embulk/guess/csv.rb +2 -0
data/lib/embulk/version.rb +1 -1
metadata +8 -4

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: e0be98e5dbe81e40c6562142d2cdf44cc4f8cf34
-  data.tar.gz: f907d431af0add753761547f09dec47113b3b236
+  metadata.gz: f78ef9df69c9d408b6a7df450706a54cb646c596
+  data.tar.gz: 79a55069daa0ec3f952fd3e6d66f9536b9b6eac5
 SHA512:
-  metadata.gz: 2a1690e94a7622db588cc6511f1dec583320192960ecdf9851c74ac1f5feaf7bda478b8e6dbf16a7b58f870e38dd464ee949cc20c6f5cadb3473b04bf3cf23db
-  data.tar.gz: 0eb2a31661f7772cadee71642781d2ef30cfc5015ce5bf549a3cc36310fbe29283f80d272b91ee788a4513c52ae8c09ecad8e4dce28e459ad11c5370f68a6e22
+  metadata.gz: 51c83cefab8712f70e350ccc446ecab9c11779447c94ddc5c7b4c10597e1c41fe9d6853e7c7b3ff0f7d132082a604901cf7344b0382711c42d101b20f210a359
+  data.tar.gz: 17d4ab5552e98c6bc0fa3368cbf22ab87814e035dbae5d8b6e6c91aa6778f9d89ab2a666935b382e0dd0e7252a68460c10ea54ac9d18bf4664dfc4b00d5cf84e

data/README.md CHANGED

@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
 Following 4 commands install embulk to your home directory:
 ```
-curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar
+curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar
 chmod +x ~/.embulk/bin/embulk
 echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
 source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
 You can assume the jar file is a .bat file.
 ```
-PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -OutFile embulk.bat}"
+PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -OutFile embulk.bat}"
 ```
 Next step: [Trying examples](#trying-examples)

data/build.gradle CHANGED

@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
 allprojects {
     group = 'org.embulk'
-    version = '0.5.4'
+    version = '0.5.5'
     apply plugin: 'java'
     apply plugin: 'maven-publish'

data/embulk-core/src/main/java/org/embulk/spi/Exec.java CHANGED

@@ -2,6 +2,7 @@ package org.embulk.spi;
 import org.slf4j.Logger;
 import org.embulk.config.Task;
+import org.embulk.config.ModelManager;
 import org.embulk.config.CommitReport;
 import org.embulk.config.ConfigDiff;
 import org.embulk.config.ConfigSource;
@@ -49,6 +50,11 @@ public class Exec
         return session().getBufferAllocator();
     }
+    public static ModelManager getModelManager()
+    {
+        return session().getModelManager();
+    }
     public static <T> T newPlugin(Class<T> iface, PluginType type)
     {
         return session().newPlugin(iface, type);

data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java CHANGED

@@ -119,6 +119,11 @@ public class ExecSession
         return bufferAllocator;
     }
+    public ModelManager getModelManager()
+    {
+        return modelManager;
+    }
     public ConfigSource getExecConfig()
     {
         return execConfig;

data/embulk-docs/plugins/index.html.erb CHANGED

@@ -21,7 +21,7 @@
         <% categories.each do |category,gems| %>
         <div class="section">
-          <h2><%= category.upcase %></h2>
+          <h2 id="<%= category.gsub(/[^a-zA-Z0-9]/, '-') %>"><%= category.upcase %></h2>
           <table class="plugins">
             <thead>
               <tr>

data/embulk-docs/src/_static/embulk-architecture.png ADDED

Binary file

data/embulk-docs/src/_static/embulk-logo.png ADDED

Binary file

data/embulk-docs/src/built-in.rst ADDED

@@ -0,0 +1,335 @@
+Configuration
+==================================
+.. contents::
+   :local:
+   :depth: 2
+Embulk configuration file format
+------------------
+Embulk uses a YAML file to define a bulk data loading. Here is an example of the file:
+.. code-block:: yaml
+    in:
+      type: file
+      path_prefix: ./mydata/csv/
+      decoders:
+      - {type: gzip}
+      parser:
+        charset: UTF-8
+        newline: CRLF
+        type: csv
+        delimiter: ','
+        quote: '"'
+        escape: ''
+        null_string: 'NULL'
+        skip_header_lines: 1
+        columns:
+        - {name: id, type: long}
+        - {name: account, type: long}
+        - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
+        - {name: purchase, type: timestamp, format: '%Y%m%d'}
+        - {name: comment, type: string}
+    filters:
+      - type: speedometer
+        speed_limit: 250000
+    out:
+      type: stdout
+A configuration file consists of the following sections:
+* **in:** Input plugin options. An input plugin is either record-based (`MySQL <https://github.com/embulk/embulk-input-jdbc>`_, `DynamoDB <https://github.com/lulichn/embulk-input-dynamodb>`_, etc) or file-based (`S3 <https://github.com/embulk/embulk-input-s3>`_, `HTTP <https://github.com/takumakanari/embulk-input-http>`_, etc).
+  * **parser:** If the input is file-based, parser plugin parses a file format (built-in csv, `json <https://github.com/takumakanari/embulk-parser-json>`_, etc).
+  * **decoder:** If the input is file-based, decoder plugin decodes compression or encryption (built-in gzip, `zip <https://github.com/hata/embulk-decoder-commons-compress>`_, `tar.gz <https://github.com/hata/embulk-decoder-commons-compress>`_, etc).
+* **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
+  * **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
+  * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
+* **filters:** Filter plugins options (optional).
+* **exec:** Executor plugin options. An executor plugin controls parallel processing (such as built-in thread executor, `Hadoop MapReduce executor <https://github.com/embulk/embulk-executor-mapreduce>`_)
+In many cases, what you need to write is **in:**, **out:** and **formatter** sections only because the ``guess`` command guesses **parser** and **decoder** options for you. See also the `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
+Local file input plugin
+------------------
+The ``file`` input plugin reads files from local file system.
+Options
+~~~~~~~~~~~~~~~~~~
++----------------+----------+------------------------------------------------+-----------+
+| name           | type     | description                                    | required? |
++================+==========+================================================+===========+
+| path\_prefix   | string   | Path prefix of input files                     | required  |
++----------------+----------+------------------------------------------------+-----------+
+| parser         | hash     | Parser configuration (see below)               | required  |
++----------------+----------+------------------------------------------------+-----------+
+| decoders       | array    | Decoder configuration (see below)              |           |
++----------------+----------+------------------------------------------------+-----------+
+| last\_path     | string   | Name of last read file in previous operation   |           |
++----------------+----------+------------------------------------------------+-----------+
+The ``path_prefix`` option is required. If you have files as following, you may set ``path_prefix: /path/to/files/sample_``:
+::
+    .
+    `-- path
+        `-- to
+            `-- files
+                |-- sample_01.csv   -> read
+                |-- sample_02.csv   -> read
+                |-- sample_03.csv   -> read
+                |-- sample_04.csv   -> read
+The ``last_path`` option is used to skip files whose paths are the same as or come before the given path in dictionary order.
+For example, if you set ``last_path: /path/to/files/sample_02.csv``, Embulk reads following files:
+::
+    .
+    `-- path
+        `-- to
+            `-- files
+                |-- sample_01.csv   -> skip
+                |-- sample_02.csv   -> skip
+                |-- sample_03.csv   -> read
+                |-- sample_04.csv   -> read
+Example
+~~~~~~~~~~~~~~~~~~
+.. code-block:: yaml
+    in:
+      type: file
+      path_prefix: /path/to/files/sample_
+      last_path: /path/to/files/sample_02.csv
+      parser:
+        ...
+In most cases, you'll use guess to configure the parsers and decoders. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
+CSV parser plugin
+------------------
+The ``csv`` parser plugin parses CSV and TSV files.
+Options
+~~~~~~~~~~~~~~~~~~
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| name                       | type     | description                                                                                                    |              required? |
++============================+==========+================================================================================================================+========================+
+| delimiter                  | string   | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character              | ``,`` by default       |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| quote                      | string   | The character surrounding a quoted value                                                                       | ``\"`` by default      |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| escape                     | string   | Escape character to escape a special character                                                                 | ``\\`` by default      |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| skip\_header\_lines        | integer  | Skip this number of lines first. Set 1 if the file has header line.                                            | ``0`` by default       |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| null\_string               | string   | If a value is this string, converts it to NULL. For example, set ``\N`` for CSV files created by mysqldump     |                        |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| trim\_if\_not\_quoted      | boolean  | If true, remove spaces of a value if the value is not surrounded by the quote character                        | ``false`` by default   |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| allow\_optional\_columns   | boolean  | If true, set null to insufficient columns. Otherwise, skip the row in case of insufficient number of columns   | ``false`` by default   |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| max\_quoted\_size\_limit   | integer  | Maximum number of bytes of a quoted value. If a value exceeds the limit, the row will be skipped               | ``131072`` by default  |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| default\_timezone          | string   | Time zone of timestamp columns if the value itself doesn't include time zone description (eg. Asia/Tokyo)      | ``UTC`` by default     |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| newline                    | enum     | Newline character (CRLF, LF or CR)                                                                             | ``CRLF`` by default    |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| charset                    | enum     | Character encoding (eg. ISO-8859-1, UTF-8)                                                                     | ``UTF-8`` by default   |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+| columns                    | hash     | Columns (see below)                                                                                            | required               |
++----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
+The ``columns`` option declares the list of columns. This CSV parser plugin ignores the header line.
++----------+-------------------------------------------------+
+| name     | description                                     |
++==========+=================================================+
+| name     | Name of the column                              |
++----------+-------------------------------------------------+
+| type     | Type of the column (see below)                  |
++----------+-------------------------------------------------+
+| format   | Format of the timestamp if type is timestamp    |
++----------+-------------------------------------------------+
+List of types:
++-------------+----------------------------------------------+
+| name        | description                                  |
++=============+==============================================+
+| boolean     | true or false                                |
++-------------+----------------------------------------------+
+| long        | 64-bit signed integers                       |
++-------------+----------------------------------------------+
+| timestamp   | Date and time with nano-seconds precision    |
++-------------+----------------------------------------------+
+| double      | 64-bit floating point numbers                |
++-------------+----------------------------------------------+
+| string      | Strings                                      |
++-------------+----------------------------------------------+
+You can use ``guess`` to automatically generate the column settings. See also `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
+Example
+~~~~~~~~~~~~~~~~~~
+.. code-block:: yaml
+    in:
+      ...
+      parser:
+        type: csv
+        charset: UTF-8
+        newline: CRLF
+        delimiter: "\t"
+        quote: '"'
+        escape: ''
+        null_string: 'NULL'
+        skip_header_lines: 1
+        columns:
+        - {name: id, type: long}
+        - {name: account, type: long}
+        - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
+        - {name: purchase, type: timestamp, format: '%Y%m%d'}
+        - {name: comment, type: string}
+Gzip decoder plugin
+------------------
+The ``gzip`` decoder plugin decompresses gzip files before input plugins read them.
+Options
+~~~~~~~~~~~~~~~~~~
+This plugin doesn't have any options.
+Example
+~~~~~~~~~~~~~~~~~~
+.. code-block:: yaml
+    in:
+      ...
+      decoders:
+      - {type: gzip}
+File output plugin
+------------------
+The ``file`` output plugin writes records to local file system.
+Options
+~~~~~~~~~~~~~~~~~~
++--------------------+----------+---------------------------------------------------+----------------------------+
+| name               | type     | description                                       | required?                  |
++====================+==========+===================================================+============================+
+| path\_prefix       | string   | Path prefix of the output files                   | required                   |
++--------------------+----------+---------------------------------------------------+----------------------------+
+| sequence\_format   | string   | Format of the sequence number of the output files | ``.%03d.%02d`` by default  |
++--------------------+----------+---------------------------------------------------+----------------------------+
+| file\_ext          | string   | Path suffix of the output files                   | required                   |
++--------------------+----------+---------------------------------------------------+----------------------------+
+For example, if you set ``path_prefix: /path/to/output/sample``, ``sequence_format: ".%03d.%02d"``, and ``file_ext: .csv``, the names of the output files will be as follows:
+::
+    .
+    `-- path
+        `-- to
+            `-- output
+                |-- sample.01.000.csv
+                |-- sample.02.000.csv
+                |-- sample.03.000.csv
+                |-- sample.04.000.csv
+``sequence_format`` formats task index and sequence number in a task.
+Example
+~~~~~~~~~~~~~~~~~~
+.. code-block:: yaml
+    out:
+      type: file
+      path_prefix: /path/to/output/sample
+      file_ext: .csv
+      formatter:
+        ...
+CSV formatter plugin
+------------------
+The ``csv`` formatter plugin formats records using CSV or TSV format.
+Options
+~~~~~~~~~~~~~~~~~~
++----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
+| name           | type     | description                                                                                           | required?              |
++================+==========+=======================================================================================================+========================+
+| delimiter      | string   | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character     | ``,`` by default       |
++----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
+| header\_line   | boolean  | If true, write the header line with column name at the first line                                     |                        |
++----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
+| newline        | enum     | Newline character (CRLF, LF or CR)                                                                    | ``CRLF`` by default    |
++----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
+| charset        | enum     | Character encoding (eg. ISO-8859-1, UTF-8)                                                            | ``UTF-8`` by default   |
++----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
+Example
+~~~~~~~~~~~~~~~~~~
+.. code-block:: yaml
+    out:
+      ...
+      formatter:
+        type: csv
+        delimiter: "\t"
+        newline: LF
+        charset: UTF-8
+Gzip encoder plugin
+------------------
+The ``gzip`` encoder plugin compresses output files using gzip.
+Options
+~~~~~~~~~~~~~~~~~~
++---------+----------+----------------------------------------------------------------------+--------------------+
+| name    | type     | description                                                          | required?          |
++=========+==========+======================================================================+====================+
+| level   | integer  | Compression level. From 0 (no compression) to 9 (best compression).  | ``6`` by default   |
++---------+----------+----------------------------------------------------------------------+--------------------+
+Example
+~~~~~~~~~~~~~~~~~~
+.. code-block:: yaml
+    out:
+      ...
+      encoders:
+      - type: gzip
+        level: 1

data/embulk-docs/src/index.rst CHANGED

@@ -3,26 +3,71 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
-Embulk documentation
+Embulk
 ==================================
-https://github.com/embulk/embulk
+.. image:: _static/embulk-logo.png
+   :width: 512px
+   :target: https://github.com/embulk/embulk
+What's Embulk?
+------------------
+Embulk is an open-source bulk data loader that helps data transfer between various databases, storages, file formats, and cloud services.
+Embulk supports:
+* Automatic guessing of input file formats
+* Parallel & distributed execution to deal with big data sets
+* Transaction control to guarantee All-or-Nothing
+* Resuming
+* Plugins released on RubyGems.org
+You can define a bulk data loading using combination of input and output plugins:
+.. image:: _static/embulk-architecture.png
+   :width: 640px
+   :target: https://github.com/embulk/embulk#quick-start
+For example, `this tutorial <recipe/scheduled-csv-load-to-elasticsearch-kibana4.html>`_ describes how to use **file** input plugin with **csv** parser plugin and **gzip** decoder plugin to read CSV files, and **elasticsearch** output plugin to load the records to Elasticsearch.
+Documents
+------------------
 * `Quick Start <https://github.com/embulk/embulk#quick-start>`_
-  * `Linux and Mac OS X <https://github.com/embulk/embulk#linux--mac--bsd>`_
+  * `Linux <https://github.com/embulk/embulk#linux--mac--bsd>`_
+  * `Mac OS X <https://github.com/embulk/embulk#linux--mac--bsd>`_
   * `Windows <https://github.com/embulk/embulk#windows>`_
+.. toctree::
+   :maxdepth: 2
+   recipe
 * `List of Plugins by Category <http://www.embulk.org/plugins/>`_
+  * `Input plugins <http://www.embulk.org/plugins/#input>`_
+  * `Output plugins <http://www.embulk.org/plugins/#output>`_
+  * `File parser plugins <http://www.embulk.org/plugins/#file-parser>`_
+  * `File formatter plugins <http://www.embulk.org/plugins/#file-formatter>`_
+  * `Filter plugins <http://www.embulk.org/plugins/#filter>`_
 .. toctree::
    :maxdepth: 2
-   recipe
+   built-in
    release
 * `JavaDoc <javadoc/index.html>`_
 * `RDoc <rdoc/_index.html>`_
+* `Github <https://github.com/embulk/embulk>`_

data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst CHANGED

@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
 .. code-block:: console
-    $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -O /usr/local/bin/embulk
+    $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -O /usr/local/bin/embulk
     $ sudo chmod +x /usr/local/bin/embulk
 Step 2. Install Elasticsearch plugin
@@ -112,7 +112,7 @@ The generated config-complete.yml file should include complete information as fo
         quote: '"'
         escape: ''
         null_string: 'NULL'
-        header_line: true
+        skip_header_lines: 1
         columns:
         - {name: id, type: long}
         - {name: account, type: long}

data/embulk-docs/src/release.rst CHANGED

@@ -4,26 +4,27 @@ Release Notes
 .. toctree::
     :maxdepth: 1
-    release/release-0.1.0
-    release/release-0.2.0
-    release/release-0.2.1
-    release/release-0.3.0
-    release/release-0.3.1
-    release/release-0.3.2
-    release/release-0.4.0
-    release/release-0.4.1
-    release/release-0.4.2
-    release/release-0.4.3
-    release/release-0.4.4
-    release/release-0.4.5
-    release/release-0.4.6
-    release/release-0.4.7
-    release/release-0.4.8
-    release/release-0.4.9
-    release/release-0.4.10
-    release/release-0.5.0
-    release/release-0.5.1
-    release/release-0.5.2
-    release/release-0.5.3
+    release/release-0.5.5
     release/release-0.5.4
+    release/release-0.5.3
+    release/release-0.5.2
+    release/release-0.5.1
+    release/release-0.5.0
+    release/release-0.4.10
+    release/release-0.4.9
+    release/release-0.4.8
+    release/release-0.4.7
+    release/release-0.4.6
+    release/release-0.4.5
+    release/release-0.4.4
+    release/release-0.4.3
+    release/release-0.4.2
+    release/release-0.4.1
+    release/release-0.4.0
+    release/release-0.3.2
+    release/release-0.3.1
+    release/release-0.3.0
+    release/release-0.2.1
+    release/release-0.2.0
+    release/release-0.1.0

data/embulk-docs/src/release/release-0.5.5.rst ADDED

@@ -0,0 +1,18 @@
+Release 0.5.5
+==================================
+Plugin API
+------------------
+* Added ``spi.Exec.getModelManager()`` method.
+General Changes
+------------------
+* ``guess-csv`` does not add config parameters if ``type`` is already set by other guess plugins (@shun0102++)
+* Fixed double double-quoting in build.gradle file generated by the plugin template generator
+Release Date
+------------------
+2015-04-07

data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java CHANGED

@@ -150,7 +150,7 @@ public class CsvParserPlugin
                         schema.visitColumns(new ColumnVisitor() {
                             public void booleanColumn(Column column)
                             {
-                                String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
+                                String v = nextColumn();
                                 if (v == null) {
                                     pageBuilder.setNull(column);
                                 } else {
@@ -160,7 +160,7 @@ public class CsvParserPlugin
                             public void longColumn(Column column)
                             {
-                                String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
+                                String v = nextColumn();
                                 if (v == null) {
                                     pageBuilder.setNull(column);
                                 } else {
@@ -175,7 +175,7 @@ public class CsvParserPlugin
                             public void doubleColumn(Column column)
                             {
-                                String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
+                                String v = nextColumn();
                                 if (v == null) {
                                     pageBuilder.setNull(column);
                                 } else {
@@ -190,7 +190,7 @@ public class CsvParserPlugin
                             public void stringColumn(Column column)
                             {
-                                String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
+                                String v = nextColumn();
                                 if (v == null) {
                                     pageBuilder.setNull(column);
                                 } else {
@@ -200,7 +200,7 @@ public class CsvParserPlugin
                             public void timestampColumn(Column column)
                             {
-                                String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
+                                String v = nextColumn();
                                 if (v == null) {
                                     pageBuilder.setNull(column);
                                 } else {
@@ -212,6 +212,24 @@ public class CsvParserPlugin
                                     }
                                 }
                             }
+                            private String nextColumn()
+                            {
+                                if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
+                                    return null;
+                                }
+                                String v = tokenizer.nextColumn();
+                                if (!v.isEmpty()) {
+                                    if (v.equals(nullStringOrNull)) {
+                                        return null;
+                                    }
+                                    return v;
+                                } else if (tokenizer.wasQuotedColumn()) {
+                                    return "";
+                                } else {
+                                    return null;
+                                }
+                            }
                         });
                         pageBuilder.addRecord();
@@ -228,24 +246,6 @@ public class CsvParserPlugin
         }
     }
-    private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull, boolean allowOptionalColumns)
-    {
-        if(allowOptionalColumns && !tokenizer.hasNextColumn()) {
-            return null;
-        }
-        String v = tokenizer.nextColumn();
-        if (!v.isEmpty()) {
-            if (v.equals(nullStringOrNull)) {
-                return null;
-            }
-            return v;
-        } else if (tokenizer.wasQuotedColumn()) {
-            return "";
-        } else {
-            return null;
-        }
-    }
     static class CsvRecordValidateException
             extends RuntimeException
     {

data/lib/embulk/data/new/java/build.gradle.erb CHANGED

@@ -40,7 +40,7 @@ Gem::Specification.new do |spec|
   spec.version       = "${project.version}"
   spec.authors       = [<%= author.dump %>]
   spec.summary       = %[<%= display_name %> <%= display_category %> plugin for Embulk]
-  spec.description   = %[<%= "#{description}".dump %>]
+  spec.description   = %[<%= description %>]
   spec.email         = [<%= email.dump %>]
   spec.licenses      = ["MIT"]
   # TODO set this: spec.homepage      = <%= "https://github.com/#{email[/([^@]*)/]}/#{project_name}".dump %>

data/lib/embulk/guess/csv.rb CHANGED

@@ -28,6 +28,8 @@ module Embulk
       NO_SKIP_DETECT_LINES = 10
       def guess_lines(config, sample_lines)
+        return {} unless config.fetch("type", "csv") == "csv"
         delim = guess_delimiter(sample_lines)
         unless delim
           # not CSV file

data/lib/embulk/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Embulk
-  VERSION = '0.5.4'
+  VERSION = '0.5.5'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk
 version: !ruby/object:Gem::Version
-  version: 0.5.4
+  version: 0.5.5
 platform: ruby
 authors:
 - Sadayuki Furuhashi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-03-24 00:00:00.000000000 Z
+date: 2015-04-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -249,6 +249,9 @@ files:
 - embulk-docs/plugins/index.html.erb
 - embulk-docs/plugins/plugins.css
 - embulk-docs/push-gh-pages.sh
+- embulk-docs/src/_static/embulk-architecture.png
+- embulk-docs/src/_static/embulk-logo.png
+- embulk-docs/src/built-in.rst
 - embulk-docs/src/conf.py
 - embulk-docs/src/index.rst
 - embulk-docs/src/recipe.rst
@@ -276,6 +279,7 @@ files:
 - embulk-docs/src/release/release-0.5.2.rst
 - embulk-docs/src/release/release-0.5.3.rst
 - embulk-docs/src/release/release-0.5.4.rst
+- embulk-docs/src/release/release-0.5.5.rst
 - embulk-standards/build.gradle
 - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
 - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -380,8 +384,8 @@ files:
 - classpath/bval-jsr303-0.5.jar
 - classpath/commons-beanutils-core-1.8.3.jar
 - classpath/commons-lang3-3.1.jar
-- classpath/embulk-core-0.5.4.jar
-- classpath/embulk-standards-0.5.4.jar
+- classpath/embulk-core-0.5.5.jar
+- classpath/embulk-standards-0.5.5.jar
 - classpath/guava-18.0.jar
 - classpath/guice-3.0.jar
 - classpath/guice-multibindings-3.0.jar