embulk-output-bigquery 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +24 -25
- data/build.gradle +1 -3
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -12
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +23 -51
- data/src/main/java/org/embulk/output/BigqueryWriter.java +136 -151
- metadata +3 -7
- data/src/main/java/org/embulk/output/BigqueryGcsWriter.java +0 -201
- data/src/test/java/org/embulk/output/TestBigqueryGcsWriter.java +0 -5
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 46c61dd1c73ff99c3c69bd217ca772f07b2e1127
         | 
| 4 | 
            +
              data.tar.gz: ba184360972884260c1fe90264af7d5386791804
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: aa693e59cb4b45c2d43f07479f3d61e63242185be9964d4f00b83a4a784a0443ae270a63760f3f2f188e74deb77cbb94a89a18db49d2c5cd4621f18b73363ab3
         | 
| 7 | 
            +
              data.tar.gz: 7c0ea783220de28befd7c565ff83ec5ff58f13af0db16b3d341a12c3e415adeacba375e5688a42fcbb26d0402a48071622ed5b161fa52fd08b1f56444faf66e1
         | 
    
        data/README.md
    CHANGED
    
    | @@ -1,17 +1,17 @@ | |
| 1 1 |  | 
| 2 2 | 
             
            # embulk-output-bigquery
         | 
| 3 3 |  | 
| 4 | 
            -
            [Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/) | 
| 4 | 
            +
            [Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/)
         | 
| 5 5 |  | 
| 6 6 | 
             
            ## Overview
         | 
| 7 7 |  | 
| 8 | 
            -
            load data into Google BigQuery as batch jobs  | 
| 8 | 
            +
            load data into Google BigQuery as batch jobs for large amounts of data
         | 
| 9 9 | 
             
            https://developers.google.com/bigquery/loading-data-into-bigquery
         | 
| 10 10 |  | 
| 11 11 | 
             
            * **Plugin type**: output
         | 
| 12 12 | 
             
            * **Resume supported**: no
         | 
| 13 13 | 
             
            * **Cleanup supported**: no
         | 
| 14 | 
            -
            * **Dynamic table creating**:  | 
| 14 | 
            +
            * **Dynamic table creating**: yes
         | 
| 15 15 |  | 
| 16 16 | 
             
            ### NOT IMPLEMENTED 
         | 
| 17 17 | 
             
            * insert data over streaming inserts
         | 
| @@ -30,32 +30,19 @@ OAuth flow for installed applications. | |
| 30 30 | 
             
            - **sequence_format**: (string, optional, default is %03d.%02d)
         | 
| 31 31 | 
             
            - **file_ext**: (string, required)
         | 
| 32 32 | 
             
            - **source_format**: file type (NEWLINE_DELIMITED_JSON or CSV) (string, required, default is CSV)
         | 
| 33 | 
            -
            - **is_file_compressed**: upload file is gzip compressed or not. (boolean, optional, default is 1)
         | 
| 34 | 
            -
            - **bucket**: Google Cloud Storage output bucket name (string, required)
         | 
| 35 | 
            -
            - **remote_path**: folder name in GCS bucket (string, optional)
         | 
| 36 33 | 
             
            - **project**: project_id (string, required)
         | 
| 37 34 | 
             
            - **dataset**: dataset (string, required)
         | 
| 38 35 | 
             
            - **table**: table name (string, required)
         | 
| 36 | 
            +
            - **auto_create_table**: (boolean, optional, default is 0)
         | 
| 37 | 
            +
            - **schema_path**: (string, optional)
         | 
| 39 38 | 
             
            - **application_name**: application name; anything you like (string, optional)
         | 
| 40 | 
            -
            - ** | 
| 41 | 
            -
            - **delete_from_bucket_when_job_end**: (boolean, optional, default is 0)
         | 
| 39 | 
            +
            - **delete_from_local_when_job_end**: (boolean, optional, default is 0)
         | 
| 42 40 | 
             
            - **job_status_max_polling_time**: max job status polling time. (int, optional, default is 3600 sec)
         | 
| 43 41 | 
             
            - **job_status_polling_interval**: job status polling interval. (int, optional, default is 10 sec)
         | 
| 44 42 | 
             
            - **is_skip_job_result_check**: (boolean, optional, default is 0)
         | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
            * Supported
         | 
| 50 | 
            -
              * Maximum size per load job: 1TB across all input files
         | 
| 51 | 
            -
              * Maximum number of files per load job: 10,000
         | 
| 52 | 
            -
                * embulk-output-bigquery divides a file into more than one job, like below.
         | 
| 53 | 
            -
                  * job1: file1(1GB) file2(1GB)...file10(1GB)
         | 
| 54 | 
            -
                  * job2: file11(1GB) file12(1GB)
         | 
| 55 | 
            -
             | 
| 56 | 
            -
            * Not Supported
         | 
| 57 | 
            -
              * Daily limit: 1,000 load jobs per table per day (including failures)
         | 
| 58 | 
            -
              * 10,000 load jobs per project per day (including failures)
         | 
| 43 | 
            +
            - **field_delimiter**: (string, optional, default is ",")
         | 
| 44 | 
            +
            - **max_bad_records**: (int, optional, default is 0)
         | 
| 45 | 
            +
            - **encoding**: (UTF-8 or ISO-8859-1) (string, optional, default is "UTF-8")
         | 
| 59 46 |  | 
| 60 47 | 
             
            ## Example
         | 
| 61 48 |  | 
| @@ -67,10 +54,7 @@ out: | |
| 67 54 | 
             
              path_prefix: /path/to/output
         | 
| 68 55 | 
             
              file_ext: csv.gz
         | 
| 69 56 | 
             
              source_format: CSV
         | 
| 70 | 
            -
              is_file_compressed: 1
         | 
| 71 57 | 
             
              project: your-project-000
         | 
| 72 | 
            -
              bucket: output_bucket_name
         | 
| 73 | 
            -
              remote_path: folder_name
         | 
| 74 58 | 
             
              dataset: your_dataset_name
         | 
| 75 59 | 
             
              table: your_table_name
         | 
| 76 60 | 
             
              formatter:
         | 
| @@ -80,6 +64,21 @@ out: | |
| 80 64 | 
             
              - {type: gzip}
         | 
| 81 65 | 
             
            ```
         | 
| 82 66 |  | 
| 67 | 
            +
            ## Dynamic table creating
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            When `auto_create_table` is set to true, the plugin tries to create the table using the BigQuery API.
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            To describe the schema of the target table, set `schema_path` to the path of a schema file.
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            The `table` option accepts a [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
         | 
| 74 | 
            +
            format string (Ruby syntax) to construct the table name.
         | 
| 75 | 
            +
             | 
| 76 | 
            +
            ```
         | 
| 77 | 
            +
            auto_create_table: true
         | 
| 78 | 
            +
            table: table_%Y_%m
         | 
| 79 | 
            +
            schema_path: /path/to/schema.json
         | 
| 80 | 
            +
            ```
         | 
| 81 | 
            +
             | 
| 83 82 | 
             
            ## Build
         | 
| 84 83 |  | 
| 85 84 | 
             
            ```
         | 
    
        data/build.gradle
    CHANGED
    
    | @@ -15,16 +15,14 @@ configurations { | |
| 15 15 | 
             
            sourceCompatibility = 1.7
         | 
| 16 16 | 
             
            targetCompatibility = 1.7
         | 
| 17 17 |  | 
| 18 | 
            -
            version = "0.1. | 
| 18 | 
            +
            version = "0.1.2"
         | 
| 19 19 |  | 
| 20 20 | 
             
            dependencies {
         | 
| 21 21 | 
             
                compile  "org.embulk:embulk-core:0.5.1"
         | 
| 22 22 | 
             
                provided "org.embulk:embulk-core:0.5.1"
         | 
| 23 23 |  | 
| 24 24 | 
             
                compile "com.google.http-client:google-http-client-jackson2:1.19.0"
         | 
| 25 | 
            -
                compile ("com.google.apis:google-api-services-storage:v1-rev27-1.19.1") {exclude module: "guava-jdk5"}
         | 
| 26 25 | 
             
                compile "com.google.apis:google-api-services-bigquery:v2-rev193-1.19.1"
         | 
| 27 | 
            -
                compile "eu.medsea.mimeutil:mime-util:2.1.3"
         | 
| 28 26 |  | 
| 29 27 | 
             
                testCompile "junit:junit:4.+"
         | 
| 30 28 | 
             
            }
         | 
| @@ -23,8 +23,6 @@ import com.google.api.client.http.HttpTransport; | |
| 23 23 | 
             
            import com.google.api.client.http.InputStreamContent;
         | 
| 24 24 | 
             
            import com.google.api.client.json.JsonFactory;
         | 
| 25 25 | 
             
            import com.google.api.client.json.jackson2.JacksonFactory;
         | 
| 26 | 
            -
            import com.google.api.services.storage.Storage;
         | 
| 27 | 
            -
            import com.google.api.services.storage.StorageScopes;
         | 
| 28 26 | 
             
            import com.google.api.services.bigquery.Bigquery;
         | 
| 29 27 | 
             
            import com.google.api.services.bigquery.BigqueryScopes;
         | 
| 30 28 | 
             
            import com.google.api.services.bigquery.model.ProjectList;
         | 
| @@ -64,7 +62,6 @@ public class BigqueryAuthentication | |
| 64 62 | 
             
                            .setServiceAccountId(serviceAccountEmail)
         | 
| 65 63 | 
             
                            .setServiceAccountScopes(
         | 
| 66 64 | 
             
                                    ImmutableList.of(
         | 
| 67 | 
            -
                                            BigqueryScopes.DEVSTORAGE_READ_WRITE,
         | 
| 68 65 | 
             
                                            BigqueryScopes.BIGQUERY
         | 
| 69 66 | 
             
                                    )
         | 
| 70 67 | 
             
                            )
         | 
| @@ -87,13 +84,4 @@ public class BigqueryAuthentication | |
| 87 84 |  | 
| 88 85 | 
             
                    return client;
         | 
| 89 86 | 
             
                }
         | 
| 90 | 
            -
             | 
| 91 | 
            -
                public Storage getGcsClient() throws IOException
         | 
| 92 | 
            -
                {
         | 
| 93 | 
            -
                    Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
         | 
| 94 | 
            -
                            .setApplicationName(applicationName)
         | 
| 95 | 
            -
                            .build();
         | 
| 96 | 
            -
             | 
| 97 | 
            -
                    return client;
         | 
| 98 | 
            -
                }
         | 
| 99 87 | 
             
            }
         | 
| @@ -13,6 +13,7 @@ import java.util.concurrent.TimeoutException; | |
| 13 13 | 
             
            import com.google.common.base.Optional;
         | 
| 14 14 | 
             
            import com.google.common.base.Throwables;
         | 
| 15 15 | 
             
            import java.security.GeneralSecurityException;
         | 
| 16 | 
            +
            import org.jruby.embed.ScriptingContainer;
         | 
| 16 17 |  | 
| 17 18 | 
             
            import org.embulk.config.Config;
         | 
| 18 19 | 
             
            import org.embulk.config.ConfigException;
         | 
| @@ -59,10 +60,6 @@ public class BigqueryOutputPlugin | |
| 59 60 | 
             
                    @ConfigDefault("\"CSV\"")
         | 
| 60 61 | 
             
                    public String getSourceFormat();
         | 
| 61 62 |  | 
| 62 | 
            -
                    @Config("is_file_compressed")
         | 
| 63 | 
            -
                    @ConfigDefault("true")
         | 
| 64 | 
            -
                    public boolean getIsFileCompressed();
         | 
| 65 | 
            -
             | 
| 66 63 | 
             
                    @Config("field_delimiter")
         | 
| 67 64 | 
             
                    @ConfigDefault("\",\"")
         | 
| 68 65 | 
             
                    public String getFieldDelimiter();
         | 
| @@ -71,20 +68,13 @@ public class BigqueryOutputPlugin | |
| 71 68 | 
             
                    @ConfigDefault("0")
         | 
| 72 69 | 
             
                    public int getMaxBadrecords();
         | 
| 73 70 |  | 
| 74 | 
            -
                    @Config(" | 
| 75 | 
            -
                    @ConfigDefault(" | 
| 76 | 
            -
                    public  | 
| 71 | 
            +
                    @Config("encoding")
         | 
| 72 | 
            +
                    @ConfigDefault("\"UTF-8\"")
         | 
| 73 | 
            +
                    public String getEncoding();
         | 
| 77 74 |  | 
| 78 | 
            -
                    @Config(" | 
| 75 | 
            +
                    @Config("delete_from_local_when_job_end")
         | 
| 79 76 | 
             
                    @ConfigDefault("false")
         | 
| 80 | 
            -
                    public boolean  | 
| 81 | 
            -
             | 
| 82 | 
            -
                    @Config("bucket")
         | 
| 83 | 
            -
                    public String getBucket();
         | 
| 84 | 
            -
             | 
| 85 | 
            -
                    @Config("remote_path")
         | 
| 86 | 
            -
                    @ConfigDefault("null")
         | 
| 87 | 
            -
                    public Optional<String> getRemotePath();
         | 
| 77 | 
            +
                    public boolean getDeleteFromLocalWhenJobEnd();
         | 
| 88 78 |  | 
| 89 79 | 
             
                    @Config("project")
         | 
| 90 80 | 
             
                    public String getProject();
         | 
| @@ -117,7 +107,6 @@ public class BigqueryOutputPlugin | |
| 117 107 | 
             
                }
         | 
| 118 108 |  | 
| 119 109 | 
             
                private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
         | 
| 120 | 
            -
                private static BigqueryGcsWriter bigQueryGcsWriter;
         | 
| 121 110 | 
             
                private static BigqueryWriter bigQueryWriter;
         | 
| 122 111 |  | 
| 123 112 | 
             
                public ConfigDiff transaction(ConfigSource config, int taskCount,
         | 
| @@ -126,33 +115,25 @@ public class BigqueryOutputPlugin | |
| 126 115 | 
             
                    final PluginTask task = config.loadConfig(PluginTask.class);
         | 
| 127 116 |  | 
| 128 117 | 
             
                    try {
         | 
| 129 | 
            -
                        bigQueryGcsWriter = new BigqueryGcsWriter.Builder(task.getServiceAccountEmail())
         | 
| 130 | 
            -
                                .setP12KeyFilePath(task.getP12KeyfilePath())
         | 
| 131 | 
            -
                                .setApplicationName(task.getApplicationName())
         | 
| 132 | 
            -
                                .setBucket(task.getBucket())
         | 
| 133 | 
            -
                                .setSourceFormat(task.getSourceFormat())
         | 
| 134 | 
            -
                                .setIsFileCompressed(task.getIsFileCompressed())
         | 
| 135 | 
            -
                                .setDeleteFromBucketWhenJobEnd(task.getDeleteFromBucketWhenJobEnd())
         | 
| 136 | 
            -
                                .build();
         | 
| 137 | 
            -
             | 
| 138 118 | 
             
                        bigQueryWriter = new BigqueryWriter.Builder(task.getServiceAccountEmail())
         | 
| 139 119 | 
             
                                .setP12KeyFilePath(task.getP12KeyfilePath())
         | 
| 140 120 | 
             
                                .setApplicationName(task.getApplicationName())
         | 
| 141 121 | 
             
                                .setProject(task.getProject())
         | 
| 142 122 | 
             
                                .setDataset(task.getDataset())
         | 
| 143 | 
            -
                                .setTable(task.getTable())
         | 
| 123 | 
            +
                                .setTable(generateTableName(task.getTable()))
         | 
| 144 124 | 
             
                                .setAutoCreateTable(task.getAutoCreateTable())
         | 
| 145 125 | 
             
                                .setSchemaPath(task.getSchemaPath())
         | 
| 146 | 
            -
                                .setBucket(task.getBucket())
         | 
| 147 126 | 
             
                                .setSourceFormat(task.getSourceFormat())
         | 
| 148 127 | 
             
                                .setFieldDelimiter(task.getFieldDelimiter())
         | 
| 149 128 | 
             
                                .setMaxBadrecords(task.getMaxBadrecords())
         | 
| 129 | 
            +
                                .setEncoding(task.getEncoding())
         | 
| 150 130 | 
             
                                .setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
         | 
| 151 131 | 
             
                                .setJobStatusPollingInterval(task.getJobStatusPollingInterval())
         | 
| 152 132 | 
             
                                .setIsSkipJobResultCheck(task.getIsSkipJobResultCheck())
         | 
| 153 133 | 
             
                                .build();
         | 
| 134 | 
            +
                    } catch (FileNotFoundException ex) {
         | 
| 135 | 
            +
                        throw new ConfigException(ex);
         | 
| 154 136 | 
             
                    } catch (IOException | GeneralSecurityException ex) {
         | 
| 155 | 
            -
                        log.warn("Google Authentication was failed. Please Check your configurations.");
         | 
| 156 137 | 
             
                        throw new ConfigException(ex);
         | 
| 157 138 | 
             
                    }
         | 
| 158 139 | 
             
                    // non-retryable (non-idempotent) output:
         | 
| @@ -165,19 +146,6 @@ public class BigqueryOutputPlugin | |
| 165 146 | 
             
                {
         | 
| 166 147 | 
             
                    control.run(taskSource);
         | 
| 167 148 |  | 
| 168 | 
            -
                    try {
         | 
| 169 | 
            -
                        bigQueryWriter.executeJob();
         | 
| 170 | 
            -
                        // TODO refactor
         | 
| 171 | 
            -
                        if (bigQueryGcsWriter.getDeleteFromBucketWhenJobEnd()) {
         | 
| 172 | 
            -
                            ArrayList<HashMap<String, String>> fileList = bigQueryWriter.getFileList();
         | 
| 173 | 
            -
                            for (HashMap<String, String> file : fileList) {
         | 
| 174 | 
            -
                                bigQueryGcsWriter.deleteFile(file.get("remote_path"), file.get("file_name"));
         | 
| 175 | 
            -
                            }
         | 
| 176 | 
            -
                        }
         | 
| 177 | 
            -
                    } catch (IOException | TimeoutException | BigqueryWriter.JobFailedException ex) {
         | 
| 178 | 
            -
                        log.warn(ex.getMessage());
         | 
| 179 | 
            -
                        throw Throwables.propagate(ex);
         | 
| 180 | 
            -
                    }
         | 
| 181 149 | 
             
                    return Exec.newConfigDiff();
         | 
| 182 150 | 
             
                }
         | 
| 183 151 |  | 
| @@ -196,7 +164,6 @@ public class BigqueryOutputPlugin | |
| 196 164 | 
             
                    final String pathPrefix = task.getPathPrefix();
         | 
| 197 165 | 
             
                    final String sequenceFormat = task.getSequenceFormat();
         | 
| 198 166 | 
             
                    final String pathSuffix = task.getFileNameExtension();
         | 
| 199 | 
            -
                    final Optional<String> remotePath = task.getRemotePath();
         | 
| 200 167 |  | 
| 201 168 | 
             
                    return new TransactionalFileOutput() {
         | 
| 202 169 | 
             
                        private int fileIndex = 0;
         | 
| @@ -217,7 +184,6 @@ public class BigqueryOutputPlugin | |
| 217 184 | 
             
                                }
         | 
| 218 185 | 
             
                                filePath = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + suffix;
         | 
| 219 186 | 
             
                                file = new File(filePath);
         | 
| 220 | 
            -
                                fileName = file.getName();
         | 
| 221 187 |  | 
| 222 188 | 
             
                                String parentPath = file.getParent();
         | 
| 223 189 | 
             
                                File dir = new File(parentPath);
         | 
| @@ -257,18 +223,15 @@ public class BigqueryOutputPlugin | |
| 257 223 | 
             
                        public void finish()
         | 
| 258 224 | 
             
                        {
         | 
| 259 225 | 
             
                            closeFile();
         | 
| 260 | 
            -
                            if ( | 
| 261 | 
            -
                                fileSize = file.length();
         | 
| 226 | 
            +
                            if (filePath != null) {
         | 
| 262 227 | 
             
                                try {
         | 
| 263 | 
            -
                                     | 
| 228 | 
            +
                                    bigQueryWriter.executeLoad(filePath);
         | 
| 264 229 |  | 
| 265 | 
            -
                                    if (task. | 
| 230 | 
            +
                                    if (task.getDeleteFromLocalWhenJobEnd()) {
         | 
| 266 231 | 
             
                                        log.info(String.format("Delete local file [%s]", filePath));
         | 
| 267 232 | 
             
                                        file.delete();
         | 
| 268 233 | 
             
                                    }
         | 
| 269 | 
            -
             | 
| 270 | 
            -
                                    bigQueryWriter.addTask(remotePath, fileName, fileSize);
         | 
| 271 | 
            -
                                } catch (IOException ex) {
         | 
| 234 | 
            +
                                } catch (IOException | TimeoutException | BigqueryWriter.JobFailedException ex) {
         | 
| 272 235 | 
             
                                    throw Throwables.propagate(ex);
         | 
| 273 236 | 
             
                                }
         | 
| 274 237 | 
             
                            }
         | 
| @@ -290,4 +253,13 @@ public class BigqueryOutputPlugin | |
| 290 253 | 
             
                        }
         | 
| 291 254 | 
             
                    };
         | 
| 292 255 | 
             
                }
         | 
| 256 | 
            +
             | 
| 257 | 
            +
                // Expanding a strftime-style name such as "table_%Y_%m" (which may or may not contain a pattern) is difficult in plain Java, so JRuby is used.
         | 
| 258 | 
            +
                public String generateTableName(String tableName)
         | 
| 259 | 
            +
                {
         | 
| 260 | 
            +
                    ScriptingContainer jruby = new ScriptingContainer();
         | 
| 261 | 
            +
                    Object result = jruby.runScriptlet("Time.now.strftime('" + tableName + "')");
         | 
| 262 | 
            +
             | 
| 263 | 
            +
                    return result.toString();
         | 
| 264 | 
            +
                }
         | 
| 293 265 | 
             
            }
         | 
| @@ -1,6 +1,11 @@ | |
| 1 1 | 
             
            package org.embulk.output;
         | 
| 2 2 |  | 
| 3 | 
            +
            import java.io.File;
         | 
| 3 4 | 
             
            import java.io.IOException;
         | 
| 5 | 
            +
            import java.io.FileNotFoundException;
         | 
| 6 | 
            +
            import java.io.FileInputStream;
         | 
| 7 | 
            +
            import java.io.BufferedInputStream;
         | 
| 8 | 
            +
            import com.google.api.client.http.InputStreamContent;
         | 
| 4 9 | 
             
            import java.util.ArrayList;
         | 
| 5 10 | 
             
            import java.util.List;
         | 
| 6 11 | 
             
            import java.util.Iterator;
         | 
| @@ -11,14 +16,19 @@ import java.util.concurrent.TimeoutException; | |
| 11 16 | 
             
            import org.apache.commons.lang3.StringUtils;
         | 
| 12 17 | 
             
            import com.google.common.base.Optional;
         | 
| 13 18 | 
             
            import com.google.common.collect.ImmutableSet;
         | 
| 19 | 
            +
            import com.google.common.base.Throwables;
         | 
| 14 20 | 
             
            import java.security.GeneralSecurityException;
         | 
| 15 21 |  | 
| 22 | 
            +
            import com.fasterxml.jackson.databind.ObjectMapper;
         | 
| 23 | 
            +
            import com.fasterxml.jackson.core.type.TypeReference;
         | 
| 24 | 
            +
             | 
| 16 25 | 
             
            import org.embulk.spi.Exec;
         | 
| 17 26 | 
             
            import org.slf4j.Logger;
         | 
| 18 27 |  | 
| 19 28 | 
             
            import com.google.api.services.bigquery.Bigquery;
         | 
| 20 29 | 
             
            import com.google.api.services.bigquery.BigqueryScopes;
         | 
| 21 30 | 
             
            import com.google.api.services.bigquery.Bigquery.Datasets;
         | 
| 31 | 
            +
            import com.google.api.services.bigquery.Bigquery.Tables;
         | 
| 22 32 | 
             
            import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
         | 
| 23 33 | 
             
            import com.google.api.services.bigquery.Bigquery.Jobs.GetQueryResults;
         | 
| 24 34 | 
             
            import com.google.api.services.bigquery.model.Job;
         | 
| @@ -28,11 +38,19 @@ import com.google.api.services.bigquery.model.JobStatus; | |
| 28 38 | 
             
            import com.google.api.services.bigquery.model.JobStatistics;
         | 
| 29 39 | 
             
            import com.google.api.services.bigquery.model.JobReference;
         | 
| 30 40 | 
             
            import com.google.api.services.bigquery.model.DatasetList;
         | 
| 41 | 
            +
            import com.google.api.services.bigquery.model.Table;
         | 
| 42 | 
            +
            import com.google.api.services.bigquery.model.TableList;
         | 
| 31 43 | 
             
            import com.google.api.services.bigquery.model.TableSchema;
         | 
| 32 44 | 
             
            import com.google.api.services.bigquery.model.TableReference;
         | 
| 33 45 | 
             
            import com.google.api.services.bigquery.model.TableFieldSchema;
         | 
| 34 46 | 
             
            import com.google.api.services.bigquery.model.TableCell;
         | 
| 35 47 | 
             
            import com.google.api.services.bigquery.model.TableRow;
         | 
| 48 | 
            +
            import com.google.api.services.bigquery.model.ErrorProto;
         | 
| 49 | 
            +
            import com.google.api.client.googleapis.json.GoogleJsonResponseException;
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            import com.google.api.client.googleapis.media.MediaHttpUploader;
         | 
| 52 | 
            +
            import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
         | 
| 53 | 
            +
            import com.google.api.client.googleapis.media.MediaHttpUploader.UploadState;
         | 
| 36 54 |  | 
| 37 55 | 
             
            public class BigqueryWriter
         | 
| 38 56 | 
             
            {
         | 
| @@ -43,43 +61,58 @@ public class BigqueryWriter | |
| 43 61 | 
             
                private final String table;
         | 
| 44 62 | 
             
                private final boolean autoCreateTable;
         | 
| 45 63 | 
             
                private final Optional<String> schemaPath;
         | 
| 46 | 
            -
                private final  | 
| 64 | 
            +
                private final TableSchema tableSchema;
         | 
| 47 65 | 
             
                private final String sourceFormat;
         | 
| 48 66 | 
             
                private final String fieldDelimiter;
         | 
| 49 67 | 
             
                private final int maxBadrecords;
         | 
| 68 | 
            +
                private final String encoding;
         | 
| 50 69 | 
             
                private final long jobStatusMaxPollingTime;
         | 
| 51 70 | 
             
                private final long jobStatusPollingInterval;
         | 
| 52 71 | 
             
                private final boolean isSkipJobResultCheck;
         | 
| 53 72 | 
             
                private final Bigquery bigQueryClient;
         | 
| 54 | 
            -
                private final EmbulkBigqueryTask writerTask;
         | 
| 55 73 |  | 
| 56 | 
            -
                public BigqueryWriter(Builder builder) throws IOException, GeneralSecurityException
         | 
| 74 | 
            +
                public BigqueryWriter(Builder builder) throws FileNotFoundException, IOException, GeneralSecurityException
         | 
| 57 75 | 
             
                {
         | 
| 58 76 | 
             
                    this.project = builder.project;
         | 
| 59 77 | 
             
                    this.dataset = builder.dataset;
         | 
| 60 78 | 
             
                    this.table = builder.table;
         | 
| 61 79 | 
             
                    this.autoCreateTable = builder.autoCreateTable;
         | 
| 62 80 | 
             
                    this.schemaPath = builder.schemaPath;
         | 
| 63 | 
            -
                    this.bucket = builder.bucket;
         | 
| 64 81 | 
             
                    this.sourceFormat = builder.sourceFormat.toUpperCase();
         | 
| 65 82 | 
             
                    this.fieldDelimiter = builder.fieldDelimiter;
         | 
| 66 83 | 
             
                    this.maxBadrecords = builder.maxBadrecords;
         | 
| 84 | 
            +
                    this.encoding = builder.encoding.toUpperCase();
         | 
| 67 85 | 
             
                    this.jobStatusMaxPollingTime = builder.jobStatusMaxPollingTime;
         | 
| 68 86 | 
             
                    this.jobStatusPollingInterval = builder.jobStatusPollingInterval;
         | 
| 69 87 | 
             
                    this.isSkipJobResultCheck = builder.isSkipJobResultCheck;
         | 
| 70 88 |  | 
| 71 89 | 
             
                    BigqueryAuthentication auth = new BigqueryAuthentication(builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
         | 
| 72 90 | 
             
                    this.bigQueryClient = auth.getBigqueryClient();
         | 
| 73 | 
            -
             | 
| 91 | 
            +
             | 
| 92 | 
            +
                    checkConfig();
         | 
| 93 | 
            +
                    if (autoCreateTable) {
         | 
| 94 | 
            +
                        this.tableSchema = createTableSchema(builder.schemaPath);
         | 
| 95 | 
            +
                    } else {
         | 
| 96 | 
            +
                        this.tableSchema = null;
         | 
| 97 | 
            +
                    }
         | 
| 74 98 | 
             
                }
         | 
| 75 99 |  | 
| 76 100 | 
             
                private String getJobStatus(JobReference jobRef) throws JobFailedException
         | 
| 77 101 | 
             
                {
         | 
| 78 102 | 
             
                    try {
         | 
| 79 103 | 
             
                        Job job = bigQueryClient.jobs().get(project, jobRef.getJobId()).execute();
         | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 104 | 
            +
             | 
| 105 | 
            +
                        ErrorProto fatalError = job.getStatus().getErrorResult();
         | 
| 106 | 
            +
                        if (fatalError != null) {
         | 
| 107 | 
            +
                            throw new JobFailedException(String.format("Job failed. job id:[%s] reason:[%s][%s] status:[FAILED]", jobRef.getJobId(), fatalError.getReason(), fatalError.getMessage()));
         | 
| 108 | 
            +
                        }
         | 
| 109 | 
            +
                        List<ErrorProto> errors = job.getStatus().getErrors();
         | 
| 110 | 
            +
                        if (errors != null) {
         | 
| 111 | 
            +
                            for (ErrorProto error : errors) {
         | 
| 112 | 
            +
                                log.warn(String.format("Error: job id:[%s] reason[%s][%s] location:[%s]", jobRef.getJobId(), error.getReason(), error.getMessage(), error.getLocation()));
         | 
| 113 | 
            +
                            }
         | 
| 82 114 | 
             
                        }
         | 
| 115 | 
            +
             | 
| 83 116 | 
             
                        String jobStatus = job.getStatus().getState();
         | 
| 84 117 | 
             
                        if (jobStatus.equals("DONE")) {
         | 
| 85 118 | 
             
                            JobStatistics statistics = job.getStatistics();
         | 
| @@ -117,59 +150,68 @@ public class BigqueryWriter | |
| 117 150 | 
             
                    }
         | 
| 118 151 | 
             
                }
         | 
| 119 152 |  | 
| 120 | 
            -
                public void  | 
| 121 | 
            -
                {
         | 
| 122 | 
            -
                    // TODO: refactor
         | 
| 123 | 
            -
                    ArrayList<ArrayList<HashMap<String, String>>> taskList = writerTask.createJobList();
         | 
| 124 | 
            -
                    for (ArrayList<HashMap<String, String>> task : taskList) {
         | 
| 125 | 
            -
                        Job job = createJob(task);
         | 
| 126 | 
            -
                        // TODO: multi-threading
         | 
| 127 | 
            -
                        new EmbulkBigqueryJob(job).call();
         | 
| 128 | 
            -
                    }
         | 
| 129 | 
            -
                }
         | 
| 130 | 
            -
             | 
| 131 | 
            -
                private Job createJob(ArrayList<HashMap<String, String>> task)
         | 
| 153 | 
            +
                public void executeLoad(String localFilePath) throws GoogleJsonResponseException, IOException, TimeoutException, JobFailedException
         | 
| 132 154 | 
             
                {
         | 
| 133 155 | 
             
                    log.info(String.format("Job preparing... project:%s dataset:%s table:%s", project, dataset, table));
         | 
| 134 156 |  | 
| 135 157 | 
             
                    Job job = new Job();
         | 
| 158 | 
            +
                    JobReference jobRef = null;
         | 
| 136 159 | 
             
                    JobConfiguration jobConfig = new JobConfiguration();
         | 
| 137 160 | 
             
                    JobConfigurationLoad loadConfig = new JobConfigurationLoad();
         | 
| 138 161 | 
             
                    jobConfig.setLoad(loadConfig);
         | 
| 139 162 | 
             
                    job.setConfiguration(jobConfig);
         | 
| 140 163 |  | 
| 141 164 | 
             
                    loadConfig.setAllowQuotedNewlines(false);
         | 
| 165 | 
            +
                    loadConfig.setEncoding(encoding);
         | 
| 166 | 
            +
                    loadConfig.setMaxBadRecords(maxBadrecords);
         | 
| 142 167 | 
             
                    if (sourceFormat.equals("NEWLINE_DELIMITED_JSON")) {
         | 
| 143 168 | 
             
                        loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");
         | 
| 144 169 | 
             
                    } else {
         | 
| 145 170 | 
             
                        loadConfig.setFieldDelimiter(fieldDelimiter);
         | 
| 146 171 | 
             
                    }
         | 
| 172 | 
            +
                    loadConfig.setWriteDisposition("WRITE_APPEND");
         | 
| 147 173 | 
             
                    if (autoCreateTable) {
         | 
| 148 | 
            -
                        loadConfig.setSchema( | 
| 149 | 
            -
                        loadConfig.setWriteDisposition("WRITE_EMPTY");
         | 
| 174 | 
            +
                        loadConfig.setSchema(tableSchema);
         | 
| 150 175 | 
             
                        loadConfig.setCreateDisposition("CREATE_IF_NEEDED");
         | 
| 151 | 
            -
                        log.info(String.format("table:[%s] will be create | 
| 176 | 
            +
                        log.info(String.format("table:[%s] will be create if not exists", table));
         | 
| 152 177 | 
             
                    } else {
         | 
| 153 | 
            -
                        loadConfig.setWriteDisposition("WRITE_APPEND");
         | 
| 154 178 | 
             
                        loadConfig.setCreateDisposition("CREATE_NEVER");
         | 
| 155 179 | 
             
                    }
         | 
| 156 | 
            -
                    loadConfig.setMaxBadRecords(maxBadrecords);
         | 
| 157 180 |  | 
| 158 | 
            -
                     | 
| 159 | 
            -
             | 
| 160 | 
            -
             | 
| 161 | 
            -
             | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
                    }
         | 
| 166 | 
            -
                    loadConfig.setSourceUris(sources);
         | 
| 167 | 
            -
                    loadConfig.setDestinationTable(getTableReference());
         | 
| 181 | 
            +
                    loadConfig.setDestinationTable(createTableReference());
         | 
| 182 | 
            +
             | 
| 183 | 
            +
                    File file = new File(localFilePath);
         | 
| 184 | 
            +
                    InputStreamContent mediaContent = new InputStreamContent("application/octet-stream",
         | 
| 185 | 
            +
                            new BufferedInputStream(
         | 
| 186 | 
            +
                                    new FileInputStream(file)));
         | 
| 187 | 
            +
                    mediaContent.setLength(file.length());
         | 
| 168 188 |  | 
| 169 | 
            -
                     | 
| 189 | 
            +
                    Insert insert = bigQueryClient.jobs().insert(project, job, mediaContent);
         | 
| 190 | 
            +
                    insert.setProjectId(project);
         | 
| 191 | 
            +
                    insert.setDisableGZipContent(true);
         | 
| 192 | 
            +
             | 
| 193 | 
            +
                    // @see https://code.google.com/p/google-api-java-client/wiki/MediaUpload
         | 
| 194 | 
            +
                    UploadProgressListener listner = new UploadProgressListener();
         | 
| 195 | 
            +
                    listner.setFileName(localFilePath);
         | 
| 196 | 
            +
                    insert.getMediaHttpUploader()
         | 
| 197 | 
            +
                            .setProgressListener(listner)
         | 
| 198 | 
            +
                            .setDirectUploadEnabled(false);
         | 
| 199 | 
            +
             | 
| 200 | 
            +
                    try {
         | 
| 201 | 
            +
                        jobRef = insert.execute().getJobReference();
         | 
| 202 | 
            +
                    } catch (Exception ex) {
         | 
| 203 | 
            +
                        log.warn("Job execution was failed. Please check your settings or data... like data matches schema");
         | 
| 204 | 
            +
                        throw Throwables.propagate(ex);
         | 
| 205 | 
            +
                    }
         | 
| 206 | 
            +
                    log.info(String.format("Job executed. job id:[%s] file:[%s]", jobRef.getJobId(), localFilePath));
         | 
| 207 | 
            +
                    if (isSkipJobResultCheck) {
         | 
| 208 | 
            +
                        log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
         | 
| 209 | 
            +
                    } else {
         | 
| 210 | 
            +
                        getJobStatusUntilDone(jobRef);
         | 
| 211 | 
            +
                    }
         | 
| 170 212 | 
             
                }
         | 
| 171 213 |  | 
| 172 | 
            -
                private TableReference  | 
| 214 | 
            +
                private TableReference createTableReference()
         | 
| 173 215 | 
             
                {
         | 
| 174 216 | 
             
                    return new TableReference()
         | 
| 175 217 | 
             
                            .setProjectId(project)
         | 
| @@ -177,135 +219,78 @@ public class BigqueryWriter | |
| 177 219 | 
             
                            .setTableId(table);
         | 
| 178 220 | 
             
                }
         | 
| 179 221 |  | 
| 180 | 
            -
                private TableSchema  | 
| 222 | 
            +
                private TableSchema createTableSchema(Optional<String> schemaPath) throws FileNotFoundException, IOException
         | 
| 181 223 | 
             
                {
         | 
| 182 | 
            -
                     | 
| 183 | 
            -
                     | 
| 184 | 
            -
                     | 
| 185 | 
            -
                     | 
| 186 | 
            -
             | 
| 187 | 
            -
             | 
| 188 | 
            -
                         | 
| 189 | 
            -
             | 
| 190 | 
            -
             | 
| 191 | 
            -
             | 
| 224 | 
            +
                    String path = schemaPath.orNull();
         | 
| 225 | 
            +
                    File file = new File(path);
         | 
| 226 | 
            +
                    FileInputStream stream = null;
         | 
| 227 | 
            +
                    try {
         | 
| 228 | 
            +
                        stream = new FileInputStream(file);
         | 
| 229 | 
            +
                        ObjectMapper mapper = new ObjectMapper();
         | 
| 230 | 
            +
                        List<TableFieldSchema> fields = mapper.readValue(stream, new TypeReference<List<TableFieldSchema>>() {});
         | 
| 231 | 
            +
                        TableSchema tableSchema = new TableSchema().setFields(fields);
         | 
| 232 | 
            +
                        return tableSchema;
         | 
| 233 | 
            +
                    } finally {
         | 
| 234 | 
            +
                        if (stream != null) {
         | 
| 235 | 
            +
                            stream.close();
         | 
| 236 | 
            +
                        }
         | 
| 192 237 | 
             
                    }
         | 
| 193 | 
            -
                    */
         | 
| 194 | 
            -
             | 
| 195 | 
            -
                    tableSchema.setFields(fields);
         | 
| 196 | 
            -
                    return tableSchema;
         | 
| 197 238 | 
             
                }
         | 
| 198 239 |  | 
| 199 | 
            -
                 | 
| 240 | 
            +
                public boolean isExistTable(String tableName) throws IOException
         | 
| 200 241 | 
             
                {
         | 
| 201 | 
            -
                     | 
| 202 | 
            -
                     | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
             | 
| 206 | 
            -
                        path = bucket + "/" + StringUtils.join(pathList) + "/" + fileName;
         | 
| 242 | 
            +
                    Tables tableRequest = bigQueryClient.tables();
         | 
| 243 | 
            +
                    try {
         | 
| 244 | 
            +
                        Table tableData = tableRequest.get(project, dataset, tableName).execute();
         | 
| 245 | 
            +
                    } catch (GoogleJsonResponseException ex) {
         | 
| 246 | 
            +
                        return false;
         | 
| 207 247 | 
             
                    }
         | 
| 208 | 
            -
                    return  | 
| 209 | 
            -
                }
         | 
| 210 | 
            -
             | 
| 211 | 
            -
                public void addTask(Optional<String> remotePath, String fileName, long fileSize)
         | 
| 212 | 
            -
                {
         | 
| 213 | 
            -
                    writerTask.addTaskFile(remotePath, fileName, fileSize);
         | 
| 214 | 
            -
                }
         | 
| 215 | 
            -
             | 
| 216 | 
            -
                public ArrayList<HashMap<String, String>> getFileList()
         | 
| 217 | 
            -
                {
         | 
| 218 | 
            -
                    return writerTask.getFileList();
         | 
| 248 | 
            +
                    return true;
         | 
| 219 249 | 
             
                }
         | 
| 220 250 |  | 
| 221 | 
            -
                 | 
| 251 | 
            +
                public void checkConfig() throws FileNotFoundException, IOException
         | 
| 222 252 | 
             
                {
         | 
| 223 | 
            -
                     | 
| 224 | 
            -
             | 
| 225 | 
            -
             | 
| 226 | 
            -
                    {
         | 
| 227 | 
            -
                        this.job = job;
         | 
| 228 | 
            -
                    }
         | 
| 229 | 
            -
             | 
| 230 | 
            -
                    public Void call() throws IOException, TimeoutException, JobFailedException
         | 
| 231 | 
            -
                    {
         | 
| 232 | 
            -
                        Insert insert = bigQueryClient.jobs().insert(project, job);
         | 
| 233 | 
            -
                        insert.setProjectId(project);
         | 
| 234 | 
            -
                        JobReference jobRef = insert.execute().getJobReference();
         | 
| 235 | 
            -
                        log.info(String.format("Job executed. job id:[%s]", jobRef.getJobId()));
         | 
| 236 | 
            -
                        if (isSkipJobResultCheck) {
         | 
| 237 | 
            -
                            log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
         | 
| 253 | 
            +
                    if (autoCreateTable) {
         | 
| 254 | 
            +
                        if (!schemaPath.isPresent()) {
         | 
| 255 | 
            +
                            throw new IOException("schema_path is empty");
         | 
| 238 256 | 
             
                        } else {
         | 
| 239 | 
            -
                             | 
| 257 | 
            +
                            File file = new File(schemaPath.orNull());
         | 
| 258 | 
            +
                            if (!file.exists()) {
         | 
| 259 | 
            +
                                throw new FileNotFoundException("Can not load schema file.");
         | 
| 260 | 
            +
                            }
         | 
| 261 | 
            +
                        }
         | 
| 262 | 
            +
                    } else {
         | 
| 263 | 
            +
                        if (!isExistTable(table)) {
         | 
| 264 | 
            +
                            throw new IOException(String.format("table [%s] is not exists", table));
         | 
| 240 265 | 
             
                        }
         | 
| 241 | 
            -
                        return null;
         | 
| 242 266 | 
             
                    }
         | 
| 243 267 | 
             
                }
         | 
| 244 268 |  | 
| 245 | 
            -
                private class  | 
| 269 | 
            +
                private class UploadProgressListener implements MediaHttpUploaderProgressListener
         | 
| 246 270 | 
             
                {
         | 
| 247 | 
            -
                     | 
| 248 | 
            -
                    private final long MAX_SIZE_PER_LOAD_JOB = 1000 * 1024 * 1024 * 1024L; // 1TB
         | 
| 249 | 
            -
                    private final int MAX_NUMBER_OF_FILES_PER_LOAD_JOB = 10000;
         | 
| 271 | 
            +
                    private String fileName;
         | 
| 250 272 |  | 
| 251 | 
            -
                     | 
| 252 | 
            -
                     | 
| 253 | 
            -
             | 
| 254 | 
            -
                    public void addTaskFile(Optional<String> remotePath, String fileName, long fileSize)
         | 
| 273 | 
            +
                    @Override
         | 
| 274 | 
            +
                    public void progressChanged(MediaHttpUploader uploader) throws IOException
         | 
| 255 275 | 
             
                    {
         | 
| 256 | 
            -
                         | 
| 257 | 
            -
             | 
| 258 | 
            -
             | 
| 259 | 
            -
             | 
| 260 | 
            -
                             | 
| 261 | 
            -
             | 
| 262 | 
            -
             | 
| 263 | 
            -
             | 
| 264 | 
            -
             | 
| 265 | 
            -
             | 
| 266 | 
            -
             | 
| 267 | 
            -
             | 
| 268 | 
            -
                    {
         | 
| 269 | 
            -
                        long currentBundleSize = 0;
         | 
| 270 | 
            -
                        int currentFileCount = 0;
         | 
| 271 | 
            -
                        ArrayList<HashMap<String, String>> job = new ArrayList<HashMap<String, String>>();
         | 
| 272 | 
            -
                        for (HashMap<String, String> task : taskList) {
         | 
| 273 | 
            -
                            boolean isNeedNextJobList = false;
         | 
| 274 | 
            -
                            long fileSize = Long.valueOf(task.get("file_size")).longValue();
         | 
| 275 | 
            -
             | 
| 276 | 
            -
                            if (currentBundleSize + fileSize > MAX_SIZE_PER_LOAD_JOB) {
         | 
| 277 | 
            -
                                isNeedNextJobList = true;
         | 
| 278 | 
            -
                            }
         | 
| 279 | 
            -
             | 
| 280 | 
            -
                            if (currentFileCount >= MAX_NUMBER_OF_FILES_PER_LOAD_JOB) {
         | 
| 281 | 
            -
                                isNeedNextJobList = true;
         | 
| 282 | 
            -
                            }
         | 
| 283 | 
            -
             | 
| 284 | 
            -
                            if (isNeedNextJobList) {
         | 
| 285 | 
            -
                                jobList.add(job);
         | 
| 286 | 
            -
                                job = new ArrayList<HashMap<String, String>>();
         | 
| 287 | 
            -
                                job.add(task);
         | 
| 288 | 
            -
                                currentBundleSize = 0;
         | 
| 289 | 
            -
                            } else {
         | 
| 290 | 
            -
                                job.add(task);
         | 
| 291 | 
            -
                            }
         | 
| 292 | 
            -
                            currentBundleSize += fileSize;
         | 
| 293 | 
            -
                            currentFileCount++;
         | 
| 294 | 
            -
             | 
| 295 | 
            -
                            log.debug(String.format("currentBundleSize:%s currentFileCount:%s", currentBundleSize, currentFileCount));
         | 
| 296 | 
            -
                            log.debug(String.format("fileSize:%s, MAX_SIZE_PER_LOAD_JOB:%s MAX_NUMBER_OF_FILES_PER_LOAD_JOB:%s",
         | 
| 297 | 
            -
                                    fileSize, MAX_SIZE_PER_LOAD_JOB, MAX_NUMBER_OF_FILES_PER_LOAD_JOB));
         | 
| 298 | 
            -
             | 
| 299 | 
            -
                        }
         | 
| 300 | 
            -
                        if (job.size() > 0) {
         | 
| 301 | 
            -
                            jobList.add(job);
         | 
| 276 | 
            +
                        switch (uploader.getUploadState()) {
         | 
| 277 | 
            +
                            case INITIATION_STARTED:
         | 
| 278 | 
            +
                                log.info(String.format("Upload start [%s]", fileName));
         | 
| 279 | 
            +
                                break;
         | 
| 280 | 
            +
                            case INITIATION_COMPLETE:
         | 
| 281 | 
            +
                                //log.info(String.format("Upload initiation completed file [%s]", fileName));
         | 
| 282 | 
            +
                                break;
         | 
| 283 | 
            +
                            case MEDIA_IN_PROGRESS:
         | 
| 284 | 
            +
                                log.debug(String.format("Uploading [%s] progress %3.0f", fileName, uploader.getProgress() * 100) + "%");
         | 
| 285 | 
            +
                                break;
         | 
| 286 | 
            +
                            case MEDIA_COMPLETE:
         | 
| 287 | 
            +
                                log.info(String.format("Upload completed [%s]", fileName));
         | 
| 302 288 | 
             
                        }
         | 
| 303 | 
            -
                        return jobList;
         | 
| 304 289 | 
             
                    }
         | 
| 305 290 |  | 
| 306 | 
            -
                    public  | 
| 291 | 
            +
                    public void setFileName(String fileName)
         | 
| 307 292 | 
             
                    {
         | 
| 308 | 
            -
                         | 
| 293 | 
            +
                        this.fileName = fileName;
         | 
| 309 294 | 
             
                    }
         | 
| 310 295 | 
             
                }
         | 
| 311 296 |  | 
| @@ -319,10 +304,10 @@ public class BigqueryWriter | |
| 319 304 | 
             
                    private String table;
         | 
| 320 305 | 
             
                    private boolean autoCreateTable;
         | 
| 321 306 | 
             
                    private Optional<String> schemaPath;
         | 
| 322 | 
            -
                    private String bucket;
         | 
| 323 307 | 
             
                    private String sourceFormat;
         | 
| 324 308 | 
             
                    private String fieldDelimiter;
         | 
| 325 309 | 
             
                    private int maxBadrecords;
         | 
| 310 | 
            +
                    private String encoding;
         | 
| 326 311 | 
             
                    private int jobStatusMaxPollingTime;
         | 
| 327 312 | 
             
                    private int jobStatusPollingInterval;
         | 
| 328 313 | 
             
                    private boolean isSkipJobResultCheck;
         | 
| @@ -375,12 +360,6 @@ public class BigqueryWriter | |
| 375 360 | 
             
                        return this;
         | 
| 376 361 | 
             
                    }
         | 
| 377 362 |  | 
| 378 | 
            -
                    public Builder setBucket(String bucket)
         | 
| 379 | 
            -
                    {
         | 
| 380 | 
            -
                        this.bucket = bucket;
         | 
| 381 | 
            -
                        return this;
         | 
| 382 | 
            -
                    }
         | 
| 383 | 
            -
             | 
| 384 363 | 
             
                    public Builder setSourceFormat(String sourceFormat)
         | 
| 385 364 | 
             
                    {
         | 
| 386 365 | 
             
                        this.sourceFormat = sourceFormat;
         | 
| @@ -399,6 +378,12 @@ public class BigqueryWriter | |
| 399 378 | 
             
                        return this;
         | 
| 400 379 | 
             
                    }
         | 
| 401 380 |  | 
| 381 | 
            +
                    public Builder setEncoding(String encoding)
         | 
| 382 | 
            +
                    {
         | 
| 383 | 
            +
                        this.encoding = encoding;
         | 
| 384 | 
            +
                        return this;
         | 
| 385 | 
            +
                    }
         | 
| 386 | 
            +
             | 
| 402 387 | 
             
                    public Builder setJobStatusMaxPollingTime(int jobStatusMaxPollingTime)
         | 
| 403 388 | 
             
                    {
         | 
| 404 389 | 
             
                        this.jobStatusMaxPollingTime = jobStatusMaxPollingTime;
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: embulk-output-bigquery
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1. | 
| 4 | 
            +
              version: 0.1.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Satoshi Akama
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2015- | 
| 11 | 
            +
            date: 2015-04-01 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -56,19 +56,16 @@ files: | |
| 56 56 | 
             
            - lib/embulk/output/bigquery.rb
         | 
| 57 57 | 
             
            - settings.gradle
         | 
| 58 58 | 
             
            - src/main/java/org/embulk/output/BigqueryAuthentication.java
         | 
| 59 | 
            -
            - src/main/java/org/embulk/output/BigqueryGcsWriter.java
         | 
| 60 59 | 
             
            - src/main/java/org/embulk/output/BigqueryOutputPlugin.java
         | 
| 61 60 | 
             
            - src/main/java/org/embulk/output/BigqueryWriter.java
         | 
| 62 61 | 
             
            - src/test/java/org/embulk/output/TestBigqueryAuthentication.java
         | 
| 63 | 
            -
            - src/test/java/org/embulk/output/TestBigqueryGcsWriter.java
         | 
| 64 62 | 
             
            - src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java
         | 
| 65 63 | 
             
            - src/test/java/org/embulk/output/TestBigqueryWriter.java
         | 
| 66 64 | 
             
            - classpath/commons-codec-1.3.jar
         | 
| 67 65 | 
             
            - classpath/commons-logging-1.1.1.jar
         | 
| 68 | 
            -
            - classpath/embulk-output-bigquery-0.1. | 
| 66 | 
            +
            - classpath/embulk-output-bigquery-0.1.2.jar
         | 
| 69 67 | 
             
            - classpath/google-api-client-1.19.1.jar
         | 
| 70 68 | 
             
            - classpath/google-api-services-bigquery-v2-rev193-1.19.1.jar
         | 
| 71 | 
            -
            - classpath/google-api-services-storage-v1-rev27-1.19.1.jar
         | 
| 72 69 | 
             
            - classpath/google-http-client-1.19.0.jar
         | 
| 73 70 | 
             
            - classpath/google-http-client-jackson2-1.19.0.jar
         | 
| 74 71 | 
             
            - classpath/google-oauth-client-1.19.0.jar
         | 
| @@ -76,7 +73,6 @@ files: | |
| 76 73 | 
             
            - classpath/httpclient-4.0.1.jar
         | 
| 77 74 | 
             
            - classpath/httpcore-4.0.1.jar
         | 
| 78 75 | 
             
            - classpath/jsr305-1.3.9.jar
         | 
| 79 | 
            -
            - classpath/mime-util-2.1.3.jar
         | 
| 80 76 | 
             
            homepage: https://github.com/sakama/embulk-output-bigquery
         | 
| 81 77 | 
             
            licenses:
         | 
| 82 78 | 
             
            - Apache-2.0
         | 
| @@ -1,201 +0,0 @@ | |
| 1 | 
            -
            package org.embulk.output;
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            import java.io.File;
         | 
| 4 | 
            -
            import java.io.FileNotFoundException;
         | 
| 5 | 
            -
            import java.io.FileInputStream;
         | 
| 6 | 
            -
            import java.io.IOException;
         | 
| 7 | 
            -
            import java.util.ArrayList;
         | 
| 8 | 
            -
            import java.util.List;
         | 
| 9 | 
            -
            import java.util.Collection;
         | 
| 10 | 
            -
            import java.util.Iterator;
         | 
| 11 | 
            -
            import java.util.IllegalFormatException;
         | 
| 12 | 
            -
            import java.nio.charset.Charset;
         | 
| 13 | 
            -
            import java.nio.charset.StandardCharsets;
         | 
| 14 | 
            -
            import com.google.common.base.Optional;
         | 
| 15 | 
            -
            import com.google.common.collect.ImmutableList;
         | 
| 16 | 
            -
            //import eu.medsea.mimeutil.MimeType;
         | 
| 17 | 
            -
            //import eu.medsea.mimeutil.MimeUtil;
         | 
| 18 | 
            -
            //import eu.medsea.mimeutil.detector.MimeDetector;
         | 
| 19 | 
            -
            import org.apache.commons.lang3.StringUtils;
         | 
| 20 | 
            -
            import org.apache.commons.codec.binary.Base64;
         | 
| 21 | 
            -
            import java.security.GeneralSecurityException;
         | 
| 22 | 
            -
             | 
| 23 | 
            -
            import org.embulk.spi.Exec;
         | 
| 24 | 
            -
            import org.slf4j.Logger;
         | 
| 25 | 
            -
             | 
| 26 | 
            -
            import com.google.api.services.storage.Storage;
         | 
| 27 | 
            -
            import com.google.api.services.storage.StorageScopes;
         | 
| 28 | 
            -
            import com.google.api.services.storage.model.Bucket;
         | 
| 29 | 
            -
            import com.google.api.services.storage.model.Objects;
         | 
| 30 | 
            -
            import com.google.api.services.storage.model.StorageObject;
         | 
| 31 | 
            -
             | 
| 32 | 
            -
            import com.google.api.client.http.InputStreamContent;
         | 
| 33 | 
            -
             | 
| 34 | 
            -
            public class BigqueryGcsWriter
         | 
| 35 | 
            -
            {
         | 
| 36 | 
            -
             | 
| 37 | 
            -
                private final Logger log = Exec.getLogger(BigqueryGcsWriter.class);
         | 
| 38 | 
            -
                private final String bucket;
         | 
| 39 | 
            -
                private final String sourceFormat;
         | 
| 40 | 
            -
                private final boolean isFileCompressed;
         | 
| 41 | 
            -
                private final boolean deleteFromBucketWhenJobEnd;
         | 
| 42 | 
            -
                private Storage storageClient;
         | 
| 43 | 
            -
             | 
| 44 | 
            -
                public BigqueryGcsWriter(Builder builder) throws IOException, GeneralSecurityException
         | 
| 45 | 
            -
                {
         | 
| 46 | 
            -
                    this.bucket = builder.bucket;
         | 
| 47 | 
            -
                    this.sourceFormat = builder.sourceFormat.toUpperCase();
         | 
| 48 | 
            -
                    this.isFileCompressed = builder.isFileCompressed;
         | 
| 49 | 
            -
                    this.deleteFromBucketWhenJobEnd = builder.deleteFromBucketWhenJobEnd;
         | 
| 50 | 
            -
             | 
| 51 | 
            -
                    BigqueryAuthentication auth = new BigqueryAuthentication(builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
         | 
| 52 | 
            -
                    this.storageClient = auth.getGcsClient();
         | 
| 53 | 
            -
                }
         | 
| 54 | 
            -
             | 
| 55 | 
            -
                public void uploadFile(String localFilePath, String fileName, Optional<String> remotePath) throws IOException
         | 
| 56 | 
            -
                {
         | 
| 57 | 
            -
                    FileInputStream stream = null;
         | 
| 58 | 
            -
             | 
| 59 | 
            -
                    try {
         | 
| 60 | 
            -
                        String path;
         | 
| 61 | 
            -
                        if (remotePath.isPresent()) {
         | 
| 62 | 
            -
                            path = remotePath.get();
         | 
| 63 | 
            -
                        } else {
         | 
| 64 | 
            -
                            path = "";
         | 
| 65 | 
            -
                        }
         | 
| 66 | 
            -
                        String gcsPath = getRemotePath(path, fileName);
         | 
| 67 | 
            -
                        StorageObject objectMetadata = new StorageObject().setName(gcsPath);
         | 
| 68 | 
            -
                        log.info(String.format("Uploading file [%s] to [gs://%s/%s]", localFilePath, bucket, gcsPath));
         | 
| 69 | 
            -
             | 
| 70 | 
            -
                        File file = new File(localFilePath);
         | 
| 71 | 
            -
                        stream = new FileInputStream(file);
         | 
| 72 | 
            -
                        InputStreamContent content = new InputStreamContent(getContentType(), stream);
         | 
| 73 | 
            -
                        Storage.Objects.Insert insertObject = storageClient.objects().insert(bucket, objectMetadata, content);
         | 
| 74 | 
            -
                        insertObject.setDisableGZipContent(true);
         | 
| 75 | 
            -
             | 
| 76 | 
            -
                        StorageObject response = insertObject.execute();
         | 
| 77 | 
            -
                        log.info(String.format("Upload completed [%s] to [gs://%s/%s]", localFilePath, bucket, gcsPath));
         | 
| 78 | 
            -
                    } finally {
         | 
| 79 | 
            -
                        stream.close();
         | 
| 80 | 
            -
                    }
         | 
| 81 | 
            -
                }
         | 
| 82 | 
            -
             | 
| 83 | 
            -
                private String getRemotePath(String remotePath, String fileName)
         | 
| 84 | 
            -
                {
         | 
| 85 | 
            -
                    if (remotePath.isEmpty()) {
         | 
| 86 | 
            -
                        return fileName;
         | 
| 87 | 
            -
                    }
         | 
| 88 | 
            -
                    String[] pathList = StringUtils.split(remotePath, '/');
         | 
| 89 | 
            -
                    String path = StringUtils.join(pathList) + "/";
         | 
| 90 | 
            -
                    if (!path.endsWith("/")) {
         | 
| 91 | 
            -
                        path = path + "/";
         | 
| 92 | 
            -
                    }
         | 
| 93 | 
            -
                    return path + fileName;
         | 
| 94 | 
            -
                }
         | 
| 95 | 
            -
             | 
| 96 | 
            -
                public void deleteFile(String remotePath, String fileName) throws IOException
         | 
| 97 | 
            -
                {
         | 
| 98 | 
            -
                    String path = getRemotePath(remotePath, fileName);
         | 
| 99 | 
            -
                    storageClient.objects().delete(bucket, path).execute();
         | 
| 100 | 
            -
                    log.info(String.format("Delete remote file [gs://%s/%s]", bucket, path));
         | 
| 101 | 
            -
                }
         | 
| 102 | 
            -
             | 
| 103 | 
            -
                public boolean getDeleteFromBucketWhenJobEnd()
         | 
| 104 | 
            -
                {
         | 
| 105 | 
            -
                    return this.deleteFromBucketWhenJobEnd;
         | 
| 106 | 
            -
                }
         | 
| 107 | 
            -
             | 
| 108 | 
            -
                private String getContentType()
         | 
| 109 | 
            -
                {
         | 
| 110 | 
            -
                    if (isFileCompressed) {
         | 
| 111 | 
            -
                        return "application/x-gzip";
         | 
| 112 | 
            -
                    } else {
         | 
| 113 | 
            -
                        if (sourceFormat.equals("NEWLINE_DELIMITED_JSON)")) {
         | 
| 114 | 
            -
                            return "application/json";
         | 
| 115 | 
            -
                        } else {
         | 
| 116 | 
            -
                            return "text/csv";
         | 
| 117 | 
            -
                        }
         | 
| 118 | 
            -
                    }
         | 
| 119 | 
            -
                }
         | 
| 120 | 
            -
             | 
| 121 | 
            -
                /*
         | 
| 122 | 
            -
                private void registerMimeDetector()
         | 
| 123 | 
            -
                {
         | 
| 124 | 
            -
                    String mimeDetector = "eu.medsea.mimeutil.detector.MagicMimeMimeDetector";
         | 
| 125 | 
            -
                    MimeDetector registeredMimeDetector = MimeUtil.getMimeDetector(mimeDetector);
         | 
| 126 | 
            -
                    MimeUtil.registerMimeDetector(mimeDetector);
         | 
| 127 | 
            -
                }
         | 
| 128 | 
            -
             | 
| 129 | 
            -
                public String detectMimeType(File file)
         | 
| 130 | 
            -
                {
         | 
| 131 | 
            -
                    try {
         | 
| 132 | 
            -
                        Collection<?> mimeTypes = MimeUtil.getMimeTypes(file);
         | 
| 133 | 
            -
                        if (!mimeTypes.isEmpty()) {
         | 
| 134 | 
            -
                            Iterator<?> iterator = mimeTypes.iterator();
         | 
| 135 | 
            -
                            MimeType mimeType = (MimeType) iterator.next();
         | 
| 136 | 
            -
                            return mimeType.getMediaType() + "/" + mimeType.getSubType();
         | 
| 137 | 
            -
                        }
         | 
| 138 | 
            -
                    } catch (Exception ex) {
         | 
| 139 | 
            -
                    }
         | 
| 140 | 
            -
                    return "application/octet-stream";
         | 
| 141 | 
            -
                }
         | 
| 142 | 
            -
                */
         | 
| 143 | 
            -
             | 
| 144 | 
            -
                public static class Builder
         | 
| 145 | 
            -
                {
         | 
| 146 | 
            -
                    private final String serviceAccountEmail;
         | 
| 147 | 
            -
                    private String p12KeyFilePath;
         | 
| 148 | 
            -
                    private String applicationName;
         | 
| 149 | 
            -
                    private String bucket;
         | 
| 150 | 
            -
                    private String sourceFormat;
         | 
| 151 | 
            -
                    private boolean isFileCompressed;
         | 
| 152 | 
            -
                    private boolean deleteFromBucketWhenJobEnd;
         | 
| 153 | 
            -
                    private boolean enableMd5hashCheck;
         | 
| 154 | 
            -
             | 
| 155 | 
            -
                    public Builder(String serviceAccountEmail)
         | 
| 156 | 
            -
                    {
         | 
| 157 | 
            -
                        this.serviceAccountEmail = serviceAccountEmail;
         | 
| 158 | 
            -
                    }
         | 
| 159 | 
            -
             | 
| 160 | 
            -
                    public Builder setP12KeyFilePath(String p12KeyFilePath)
         | 
| 161 | 
            -
                    {
         | 
| 162 | 
            -
                        this.p12KeyFilePath = p12KeyFilePath;
         | 
| 163 | 
            -
                        return this;
         | 
| 164 | 
            -
                    }
         | 
| 165 | 
            -
             | 
| 166 | 
            -
                    public Builder setApplicationName(String applicationName)
         | 
| 167 | 
            -
                    {
         | 
| 168 | 
            -
                        this.applicationName = applicationName;
         | 
| 169 | 
            -
                        return this;
         | 
| 170 | 
            -
                    }
         | 
| 171 | 
            -
             | 
| 172 | 
            -
                    public Builder setBucket(String bucket)
         | 
| 173 | 
            -
                    {
         | 
| 174 | 
            -
                        this.bucket = bucket;
         | 
| 175 | 
            -
                        return this;
         | 
| 176 | 
            -
                    }
         | 
| 177 | 
            -
             | 
| 178 | 
            -
                    public Builder setSourceFormat(String sourceFormat)
         | 
| 179 | 
            -
                    {
         | 
| 180 | 
            -
                        this.sourceFormat = sourceFormat;
         | 
| 181 | 
            -
                        return this;
         | 
| 182 | 
            -
                    }
         | 
| 183 | 
            -
             | 
| 184 | 
            -
                    public Builder setIsFileCompressed(boolean isFileCompressed)
         | 
| 185 | 
            -
                    {
         | 
| 186 | 
            -
                        this.isFileCompressed = isFileCompressed;
         | 
| 187 | 
            -
                        return this;
         | 
| 188 | 
            -
                    }
         | 
| 189 | 
            -
             | 
| 190 | 
            -
                    public Builder setDeleteFromBucketWhenJobEnd(boolean deleteFromBucketWhenJobEnd)
         | 
| 191 | 
            -
                    {
         | 
| 192 | 
            -
                        this.deleteFromBucketWhenJobEnd = deleteFromBucketWhenJobEnd;
         | 
| 193 | 
            -
                        return this;
         | 
| 194 | 
            -
                    }
         | 
| 195 | 
            -
             | 
| 196 | 
            -
                    public BigqueryGcsWriter build() throws IOException, GeneralSecurityException
         | 
| 197 | 
            -
                    {
         | 
| 198 | 
            -
                        return new BigqueryGcsWriter(this);
         | 
| 199 | 
            -
                    }
         | 
| 200 | 
            -
                }
         | 
| 201 | 
            -
            }
         |