embulk-output-bigquery 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a814af5998dfc7f29f8cac8d06b49ec0b6713ff6
4
- data.tar.gz: 0381957abca63f41d93c768c1b1f0f1ac62a4339
3
+ metadata.gz: 48a9a0add9223ccfca3e1e48f360ebf38cfe08d5
4
+ data.tar.gz: b6d9d0b0635c9d0728238873094122b4a72648de
5
5
  SHA512:
6
- metadata.gz: ab1ee4e8bf996e9540c69352881449b29e1818a5e4d23babb8df6f47fe08c1cadd80ec373ed271c6e919ebd13f009bdba56e3d13ea2aa27085ef8e45e7f16c8b
7
- data.tar.gz: 1274ac1eed4de4d046229a724a718d32e33a8c16d17844ffb4a4f94e4dc97259d5f6902222a6c34d4bba1525a6e184070d84f05bb192cbccc8b72c4166287b84
6
+ metadata.gz: 7ee81bbbc5b65c34d86014e3da5a3d848ded06766a3b58e92f5918070cc89eadd8740cf16f2b36c864c7a50f5476a0be2460fe2b722f2edfeb23f612f721df91
7
+ data.tar.gz: 605003c7364982e0f4fc391ef51517ef16b201b0dc0c3f43833f1cd98a5110b7ae2476f64ebbf3d9284e3b781a2ed85b68855d3b585cdda4b68e72f223974b14
data/README.md CHANGED
@@ -30,14 +30,14 @@ OAuth flow for installed applications.
30
30
  |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
31
31
  | auth_method | string | optional | "private_key" | `private_key` or `compute_engine`
32
32
  | service_account_email | string | required when auth_method is private_key | | Your Google service account email
33
- | p12_keyfile_path | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
33
+ | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
34
34
  | sequence_format | string | optional | %03d.%02d | |
35
35
  | file_ext | string | optional | | e.g. ".csv.gz" ".json.gz" |
36
36
  | project | string | required | | project_id |
37
37
  | dataset | string | required | | dataset |
38
38
  | table | string | required | | table name |
39
39
  | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
40
- | schema_path | string | optional | | /path/to/schema.json |
40
+ | schema_file | string | optional | | /path/to/schema.json |
41
41
  | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
42
42
  | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete local file when job is end |
43
43
  | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
@@ -53,7 +53,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
53
53
  |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
54
54
  | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
55
55
  | max_bad_records | int | optional | 0 | |
56
- | field_delimiter | string | optional | "," | |
56
+ | field_delimiter | char | optional | "," | |
57
57
  | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
58
58
  | ignore_unknown_values | boolean | optional | 0 | |
59
59
  | allow_quoted_newlines | boolean | optional | 0 | Set true, if data contains newline characters. It may cause slow processing |
@@ -65,7 +65,7 @@ out:
65
65
  type: bigquery
66
66
  auth_method: private_key # default
67
67
  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
68
- p12_keyfile_path: /path/to/p12_keyfile.p12
68
+ p12_keyfile: /path/to/p12_keyfile.p12
69
69
  path_prefix: /path/to/output
70
70
  file_ext: csv.gz
71
71
  source_format: CSV
@@ -130,14 +130,14 @@ out:
130
130
  type: bigquery
131
131
  auto_create_table: true
132
132
  table: table_%Y_%m
133
- schema_path: /path/to/schema.json
133
+ schema_file: /path/to/schema.json
134
134
  ```
135
135
 
136
136
  ### Data Consistency
137
137
 
138
138
  When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generate job ID from md5 hash of file and other options to prevent duplicate data insertion.
139
139
 
140
- `job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values)`
140
+ `job ID = md5(md5(file) + dataset + table + schema + source_format + field_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`
141
141
 
142
142
  [job ID must be unique(including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency). So same data can't insert with same settings.
143
143
 
data/build.gradle CHANGED
@@ -15,11 +15,11 @@ configurations {
15
15
  sourceCompatibility = 1.7
16
16
  targetCompatibility = 1.7
17
17
 
18
- version = "0.1.7"
18
+ version = "0.1.8"
19
19
 
20
20
  dependencies {
21
- compile "org.embulk:embulk-core:0.6.8"
22
- provided "org.embulk:embulk-core:0.6.8"
21
+ compile "org.embulk:embulk-core:0.6.22"
22
+ provided "org.embulk:embulk-core:0.6.22"
23
23
 
24
24
  compile "com.google.http-client:google-http-client-jackson2:1.20.0"
25
25
  compile "com.google.apis:google-api-services-bigquery:v2-rev205-1.20.0"
@@ -49,7 +49,7 @@ Gem::Specification.new do |spec|
49
49
  spec.description = %[Embulk plugin that insert records to Google BigQuery.]
50
50
  spec.email = ["satoshiakama@gmail.com"]
51
51
  spec.licenses = ["Apache-2.0"]
52
- spec.homepage = "https://github.com/sakama/embulk-output-bigquery"
52
+ spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
53
53
 
54
54
  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
55
55
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
@@ -5,9 +5,11 @@ import java.io.FileNotFoundException;
5
5
  import java.io.FileOutputStream;
6
6
  import java.io.BufferedOutputStream;
7
7
  import java.io.IOException;
8
+ import java.nio.charset.Charset;
8
9
  import java.security.NoSuchAlgorithmException;
9
10
  import java.util.List;
10
11
  import java.util.concurrent.TimeoutException;
12
+ import com.google.common.base.Function;
11
13
  import com.google.common.base.Optional;
12
14
  import com.google.common.base.Throwables;
13
15
  import java.security.GeneralSecurityException;
@@ -21,6 +23,7 @@ import org.embulk.config.ConfigDiff;
21
23
  import org.embulk.config.CommitReport;
22
24
  import org.embulk.config.Task;
23
25
  import org.embulk.config.TaskSource;
26
+ import org.embulk.spi.unit.LocalFile;
24
27
  import org.embulk.spi.Buffer;
25
28
  import org.embulk.spi.FileOutputPlugin;
26
29
  import org.embulk.spi.TransactionalFileOutput;
@@ -36,114 +39,147 @@ public class BigqueryOutputPlugin
36
39
  {
37
40
  @Config("auth_method")
38
41
  @ConfigDefault("\"private_key\"")
39
- public String getAuthMethod();
42
+ AuthMethod getAuthMethod();
40
43
 
41
44
  @Config("service_account_email")
42
45
  @ConfigDefault("null")
43
- public Optional<String> getServiceAccountEmail();
46
+ Optional<String> getServiceAccountEmail();
44
47
 
48
+ // kept for backward compatibility
45
49
  @Config("p12_keyfile_path")
46
50
  @ConfigDefault("null")
47
- public Optional<String> getP12KeyfilePath();
51
+ Optional<String> getP12KeyfilePath();
52
+
53
+ @Config("p12_keyfile")
54
+ @ConfigDefault("null")
55
+ Optional<LocalFile> getP12Keyfile();
56
+ void setP12Keyfile(Optional<LocalFile> p12Keyfile);
48
57
 
49
58
  @Config("application_name")
50
59
  @ConfigDefault("\"Embulk BigQuery plugin\"")
51
- public String getApplicationName();
60
+ String getApplicationName();
52
61
 
53
62
  @Config("path_prefix")
54
- public String getPathPrefix();
63
+ String getPathPrefix();
55
64
 
56
65
  @Config("sequence_format")
57
66
  @ConfigDefault("\".%03d.%02d\"")
58
- public String getSequenceFormat();
67
+ String getSequenceFormat();
59
68
 
60
69
  @Config("file_ext")
61
- public String getFileNameExtension();
70
+ String getFileNameExtension();
62
71
 
63
72
  @Config("source_format")
64
73
  @ConfigDefault("\"CSV\"")
65
- public String getSourceFormat();
74
+ SourceFormat getSourceFormat();
66
75
 
67
76
  @Config("field_delimiter")
68
77
  @ConfigDefault("\",\"")
69
- public String getFieldDelimiter();
78
+ char getFieldDelimiter();
70
79
 
71
80
  @Config("max_bad_records")
72
81
  @ConfigDefault("0")
73
- public int getMaxBadrecords();
82
+ int getMaxBadrecords();
74
83
 
75
84
  @Config("encoding")
76
85
  @ConfigDefault("\"UTF-8\"")
77
- public String getEncoding();
86
+ Charset getEncoding();
78
87
 
79
88
  @Config("delete_from_local_when_job_end")
80
89
  @ConfigDefault("false")
81
- public boolean getDeleteFromLocalWhenJobEnd();
90
+ boolean getDeleteFromLocalWhenJobEnd();
82
91
 
83
92
  @Config("project")
84
- public String getProject();
93
+ String getProject();
85
94
 
86
95
  @Config("dataset")
87
- public String getDataset();
96
+ String getDataset();
88
97
 
89
98
  @Config("table")
90
- public String getTable();
99
+ String getTable();
91
100
 
92
101
  @Config("auto_create_table")
93
102
  @ConfigDefault("false")
94
- public boolean getAutoCreateTable();
103
+ boolean getAutoCreateTable();
95
104
 
105
+ // kept for backward compatibility
96
106
  @Config("schema_path")
97
107
  @ConfigDefault("null")
98
- public Optional<String> getSchemaPath();
108
+ Optional<String> getSchemaPath();
109
+
110
+ @Config("schema_file")
111
+ @ConfigDefault("null")
112
+ Optional<LocalFile> getSchemaFile();
113
+ void setSchemaFile(Optional<LocalFile> schemaFile);
99
114
 
100
115
  @Config("prevent_duplicate_insert")
101
116
  @ConfigDefault("false")
102
- public boolean getPreventDuplicateInsert();
117
+ boolean getPreventDuplicateInsert();
103
118
 
104
119
  @Config("job_status_max_polling_time")
105
120
  @ConfigDefault("3600")
106
- public int getJobStatusMaxPollingTime();
121
+ int getJobStatusMaxPollingTime();
107
122
 
108
123
  @Config("job_status_polling_interval")
109
124
  @ConfigDefault("10")
110
- public int getJobStatusPollingInterval();
125
+ int getJobStatusPollingInterval();
111
126
 
112
127
  @Config("is_skip_job_result_check")
113
128
  @ConfigDefault("false")
114
- public boolean getIsSkipJobResultCheck();
129
+ boolean getIsSkipJobResultCheck();
115
130
 
116
131
  @Config("ignore_unknown_values")
117
132
  @ConfigDefault("false")
118
- public boolean getIgnoreUnknownValues();
133
+ boolean getIgnoreUnknownValues();
119
134
 
120
135
  @Config("allow_quoted_newlines")
121
136
  @ConfigDefault("false")
122
- public boolean getAllowQuotedNewlines();
137
+ boolean getAllowQuotedNewlines();
123
138
  }
124
139
 
125
140
  private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
126
141
  private static BigqueryWriter bigQueryWriter;
127
142
 
143
+ @Override
128
144
  public ConfigDiff transaction(ConfigSource config, int taskCount,
129
145
  FileOutputPlugin.Control control)
130
146
  {
131
147
  final PluginTask task = config.loadConfig(PluginTask.class);
132
148
 
149
+ if (task.getP12KeyfilePath().isPresent()) {
150
+ if (task.getP12Keyfile().isPresent()) {
151
+ throw new ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid");
152
+ }
153
+ try {
154
+ task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfilePath().get())));
155
+ } catch (IOException ex) {
156
+ throw Throwables.propagate(ex);
157
+ }
158
+ }
159
+
160
+ if (task.getSchemaPath().isPresent()) {
161
+ if (task.getSchemaFile().isPresent()) {
162
+ throw new ConfigException("Setting both schema_path and schema_file is invalid");
163
+ }
164
+ try {
165
+ task.setSchemaFile(Optional.of(LocalFile.of(task.getSchemaPath().get())));
166
+ } catch (IOException ex) {
167
+ throw Throwables.propagate(ex);
168
+ }
169
+ }
170
+
133
171
  try {
134
- bigQueryWriter = new BigqueryWriter.Builder(task.getAuthMethod())
135
- .setServiceAccountEmail(task.getServiceAccountEmail())
136
- .setP12KeyFilePath(task.getP12KeyfilePath())
137
- .setApplicationName(task.getApplicationName())
138
- .setProject(task.getProject())
139
- .setDataset(task.getDataset())
140
- .setTable(generateTableName(task.getTable()))
172
+ bigQueryWriter = new BigqueryWriter.Builder (
173
+ task.getAuthMethod().getString(),
174
+ task.getServiceAccountEmail(),
175
+ task.getP12Keyfile().transform(localFileToPathString()),
176
+ task.getApplicationName())
141
177
  .setAutoCreateTable(task.getAutoCreateTable())
142
- .setSchemaPath(task.getSchemaPath())
143
- .setSourceFormat(task.getSourceFormat())
144
- .setFieldDelimiter(task.getFieldDelimiter())
145
- .setMaxBadrecords(task.getMaxBadrecords())
146
- .setEncoding(task.getEncoding())
178
+ .setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
179
+ .setSourceFormat(task.getSourceFormat().getString())
180
+ .setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
181
+ .setMaxBadRecords(task.getMaxBadrecords())
182
+ .setEncoding(String.valueOf(task.getEncoding()))
147
183
  .setPreventDuplicateInsert(task.getPreventDuplicateInsert())
148
184
  .setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
149
185
  .setJobStatusPollingInterval(task.getJobStatusPollingInterval())
@@ -151,8 +187,9 @@ public class BigqueryOutputPlugin
151
187
  .setIgnoreUnknownValues(task.getIgnoreUnknownValues())
152
188
  .setAllowQuotedNewlines(task.getAllowQuotedNewlines())
153
189
  .build();
154
- } catch (FileNotFoundException ex) {
155
- throw new ConfigException(ex);
190
+
191
+ bigQueryWriter.checkConfig(task.getProject(), task.getDataset(), task.getTable());
192
+
156
193
  } catch (IOException | GeneralSecurityException ex) {
157
194
  throw new ConfigException(ex);
158
195
  }
@@ -160,6 +197,7 @@ public class BigqueryOutputPlugin
160
197
  return resume(task.dump(), taskCount, control);
161
198
  }
162
199
 
200
+ @Override
163
201
  public ConfigDiff resume(TaskSource taskSource,
164
202
  int taskCount,
165
203
  FileOutputPlugin.Control control)
@@ -176,6 +214,17 @@ public class BigqueryOutputPlugin
176
214
  {
177
215
  }
178
216
 
217
+ private Function<LocalFile, String> localFileToPathString()
218
+ {
219
+ return new Function<LocalFile, String>()
220
+ {
221
+ public String apply(LocalFile file)
222
+ {
223
+ return file.getPath().toString();
224
+ }
225
+ };
226
+ }
227
+
179
228
  @Override
180
229
  public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
181
230
  {
@@ -186,6 +235,11 @@ public class BigqueryOutputPlugin
186
235
  final String pathSuffix = task.getFileNameExtension();
187
236
 
188
237
  return new TransactionalFileOutput() {
238
+ private final String project = task.getProject();
239
+ private final String dataset = task.getDataset();
240
+ private final String table = generateTableName(task.getTable());
241
+ private final boolean deleteFromLocalWhenJobEnd = task.getDeleteFromLocalWhenJobEnd();
242
+
189
243
  private int fileIndex = 0;
190
244
  private BufferedOutputStream output = null;
191
245
  private File file;
@@ -243,9 +297,9 @@ public class BigqueryOutputPlugin
243
297
  closeFile();
244
298
  if (filePath != null) {
245
299
  try {
246
- bigQueryWriter.executeLoad(filePath);
300
+ bigQueryWriter.executeLoad(project, dataset, table, filePath);
247
301
 
248
- if (task.getDeleteFromLocalWhenJobEnd()) {
302
+ if (deleteFromLocalWhenJobEnd) {
249
303
  log.info(String.format("Delete local file [%s]", filePath));
250
304
  file.delete();
251
305
  }
@@ -281,4 +335,40 @@ public class BigqueryOutputPlugin
281
335
 
282
336
  return result.toString();
283
337
  }
284
- }
338
+
339
+ public enum SourceFormat
340
+ {
341
+ CSV("CSV"),
342
+ NEWLINE_DELIMITED_JSON("NEWLINE_DELIMITED_JSON");
343
+
344
+ private final String string;
345
+
346
+ SourceFormat(String string)
347
+ {
348
+ this.string = string;
349
+ }
350
+
351
+ public String getString()
352
+ {
353
+ return string;
354
+ }
355
+ }
356
+
357
+ public enum AuthMethod
358
+ {
359
+ private_key("private_key"),
360
+ compute_engine("compute_engine");
361
+
362
+ private final String string;
363
+
364
+ AuthMethod(String string)
365
+ {
366
+ this.string = string;
367
+ }
368
+
369
+ public String getString()
370
+ {
371
+ return string;
372
+ }
373
+ }
374
+ }
@@ -6,17 +6,17 @@ import java.io.FileNotFoundException;
6
6
  import java.io.FileInputStream;
7
7
  import java.io.BufferedInputStream;
8
8
  import com.google.api.client.http.InputStreamContent;
9
+
9
10
  import java.security.MessageDigest;
10
11
  import java.security.NoSuchAlgorithmException;
11
12
  import java.util.List;
12
13
  import java.util.concurrent.TimeoutException;
13
14
  import com.google.common.base.Optional;
14
- import com.google.api.client.util.Base64;
15
- import com.google.common.base.Throwables;
16
15
  import java.security.GeneralSecurityException;
17
16
  import com.fasterxml.jackson.databind.ObjectMapper;
18
17
  import com.fasterxml.jackson.core.type.TypeReference;
19
18
 
19
+ import com.google.common.collect.ImmutableList;
20
20
  import org.apache.commons.codec.binary.Hex;
21
21
  import org.embulk.spi.Exec;
22
22
  import org.slf4j.Logger;
@@ -40,17 +40,13 @@ import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
40
40
 
41
41
  public class BigqueryWriter
42
42
  {
43
-
44
43
  private final Logger log = Exec.getLogger(BigqueryWriter.class);
45
- private final String project;
46
- private final String dataset;
47
- private final String table;
48
44
  private final boolean autoCreateTable;
49
45
  private final Optional<String> schemaPath;
50
46
  private final TableSchema tableSchema;
51
47
  private final String sourceFormat;
52
48
  private final String fieldDelimiter;
53
- private final int maxBadrecords;
49
+ private final int maxBadRecords;
54
50
  private final String encoding;
55
51
  private final boolean preventDuplicateInsert;
56
52
  private final long jobStatusMaxPollingTime;
@@ -60,16 +56,14 @@ public class BigqueryWriter
60
56
  private final boolean allowQuotedNewlines;
61
57
  private final Bigquery bigQueryClient;
62
58
 
63
- public BigqueryWriter(Builder builder) throws FileNotFoundException, IOException, GeneralSecurityException
59
+ public BigqueryWriter(Builder builder)
60
+ throws IOException, GeneralSecurityException
64
61
  {
65
- this.project = builder.project;
66
- this.dataset = builder.dataset;
67
- this.table = builder.table;
68
62
  this.autoCreateTable = builder.autoCreateTable;
69
63
  this.schemaPath = builder.schemaPath;
70
64
  this.sourceFormat = builder.sourceFormat.toUpperCase();
71
65
  this.fieldDelimiter = builder.fieldDelimiter;
72
- this.maxBadrecords = builder.maxBadrecords;
66
+ this.maxBadRecords = builder.maxBadRecords;
73
67
  this.encoding = builder.encoding.toUpperCase();
74
68
  this.preventDuplicateInsert = builder.preventDuplicateInsert;
75
69
  this.jobStatusMaxPollingTime = builder.jobStatusMaxPollingTime;
@@ -81,15 +75,14 @@ public class BigqueryWriter
81
75
  BigqueryAuthentication auth = new BigqueryAuthentication(builder.authMethod, builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
82
76
  this.bigQueryClient = auth.getBigqueryClient();
83
77
 
84
- checkConfig();
85
78
  if (autoCreateTable) {
86
- this.tableSchema = createTableSchema(builder.schemaPath);
79
+ this.tableSchema = createTableSchema();
87
80
  } else {
88
81
  this.tableSchema = null;
89
82
  }
90
83
  }
91
84
 
92
- private String getJobStatus(JobReference jobRef) throws JobFailedException
85
+ private String getJobStatus(String project, JobReference jobRef) throws JobFailedException
93
86
  {
94
87
  try {
95
88
  Job job = bigQueryClient.jobs().get(project, jobRef.getJobId()).execute();
@@ -108,7 +101,6 @@ public class BigqueryWriter
108
101
  String jobStatus = job.getStatus().getState();
109
102
  if (jobStatus.equals("DONE")) {
110
103
  JobStatistics statistics = job.getStatistics();
111
- //log.info(String.format("Job end. create:[%s] end:[%s]", statistics.getCreationTime(), statistics.getEndTime()));
112
104
  log.info(String.format("Job statistics [%s]", statistics.getLoad()));
113
105
  }
114
106
  return jobStatus;
@@ -118,14 +110,14 @@ public class BigqueryWriter
118
110
  }
119
111
  }
120
112
 
121
- private void getJobStatusUntilDone(JobReference jobRef) throws TimeoutException, JobFailedException
113
+ private void getJobStatusUntilDone(String project, JobReference jobRef) throws TimeoutException, JobFailedException
122
114
  {
123
115
  long startTime = System.currentTimeMillis();
124
116
  long elapsedTime;
125
117
 
126
118
  try {
127
119
  while (true) {
128
- String jobStatus = getJobStatus(jobRef);
120
+ String jobStatus = getJobStatus(project, jobRef);
129
121
  elapsedTime = System.currentTimeMillis() - startTime;
130
122
  if (jobStatus.equals("DONE")) {
131
123
  log.info(String.format("Job completed successfully. job id:[%s] elapsed_time:%dms status:[%s]", jobRef.getJobId(), elapsedTime, "SUCCESS"));
@@ -142,44 +134,28 @@ public class BigqueryWriter
142
134
  }
143
135
  }
144
136
 
145
- public void executeLoad(String localFilePath) throws GoogleJsonResponseException, NoSuchAlgorithmException,
146
- TimeoutException, JobFailedException, IOException
137
+ public void executeLoad(String project, String dataset, String table, String localFilePath)
138
+ throws NoSuchAlgorithmException, TimeoutException, JobFailedException, IOException
147
139
  {
148
140
  log.info(String.format("Job preparing... project:%s dataset:%s table:%s", project, dataset, table));
149
141
 
150
142
  Job job = new Job();
151
143
  JobReference jobRef = new JobReference();
152
- JobConfiguration jobConfig = new JobConfiguration();
153
- JobConfigurationLoad loadConfig = new JobConfigurationLoad();
154
- jobConfig.setLoad(loadConfig);
144
+ JobConfiguration jobConfig = new JobConfiguration().setLoad(setLoadConfig(project, dataset, table));
155
145
  job.setConfiguration(jobConfig);
156
146
 
157
147
  if (preventDuplicateInsert) {
158
- String jobId = createJobId(localFilePath);
148
+ ImmutableList<String> elements = ImmutableList.of(
149
+ getLocalMd5hash(localFilePath), dataset, table,
150
+ String.valueOf(tableSchema), sourceFormat, fieldDelimiter, String.valueOf(maxBadRecords),
151
+ encoding, String.valueOf(ignoreUnknownValues), String.valueOf(allowQuotedNewlines)
152
+ );
153
+ String jobId = createJobId(elements);
154
+
159
155
  jobRef.setJobId(jobId);
160
156
  job.setJobReference(jobRef);
161
157
  }
162
158
 
163
- loadConfig.setAllowQuotedNewlines(allowQuotedNewlines);
164
- loadConfig.setEncoding(encoding);
165
- loadConfig.setMaxBadRecords(maxBadrecords);
166
- if (sourceFormat.equals("NEWLINE_DELIMITED_JSON")) {
167
- loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");
168
- } else {
169
- loadConfig.setFieldDelimiter(fieldDelimiter);
170
- }
171
- loadConfig.setWriteDisposition("WRITE_APPEND");
172
- if (autoCreateTable) {
173
- loadConfig.setSchema(tableSchema);
174
- loadConfig.setCreateDisposition("CREATE_IF_NEEDED");
175
- log.info(String.format("table:[%s] will be create if not exists", table));
176
- } else {
177
- loadConfig.setCreateDisposition("CREATE_NEVER");
178
- }
179
- loadConfig.setIgnoreUnknownValues(ignoreUnknownValues);
180
-
181
- loadConfig.setDestinationTable(createTableReference());
182
-
183
159
  File file = new File(localFilePath);
184
160
  InputStreamContent mediaContent = new InputStreamContent("application/octet-stream",
185
161
  new BufferedInputStream(
@@ -206,31 +182,52 @@ public class BigqueryWriter
206
182
  if (isSkipJobResultCheck) {
207
183
  log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
208
184
  } else {
209
- getJobStatusUntilDone(jobRef);
185
+ getJobStatusUntilDone(project, jobRef);
210
186
  }
211
187
  }
212
188
 
213
- private String createJobId(String localFilePath) throws NoSuchAlgorithmException, IOException
189
+ private JobConfigurationLoad setLoadConfig(String project, String dataset, String table)
190
+ {
191
+ JobConfigurationLoad config = new JobConfigurationLoad();
192
+ config.setAllowQuotedNewlines(allowQuotedNewlines)
193
+ .setEncoding(encoding)
194
+ .setMaxBadRecords(maxBadRecords)
195
+ .setSourceFormat(sourceFormat)
196
+ .setIgnoreUnknownValues(ignoreUnknownValues)
197
+ .setDestinationTable(createTableReference(project, dataset, table))
198
+ .setWriteDisposition("WRITE_APPEND");
199
+
200
+ if (sourceFormat.equals("CSV")) {
201
+ config.setFieldDelimiter(String.valueOf(fieldDelimiter));
202
+ }
203
+ if (autoCreateTable) {
204
+ config.setSchema(tableSchema);
205
+ config.setCreateDisposition("CREATE_IF_NEEDED");
206
+ log.info(String.format("table:[%s] will be create if not exists", table));
207
+ } else {
208
+ config.setCreateDisposition("CREATE_NEVER");
209
+ }
210
+ return config;
211
+ }
212
+
213
+ private String createJobId(ImmutableList<String> elements) throws NoSuchAlgorithmException, IOException
214
214
  {
215
215
  StringBuilder sb = new StringBuilder();
216
- sb.append(getLocalMd5hash(localFilePath));
217
- sb.append(dataset);
218
- sb.append(table);
219
- sb.append(tableSchema);
220
- sb.append(sourceFormat);
221
- sb.append(fieldDelimiter);
222
- sb.append(maxBadrecords);
223
- sb.append(encoding);
224
- sb.append(ignoreUnknownValues);
216
+ for (String element : elements) {
217
+ sb.append(element);
218
+ }
225
219
 
226
220
  MessageDigest md = MessageDigest.getInstance("MD5");
227
- String str = new String(sb);
228
- byte[] digest = md.digest(str.getBytes());
221
+ byte[] digest = md.digest(new String(sb).getBytes());
229
222
  String hash = new String(Hex.encodeHex(digest));
230
- return "embulk_job_" + hash;
223
+
224
+ StringBuilder jobId = new StringBuilder();
225
+ jobId.append("embulk_job_");
226
+ jobId.append(hash);
227
+ return jobId.toString();
231
228
  }
232
229
 
233
- private TableReference createTableReference()
230
+ private TableReference createTableReference(String project, String dataset, String table)
234
231
  {
235
232
  return new TableReference()
236
233
  .setProjectId(project)
@@ -238,7 +235,7 @@ public class BigqueryWriter
238
235
  .setTableId(table);
239
236
  }
240
237
 
241
- private TableSchema createTableSchema(Optional<String> schemaPath) throws FileNotFoundException, IOException
238
+ public TableSchema createTableSchema() throws IOException
242
239
  {
243
240
  String path = schemaPath.orNull();
244
241
  File file = new File(path);
@@ -247,8 +244,7 @@ public class BigqueryWriter
247
244
  stream = new FileInputStream(file);
248
245
  ObjectMapper mapper = new ObjectMapper();
249
246
  List<TableFieldSchema> fields = mapper.readValue(stream, new TypeReference<List<TableFieldSchema>>() {});
250
- TableSchema tableSchema = new TableSchema().setFields(fields);
251
- return tableSchema;
247
+ return new TableSchema().setFields(fields);
252
248
  } finally {
253
249
  if (stream != null) {
254
250
  stream.close();
@@ -256,22 +252,22 @@ public class BigqueryWriter
256
252
  }
257
253
  }
258
254
 
259
- public boolean isExistTable(String tableName) throws IOException
255
+ public boolean isExistTable(String project, String dataset, String table) throws IOException
260
256
  {
261
257
  Tables tableRequest = bigQueryClient.tables();
262
258
  try {
263
- Table tableData = tableRequest.get(project, dataset, tableName).execute();
259
+ Table tableData = tableRequest.get(project, dataset, table).execute();
264
260
  } catch (GoogleJsonResponseException ex) {
265
261
  return false;
266
262
  }
267
263
  return true;
268
264
  }
269
265
 
270
- public void checkConfig() throws FileNotFoundException, IOException
266
+ public void checkConfig(String project, String dataset, String table) throws IOException
271
267
  {
272
268
  if (autoCreateTable) {
273
269
  if (!schemaPath.isPresent()) {
274
- throw new FileNotFoundException("schema_path is empty");
270
+ throw new FileNotFoundException("schema_file is empty");
275
271
  } else {
276
272
  File file = new File(schemaPath.orNull());
277
273
  if (!file.exists()) {
@@ -279,7 +275,7 @@ public class BigqueryWriter
279
275
  }
280
276
  }
281
277
  } else {
282
- if (!isExistTable(table)) {
278
+ if (!isExistTable(project, dataset, table)) {
283
279
  throw new IOException(String.format("table [%s] is not exists", table));
284
280
  }
285
281
  }
@@ -341,14 +337,11 @@ public class BigqueryWriter
341
337
  private Optional<String> serviceAccountEmail;
342
338
  private Optional<String> p12KeyFilePath;
343
339
  private String applicationName;
344
- private String project;
345
- private String dataset;
346
- private String table;
347
340
  private boolean autoCreateTable;
348
341
  private Optional<String> schemaPath;
349
342
  private String sourceFormat;
350
343
  private String fieldDelimiter;
351
- private int maxBadrecords;
344
+ private int maxBadRecords;
352
345
  private String encoding;
353
346
  private boolean preventDuplicateInsert;
354
347
  private int jobStatusMaxPollingTime;
@@ -357,45 +350,12 @@ public class BigqueryWriter
357
350
  private boolean ignoreUnknownValues;
358
351
  private boolean allowQuotedNewlines;
359
352
 
360
- public Builder(String authMethod)
353
+ public Builder(String authMethod, Optional<String> serviceAccountEmail, Optional<String> p12KeyFilePath, String applicationName)
361
354
  {
362
355
  this.authMethod = authMethod;
363
- }
364
-
365
- public Builder setServiceAccountEmail(Optional<String> serviceAccountEmail)
366
- {
367
356
  this.serviceAccountEmail = serviceAccountEmail;
368
- return this;
369
- }
370
-
371
- public Builder setP12KeyFilePath(Optional<String> p12KeyFilePath)
372
- {
373
357
  this.p12KeyFilePath = p12KeyFilePath;
374
- return this;
375
- }
376
-
377
- public Builder setApplicationName(String applicationName)
378
- {
379
358
  this.applicationName = applicationName;
380
- return this;
381
- }
382
-
383
- public Builder setProject(String project)
384
- {
385
- this.project = project;
386
- return this;
387
- }
388
-
389
- public Builder setDataset(String dataset)
390
- {
391
- this.dataset = dataset;
392
- return this;
393
- }
394
-
395
- public Builder setTable(String table)
396
- {
397
- this.table = table;
398
- return this;
399
359
  }
400
360
 
401
361
  public Builder setAutoCreateTable(boolean autoCreateTable)
@@ -422,9 +382,9 @@ public class BigqueryWriter
422
382
  return this;
423
383
  }
424
384
 
425
- public Builder setMaxBadrecords(int maxBadrecords)
385
+ public Builder setMaxBadRecords(int maxBadRecords)
426
386
  {
427
- this.maxBadrecords = maxBadrecords;
387
+ this.maxBadRecords = maxBadRecords;
428
388
  return this;
429
389
  }
430
390
 
@@ -482,4 +442,4 @@ public class BigqueryWriter
482
442
  super(message);
483
443
  }
484
444
  }
485
- }
445
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-20 00:00:00.000000000 Z
11
+ date: 2015-08-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,7 +62,7 @@ files:
62
62
  - src/test/java/org/embulk/output/TestBigqueryWriter.java
63
63
  - classpath/commons-codec-1.3.jar
64
64
  - classpath/commons-logging-1.1.1.jar
65
- - classpath/embulk-output-bigquery-0.1.7.jar
65
+ - classpath/embulk-output-bigquery-0.1.8.jar
66
66
  - classpath/google-api-client-1.20.0.jar
67
67
  - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
68
68
  - classpath/google-http-client-1.20.0.jar
@@ -72,7 +72,7 @@ files:
72
72
  - classpath/httpclient-4.0.1.jar
73
73
  - classpath/httpcore-4.0.1.jar
74
74
  - classpath/jsr305-1.3.9.jar
75
- homepage: https://github.com/sakama/embulk-output-bigquery
75
+ homepage: https://github.com/embulk/embulk-output-bigquery
76
76
  licenses:
77
77
  - Apache-2.0
78
78
  metadata: {}