embulk-output-bigquery 0.1.7 → 0.1.8

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: a814af5998dfc7f29f8cac8d06b49ec0b6713ff6
- data.tar.gz: 0381957abca63f41d93c768c1b1f0f1ac62a4339
+ metadata.gz: 48a9a0add9223ccfca3e1e48f360ebf38cfe08d5
+ data.tar.gz: b6d9d0b0635c9d0728238873094122b4a72648de
  SHA512:
- metadata.gz: ab1ee4e8bf996e9540c69352881449b29e1818a5e4d23babb8df6f47fe08c1cadd80ec373ed271c6e919ebd13f009bdba56e3d13ea2aa27085ef8e45e7f16c8b
- data.tar.gz: 1274ac1eed4de4d046229a724a718d32e33a8c16d17844ffb4a4f94e4dc97259d5f6902222a6c34d4bba1525a6e184070d84f05bb192cbccc8b72c4166287b84
+ metadata.gz: 7ee81bbbc5b65c34d86014e3da5a3d848ded06766a3b58e92f5918070cc89eadd8740cf16f2b36c864c7a50f5476a0be2460fe2b722f2edfeb23f612f721df91
+ data.tar.gz: 605003c7364982e0f4fc391ef51517ef16b201b0dc0c3f43833f1cd98a5110b7ae2476f64ebbf3d9284e3b781a2ed85b68855d3b585cdda4b68e72f223974b14
data/README.md CHANGED
@@ -30,14 +30,14 @@ OAuth flow for installed applications.
  |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
  | auth_method | string | optional | "private_key" | `private_key` or `compute_engine` |
  | service_account_email | string | required when auth_method is private_key | | Your Google service account email |
- | p12_keyfile_path | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
+ | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
  | sequence_format | string | optional | %03d.%02d | |
  | file_ext | string | optional | | e.g. ".csv.gz" ".json.gz" |
  | project | string | required | | project_id |
  | dataset | string | required | | dataset |
  | table | string | required | | table name |
  | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
- | schema_path | string | optional | | /path/to/schema.json |
+ | schema_file | string | optional | | /path/to/schema.json |
  | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
  | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete the local file when the job ends |
  | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
@@ -53,7 +53,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
  |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
  | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
  | max_bad_records | int | optional | 0 | |
- | field_delimiter | string | optional | "," | |
+ | field_delimiter | char | optional | "," | |
  | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
  | ignore_unknown_values | boolean | optional | 0 | |
  | allow_quoted_newlines | boolean | optional | 0 | Set to true if the data contains newline characters; it may slow processing |
@@ -65,7 +65,7 @@ out:
  type: bigquery
  auth_method: private_key # default
  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
- p12_keyfile_path: /path/to/p12_keyfile.p12
+ p12_keyfile: /path/to/p12_keyfile.p12
  path_prefix: /path/to/output
  file_ext: csv.gz
  source_format: CSV
@@ -130,14 +130,14 @@ out:
  type: bigquery
  auto_create_table: true
  table: table_%Y_%m
- schema_path: /path/to/schema.json
+ schema_file: /path/to/schema.json
  ```

  ### Data Consistency

  When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options to prevent duplicate data insertion.

- `job ID = md5(md5(file) + dataset + table + schema + source_format + field_delimiter + max_bad_records + encoding + ignore_unknown_values)`
+ `job ID = md5(md5(file) + dataset + table + schema + source_format + field_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`

  [A job ID must be unique (including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency), so the same data can't be inserted again with the same settings.
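
For reference, the dedupe key is computed along these lines; this is a minimal sketch mirroring the `createJobId` logic in the BigqueryWriter diff below, not the verbatim released code:

```java
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.commons.codec.binary.Hex;

// Concatenate md5(file) with the load settings listed above, then
// md5 the whole string to obtain a deterministic job ID.
static String createJobId(Iterable<String> elements) throws NoSuchAlgorithmException
{
    StringBuilder sb = new StringBuilder();
    for (String element : elements) {
        sb.append(element);
    }
    MessageDigest md = MessageDigest.getInstance("MD5");
    byte[] digest = md.digest(sb.toString().getBytes());
    return "embulk_job_" + new String(Hex.encodeHex(digest));
}
```

Because BigQuery remembers a job ID even after a failed job, retrying the same file with the same settings requires changing one of these inputs (or disabling `prevent_duplicate_insert`).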
data/build.gradle CHANGED
@@ -15,11 +15,11 @@ configurations {
  sourceCompatibility = 1.7
  targetCompatibility = 1.7

- version = "0.1.7"
+ version = "0.1.8"

  dependencies {
- compile "org.embulk:embulk-core:0.6.8"
- provided "org.embulk:embulk-core:0.6.8"
+ compile "org.embulk:embulk-core:0.6.22"
+ provided "org.embulk:embulk-core:0.6.22"

  compile "com.google.http-client:google-http-client-jackson2:1.20.0"
  compile "com.google.apis:google-api-services-bigquery:v2-rev205-1.20.0"
data/embulk-output-bigquery.gemspec CHANGED
@@ -49,7 +49,7 @@ Gem::Specification.new do |spec|
  spec.description = %[Embulk plugin that insert records to Google BigQuery.]
  spec.email = ["satoshiakama@gmail.com"]
  spec.licenses = ["Apache-2.0"]
- spec.homepage = "https://github.com/sakama/embulk-output-bigquery"
+ spec.homepage = "https://github.com/embulk/embulk-output-bigquery"

  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java CHANGED
@@ -5,9 +5,11 @@ import java.io.FileNotFoundException;
  import java.io.FileOutputStream;
  import java.io.BufferedOutputStream;
  import java.io.IOException;
+ import java.nio.charset.Charset;
  import java.security.NoSuchAlgorithmException;
  import java.util.List;
  import java.util.concurrent.TimeoutException;
+ import com.google.common.base.Function;
  import com.google.common.base.Optional;
  import com.google.common.base.Throwables;
  import java.security.GeneralSecurityException;
@@ -21,6 +23,7 @@ import org.embulk.config.ConfigDiff;
  import org.embulk.config.CommitReport;
  import org.embulk.config.Task;
  import org.embulk.config.TaskSource;
+ import org.embulk.spi.unit.LocalFile;
  import org.embulk.spi.Buffer;
  import org.embulk.spi.FileOutputPlugin;
  import org.embulk.spi.TransactionalFileOutput;
@@ -36,114 +39,147 @@ public class BigqueryOutputPlugin
  {
  @Config("auth_method")
  @ConfigDefault("\"private_key\"")
- public String getAuthMethod();
+ AuthMethod getAuthMethod();

  @Config("service_account_email")
  @ConfigDefault("null")
- public Optional<String> getServiceAccountEmail();
+ Optional<String> getServiceAccountEmail();

+ // kept for backward compatibility
  @Config("p12_keyfile_path")
  @ConfigDefault("null")
- public Optional<String> getP12KeyfilePath();
+ Optional<String> getP12KeyfilePath();
+
+ @Config("p12_keyfile")
+ @ConfigDefault("null")
+ Optional<LocalFile> getP12Keyfile();
+ void setP12Keyfile(Optional<LocalFile> p12Keyfile);

  @Config("application_name")
  @ConfigDefault("\"Embulk BigQuery plugin\"")
- public String getApplicationName();
+ String getApplicationName();

  @Config("path_prefix")
- public String getPathPrefix();
+ String getPathPrefix();

  @Config("sequence_format")
  @ConfigDefault("\".%03d.%02d\"")
- public String getSequenceFormat();
+ String getSequenceFormat();

  @Config("file_ext")
- public String getFileNameExtension();
+ String getFileNameExtension();

  @Config("source_format")
  @ConfigDefault("\"CSV\"")
- public String getSourceFormat();
+ SourceFormat getSourceFormat();

  @Config("field_delimiter")
  @ConfigDefault("\",\"")
- public String getFieldDelimiter();
+ char getFieldDelimiter();

  @Config("max_bad_records")
  @ConfigDefault("0")
- public int getMaxBadrecords();
+ int getMaxBadrecords();

  @Config("encoding")
  @ConfigDefault("\"UTF-8\"")
- public String getEncoding();
+ Charset getEncoding();

  @Config("delete_from_local_when_job_end")
  @ConfigDefault("false")
- public boolean getDeleteFromLocalWhenJobEnd();
+ boolean getDeleteFromLocalWhenJobEnd();

  @Config("project")
- public String getProject();
+ String getProject();

  @Config("dataset")
- public String getDataset();
+ String getDataset();

  @Config("table")
- public String getTable();
+ String getTable();

  @Config("auto_create_table")
  @ConfigDefault("false")
- public boolean getAutoCreateTable();
+ boolean getAutoCreateTable();

+ // kept for backward compatibility
  @Config("schema_path")
  @ConfigDefault("null")
- public Optional<String> getSchemaPath();
+ Optional<String> getSchemaPath();
+
+ @Config("schema_file")
+ @ConfigDefault("null")
+ Optional<LocalFile> getSchemaFile();
+ void setSchemaFile(Optional<LocalFile> schemaFile);

  @Config("prevent_duplicate_insert")
  @ConfigDefault("false")
- public boolean getPreventDuplicateInsert();
+ boolean getPreventDuplicateInsert();

  @Config("job_status_max_polling_time")
  @ConfigDefault("3600")
- public int getJobStatusMaxPollingTime();
+ int getJobStatusMaxPollingTime();

  @Config("job_status_polling_interval")
  @ConfigDefault("10")
- public int getJobStatusPollingInterval();
+ int getJobStatusPollingInterval();

  @Config("is_skip_job_result_check")
  @ConfigDefault("false")
- public boolean getIsSkipJobResultCheck();
+ boolean getIsSkipJobResultCheck();

  @Config("ignore_unknown_values")
  @ConfigDefault("false")
- public boolean getIgnoreUnknownValues();
+ boolean getIgnoreUnknownValues();

  @Config("allow_quoted_newlines")
  @ConfigDefault("false")
- public boolean getAllowQuotedNewlines();
+ boolean getAllowQuotedNewlines();
  }

  private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
  private static BigqueryWriter bigQueryWriter;

+ @Override
  public ConfigDiff transaction(ConfigSource config, int taskCount,
  FileOutputPlugin.Control control)
  {
  final PluginTask task = config.loadConfig(PluginTask.class);

+ if (task.getP12KeyfilePath().isPresent()) {
+ if (task.getP12Keyfile().isPresent()) {
+ throw new ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid");
+ }
+ try {
+ task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfilePath().get())));
+ } catch (IOException ex) {
+ throw Throwables.propagate(ex);
+ }
+ }
+
+ if (task.getSchemaPath().isPresent()) {
+ if (task.getSchemaFile().isPresent()) {
+ throw new ConfigException("Setting both schema_path and schema_file is invalid");
+ }
+ try {
+ task.setSchemaFile(Optional.of(LocalFile.of(task.getSchemaPath().get())));
+ } catch (IOException ex) {
+ throw Throwables.propagate(ex);
+ }
+ }
+
  try {
- bigQueryWriter = new BigqueryWriter.Builder(task.getAuthMethod())
- .setServiceAccountEmail(task.getServiceAccountEmail())
- .setP12KeyFilePath(task.getP12KeyfilePath())
- .setApplicationName(task.getApplicationName())
- .setProject(task.getProject())
- .setDataset(task.getDataset())
- .setTable(generateTableName(task.getTable()))
+ bigQueryWriter = new BigqueryWriter.Builder(
+ task.getAuthMethod().getString(),
+ task.getServiceAccountEmail(),
+ task.getP12Keyfile().transform(localFileToPathString()),
+ task.getApplicationName())
  .setAutoCreateTable(task.getAutoCreateTable())
- .setSchemaPath(task.getSchemaPath())
- .setSourceFormat(task.getSourceFormat())
- .setFieldDelimiter(task.getFieldDelimiter())
- .setMaxBadrecords(task.getMaxBadrecords())
- .setEncoding(task.getEncoding())
+ .setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
+ .setSourceFormat(task.getSourceFormat().getString())
+ .setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
+ .setMaxBadRecords(task.getMaxBadrecords())
+ .setEncoding(String.valueOf(task.getEncoding()))
  .setPreventDuplicateInsert(task.getPreventDuplicateInsert())
  .setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
  .setJobStatusPollingInterval(task.getJobStatusPollingInterval())
@@ -151,8 +187,9 @@ public class BigqueryOutputPlugin
  .setIgnoreUnknownValues(task.getIgnoreUnknownValues())
  .setAllowQuotedNewlines(task.getAllowQuotedNewlines())
  .build();
- } catch (FileNotFoundException ex) {
- throw new ConfigException(ex);
+
+ bigQueryWriter.checkConfig(task.getProject(), task.getDataset(), task.getTable());
+
  } catch (IOException | GeneralSecurityException ex) {
  throw new ConfigException(ex);
  }
@@ -160,6 +197,7 @@ public class BigqueryOutputPlugin
  return resume(task.dump(), taskCount, control);
  }

+ @Override
  public ConfigDiff resume(TaskSource taskSource,
  int taskCount,
  FileOutputPlugin.Control control)
@@ -176,6 +214,17 @@ public class BigqueryOutputPlugin
  {
  }

+ private Function<LocalFile, String> localFileToPathString()
+ {
+ return new Function<LocalFile, String>()
+ {
+ public String apply(LocalFile file)
+ {
+ return file.getPath().toString();
+ }
+ };
+ }
+
  @Override
  public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
  {
@@ -186,6 +235,11 @@ public class BigqueryOutputPlugin
  final String pathSuffix = task.getFileNameExtension();

  return new TransactionalFileOutput() {
+ private final String project = task.getProject();
+ private final String dataset = task.getDataset();
+ private final String table = generateTableName(task.getTable());
+ private final boolean deleteFromLocalWhenJobEnd = task.getDeleteFromLocalWhenJobEnd();
+
  private int fileIndex = 0;
  private BufferedOutputStream output = null;
  private File file;
@@ -243,9 +297,9 @@ public class BigqueryOutputPlugin
  closeFile();
  if (filePath != null) {
  try {
- bigQueryWriter.executeLoad(filePath);
+ bigQueryWriter.executeLoad(project, dataset, table, filePath);

- if (task.getDeleteFromLocalWhenJobEnd()) {
+ if (deleteFromLocalWhenJobEnd) {
  log.info(String.format("Delete local file [%s]", filePath));
  file.delete();
  }
@@ -281,4 +335,40 @@ public class BigqueryOutputPlugin

  return result.toString();
  }
- }
+
+ public enum SourceFormat
+ {
+ CSV("CSV"),
+ NEWLINE_DELIMITED_JSON("NEWLINE_DELIMITED_JSON");
+
+ private final String string;
+
+ SourceFormat(String string)
+ {
+ this.string = string;
+ }
+
+ public String getString()
+ {
+ return string;
+ }
+ }
+
+ public enum AuthMethod
+ {
+ private_key("private_key"),
+ compute_engine("compute_engine");
+
+ private final String string;
+
+ AuthMethod(String string)
+ {
+ this.string = string;
+ }
+
+ public String getString()
+ {
+ return string;
+ }
+ }
+ }
data/src/main/java/org/embulk/output/BigqueryWriter.java CHANGED
@@ -6,17 +6,17 @@ import java.io.FileNotFoundException;
  import java.io.FileInputStream;
  import java.io.BufferedInputStream;
  import com.google.api.client.http.InputStreamContent;
+
  import java.security.MessageDigest;
  import java.security.NoSuchAlgorithmException;
  import java.util.List;
  import java.util.concurrent.TimeoutException;
  import com.google.common.base.Optional;
- import com.google.api.client.util.Base64;
- import com.google.common.base.Throwables;
  import java.security.GeneralSecurityException;
  import com.fasterxml.jackson.databind.ObjectMapper;
  import com.fasterxml.jackson.core.type.TypeReference;

+ import com.google.common.collect.ImmutableList;
  import org.apache.commons.codec.binary.Hex;
  import org.embulk.spi.Exec;
  import org.slf4j.Logger;
@@ -40,17 +40,13 @@ import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;

  public class BigqueryWriter
  {
-
  private final Logger log = Exec.getLogger(BigqueryWriter.class);
- private final String project;
- private final String dataset;
- private final String table;
  private final boolean autoCreateTable;
  private final Optional<String> schemaPath;
  private final TableSchema tableSchema;
  private final String sourceFormat;
  private final String fieldDelimiter;
- private final int maxBadrecords;
+ private final int maxBadRecords;
  private final String encoding;
  private final boolean preventDuplicateInsert;
  private final long jobStatusMaxPollingTime;
@@ -60,16 +56,14 @@ public class BigqueryWriter
  private final boolean allowQuotedNewlines;
  private final Bigquery bigQueryClient;

- public BigqueryWriter(Builder builder) throws FileNotFoundException, IOException, GeneralSecurityException
+ public BigqueryWriter(Builder builder)
+ throws IOException, GeneralSecurityException
  {
- this.project = builder.project;
- this.dataset = builder.dataset;
- this.table = builder.table;
  this.autoCreateTable = builder.autoCreateTable;
  this.schemaPath = builder.schemaPath;
  this.sourceFormat = builder.sourceFormat.toUpperCase();
  this.fieldDelimiter = builder.fieldDelimiter;
- this.maxBadrecords = builder.maxBadrecords;
+ this.maxBadRecords = builder.maxBadRecords;
  this.encoding = builder.encoding.toUpperCase();
  this.preventDuplicateInsert = builder.preventDuplicateInsert;
  this.jobStatusMaxPollingTime = builder.jobStatusMaxPollingTime;
@@ -81,15 +75,14 @@ public class BigqueryWriter
  BigqueryAuthentication auth = new BigqueryAuthentication(builder.authMethod, builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
  this.bigQueryClient = auth.getBigqueryClient();

- checkConfig();
  if (autoCreateTable) {
- this.tableSchema = createTableSchema(builder.schemaPath);
+ this.tableSchema = createTableSchema();
  } else {
  this.tableSchema = null;
  }
  }

- private String getJobStatus(JobReference jobRef) throws JobFailedException
+ private String getJobStatus(String project, JobReference jobRef) throws JobFailedException
  {
  try {
  Job job = bigQueryClient.jobs().get(project, jobRef.getJobId()).execute();
@@ -108,7 +101,6 @@ public class BigqueryWriter
  String jobStatus = job.getStatus().getState();
  if (jobStatus.equals("DONE")) {
  JobStatistics statistics = job.getStatistics();
- //log.info(String.format("Job end. create:[%s] end:[%s]", statistics.getCreationTime(), statistics.getEndTime()));
  log.info(String.format("Job statistics [%s]", statistics.getLoad()));
  }
  return jobStatus;
@@ -118,14 +110,14 @@ public class BigqueryWriter
  }
  }

- private void getJobStatusUntilDone(JobReference jobRef) throws TimeoutException, JobFailedException
+ private void getJobStatusUntilDone(String project, JobReference jobRef) throws TimeoutException, JobFailedException
  {
  long startTime = System.currentTimeMillis();
  long elapsedTime;

  try {
  while (true) {
- String jobStatus = getJobStatus(jobRef);
+ String jobStatus = getJobStatus(project, jobRef);
  elapsedTime = System.currentTimeMillis() - startTime;
  if (jobStatus.equals("DONE")) {
  log.info(String.format("Job completed successfully. job id:[%s] elapsed_time:%dms status:[%s]", jobRef.getJobId(), elapsedTime, "SUCCESS"));
@@ -142,44 +134,28 @@ public class BigqueryWriter
  }
  }

- public void executeLoad(String localFilePath) throws GoogleJsonResponseException, NoSuchAlgorithmException,
- TimeoutException, JobFailedException, IOException
+ public void executeLoad(String project, String dataset, String table, String localFilePath)
+ throws NoSuchAlgorithmException, TimeoutException, JobFailedException, IOException
  {
  log.info(String.format("Job preparing... project:%s dataset:%s table:%s", project, dataset, table));

  Job job = new Job();
  JobReference jobRef = new JobReference();
- JobConfiguration jobConfig = new JobConfiguration();
- JobConfigurationLoad loadConfig = new JobConfigurationLoad();
- jobConfig.setLoad(loadConfig);
+ JobConfiguration jobConfig = new JobConfiguration().setLoad(setLoadConfig(project, dataset, table));
  job.setConfiguration(jobConfig);

  if (preventDuplicateInsert) {
- String jobId = createJobId(localFilePath);
+ ImmutableList<String> elements = ImmutableList.of(
+ getLocalMd5hash(localFilePath), dataset, table,
+ String.valueOf(tableSchema), sourceFormat, fieldDelimiter, String.valueOf(maxBadRecords),
+ encoding, String.valueOf(ignoreUnknownValues), String.valueOf(allowQuotedNewlines)
+ );
+ String jobId = createJobId(elements);
+
  jobRef.setJobId(jobId);
  job.setJobReference(jobRef);
  }

- loadConfig.setAllowQuotedNewlines(allowQuotedNewlines);
- loadConfig.setEncoding(encoding);
- loadConfig.setMaxBadRecords(maxBadrecords);
- if (sourceFormat.equals("NEWLINE_DELIMITED_JSON")) {
- loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");
- } else {
- loadConfig.setFieldDelimiter(fieldDelimiter);
- }
- loadConfig.setWriteDisposition("WRITE_APPEND");
- if (autoCreateTable) {
- loadConfig.setSchema(tableSchema);
- loadConfig.setCreateDisposition("CREATE_IF_NEEDED");
- log.info(String.format("table:[%s] will be create if not exists", table));
- } else {
- loadConfig.setCreateDisposition("CREATE_NEVER");
- }
- loadConfig.setIgnoreUnknownValues(ignoreUnknownValues);
-
- loadConfig.setDestinationTable(createTableReference());
-
  File file = new File(localFilePath);
  InputStreamContent mediaContent = new InputStreamContent("application/octet-stream",
  new BufferedInputStream(
@@ -206,31 +182,52 @@ public class BigqueryWriter
  if (isSkipJobResultCheck) {
  log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
  } else {
- getJobStatusUntilDone(jobRef);
+ getJobStatusUntilDone(project, jobRef);
  }
  }

- private String createJobId(String localFilePath) throws NoSuchAlgorithmException, IOException
+ private JobConfigurationLoad setLoadConfig(String project, String dataset, String table)
+ {
+ JobConfigurationLoad config = new JobConfigurationLoad();
+ config.setAllowQuotedNewlines(allowQuotedNewlines)
+ .setEncoding(encoding)
+ .setMaxBadRecords(maxBadRecords)
+ .setSourceFormat(sourceFormat)
+ .setIgnoreUnknownValues(ignoreUnknownValues)
+ .setDestinationTable(createTableReference(project, dataset, table))
+ .setWriteDisposition("WRITE_APPEND");
+
+ if (sourceFormat.equals("CSV")) {
+ config.setFieldDelimiter(String.valueOf(fieldDelimiter));
+ }
+ if (autoCreateTable) {
+ config.setSchema(tableSchema);
+ config.setCreateDisposition("CREATE_IF_NEEDED");
+ log.info(String.format("table:[%s] will be create if not exists", table));
+ } else {
+ config.setCreateDisposition("CREATE_NEVER");
+ }
+ return config;
+ }
+
+ private String createJobId(ImmutableList<String> elements) throws NoSuchAlgorithmException, IOException
  {
  StringBuilder sb = new StringBuilder();
- sb.append(getLocalMd5hash(localFilePath));
- sb.append(dataset);
- sb.append(table);
- sb.append(tableSchema);
- sb.append(sourceFormat);
- sb.append(fieldDelimiter);
- sb.append(maxBadrecords);
- sb.append(encoding);
- sb.append(ignoreUnknownValues);
+ for (String element : elements) {
+ sb.append(element);
+ }

  MessageDigest md = MessageDigest.getInstance("MD5");
- String str = new String(sb);
- byte[] digest = md.digest(str.getBytes());
+ byte[] digest = md.digest(new String(sb).getBytes());
  String hash = new String(Hex.encodeHex(digest));
- return "embulk_job_" + hash;
+
+ StringBuilder jobId = new StringBuilder();
+ jobId.append("embulk_job_");
+ jobId.append(hash);
+ return jobId.toString();
  }

- private TableReference createTableReference()
+ private TableReference createTableReference(String project, String dataset, String table)
  {
  return new TableReference()
  .setProjectId(project)
@@ -238,7 +235,7 @@ public class BigqueryWriter
  .setTableId(table);
  }

- private TableSchema createTableSchema(Optional<String> schemaPath) throws FileNotFoundException, IOException
+ public TableSchema createTableSchema() throws IOException
  {
  String path = schemaPath.orNull();
  File file = new File(path);
@@ -247,8 +244,7 @@ public class BigqueryWriter
  stream = new FileInputStream(file);
  ObjectMapper mapper = new ObjectMapper();
  List<TableFieldSchema> fields = mapper.readValue(stream, new TypeReference<List<TableFieldSchema>>() {});
- TableSchema tableSchema = new TableSchema().setFields(fields);
- return tableSchema;
+ return new TableSchema().setFields(fields);
  } finally {
  if (stream != null) {
  stream.close();
@@ -256,22 +252,22 @@ public class BigqueryWriter
  }
  }

- public boolean isExistTable(String tableName) throws IOException
+ public boolean isExistTable(String project, String dataset, String table) throws IOException
  {
  Tables tableRequest = bigQueryClient.tables();
  try {
- Table tableData = tableRequest.get(project, dataset, tableName).execute();
+ Table tableData = tableRequest.get(project, dataset, table).execute();
  } catch (GoogleJsonResponseException ex) {
  return false;
  }
  return true;
  }

- public void checkConfig() throws FileNotFoundException, IOException
+ public void checkConfig(String project, String dataset, String table) throws IOException
  {
  if (autoCreateTable) {
  if (!schemaPath.isPresent()) {
- throw new FileNotFoundException("schema_path is empty");
+ throw new FileNotFoundException("schema_file is empty");
  } else {
  File file = new File(schemaPath.orNull());
  if (!file.exists()) {
@@ -279,7 +275,7 @@ public class BigqueryWriter
  }
  }
  } else {
- if (!isExistTable(table)) {
+ if (!isExistTable(project, dataset, table)) {
  throw new IOException(String.format("table [%s] is not exists", table));
  }
  }
@@ -341,14 +337,11 @@ public class BigqueryWriter
  private Optional<String> serviceAccountEmail;
  private Optional<String> p12KeyFilePath;
  private String applicationName;
- private String project;
- private String dataset;
- private String table;
  private boolean autoCreateTable;
  private Optional<String> schemaPath;
  private String sourceFormat;
  private String fieldDelimiter;
- private int maxBadrecords;
+ private int maxBadRecords;
  private String encoding;
  private boolean preventDuplicateInsert;
  private int jobStatusMaxPollingTime;
@@ -357,45 +350,12 @@ public class BigqueryWriter
  private boolean ignoreUnknownValues;
  private boolean allowQuotedNewlines;

- public Builder(String authMethod)
+ public Builder(String authMethod, Optional<String> serviceAccountEmail, Optional<String> p12KeyFilePath, String applicationName)
  {
  this.authMethod = authMethod;
- }
-
- public Builder setServiceAccountEmail(Optional<String> serviceAccountEmail)
- {
  this.serviceAccountEmail = serviceAccountEmail;
- return this;
- }
-
- public Builder setP12KeyFilePath(Optional<String> p12KeyFilePath)
- {
  this.p12KeyFilePath = p12KeyFilePath;
- return this;
- }
-
- public Builder setApplicationName(String applicationName)
- {
  this.applicationName = applicationName;
- return this;
- }
-
- public Builder setProject(String project)
- {
- this.project = project;
- return this;
- }
-
- public Builder setDataset(String dataset)
- {
- this.dataset = dataset;
- return this;
- }
-
- public Builder setTable(String table)
- {
- this.table = table;
- return this;
  }

  public Builder setAutoCreateTable(boolean autoCreateTable)
@@ -422,9 +382,9 @@ public class BigqueryWriter
  return this;
  }

- public Builder setMaxBadrecords(int maxBadrecords)
+ public Builder setMaxBadRecords(int maxBadRecords)
  {
- this.maxBadrecords = maxBadrecords;
+ this.maxBadRecords = maxBadRecords;
  return this;
  }

@@ -482,4 +442,4 @@ public class BigqueryWriter
  super(message);
  }
  }
- }
+ }
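
The net effect of the BigqueryWriter changes above is that the connection settings now go to the Builder constructor, while the load target travels with each call. A sketch of the new call shape, assuming placeholder project/dataset/table values and the README's example credentials (not code from the release itself):

```java
import com.google.common.base.Optional;
import org.embulk.output.BigqueryWriter;

public class LoadExample
{
    public static void main(String[] args) throws Exception
    {
        // Placeholder values; in the plugin these come from PluginTask.
        Optional<String> serviceAccountEmail = Optional.of("ABCXYZ123ABCXYZ123.gserviceaccount.com");
        Optional<String> p12KeyFilePath = Optional.of("/path/to/p12_keyfile.p12");

        BigqueryWriter writer = new BigqueryWriter.Builder(
                "private_key", serviceAccountEmail, p12KeyFilePath, "Embulk BigQuery plugin")
                .setAutoCreateTable(false)
                .setSchemaPath(Optional.<String>absent())
                .setSourceFormat("CSV")
                .setFieldDelimiter(",")
                .setMaxBadRecords(0)
                .setEncoding("UTF-8")
                .setPreventDuplicateInsert(false)
                .setJobStatusMaxPollingTime(3600)
                .setJobStatusPollingInterval(10)
                .setIgnoreUnknownValues(false)
                .setAllowQuotedNewlines(false)
                .build();

        // project, dataset, and table are now per-call arguments, so one writer
        // can serve multiple generated table names (e.g. table_%Y_%m).
        writer.checkConfig("your-project-id", "your_dataset", "your_table");
        writer.executeLoad("your-project-id", "your_dataset", "your_table", "/path/to/output.000.00.csv.gz");
    }
}
```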
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: embulk-output-bigquery
  version: !ruby/object:Gem::Version
- version: 0.1.7
+ version: 0.1.8
  platform: ruby
  authors:
  - Satoshi Akama
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-05-20 00:00:00.000000000 Z
+ date: 2015-08-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -62,7 +62,7 @@ files:
  - src/test/java/org/embulk/output/TestBigqueryWriter.java
  - classpath/commons-codec-1.3.jar
  - classpath/commons-logging-1.1.1.jar
- - classpath/embulk-output-bigquery-0.1.7.jar
+ - classpath/embulk-output-bigquery-0.1.8.jar
  - classpath/google-api-client-1.20.0.jar
  - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
  - classpath/google-http-client-1.20.0.jar
@@ -72,7 +72,7 @@ files:
  - classpath/httpclient-4.0.1.jar
  - classpath/httpcore-4.0.1.jar
  - classpath/jsr305-1.3.9.jar
- homepage: https://github.com/sakama/embulk-output-bigquery
+ homepage: https://github.com/embulk/embulk-output-bigquery
  licenses:
  - Apache-2.0
  metadata: {}