embulk-output-bigquery 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 10152df2aff8af2668f03c311bd27f7190ce0bc9
4
- data.tar.gz: d71a05b7ad0bc501a473e77220d21ddbda6b8397
3
+ metadata.gz: a814af5998dfc7f29f8cac8d06b49ec0b6713ff6
4
+ data.tar.gz: 0381957abca63f41d93c768c1b1f0f1ac62a4339
5
5
  SHA512:
6
- metadata.gz: 95492636456277841a59bd816c8268c48cbb706281d3d6aa1d1a8d7ebd19ba23deec81de6af1ae04fb1b9234f5abde6617bed195b787fb4af219eb9c167c0466
7
- data.tar.gz: cc3631f691a474d78e9d0ee0e68ce263a7cc6a5d691f5fd036a85aec7d94cf792c608016ff5bce721d99236b76d1a2d5f3d483f827ebe456944bbdb8d00a3e40
6
+ metadata.gz: ab1ee4e8bf996e9540c69352881449b29e1818a5e4d23babb8df6f47fe08c1cadd80ec373ed271c6e919ebd13f009bdba56e3d13ea2aa27085ef8e45e7f16c8b
7
+ data.tar.gz: 1274ac1eed4de4d046229a724a718d32e33a8c16d17844ffb4a4f94e4dc97259d5f6902222a6c34d4bba1525a6e184070d84f05bb192cbccc8b72c4166287b84
data/README.md CHANGED
@@ -24,28 +24,39 @@ OAuth flow for installed applications.
24
24
 
25
25
  ## Configuration
26
26
 
27
- - **auth_method**: (private_key or compute_engine) (string, optional, default is private_key)
28
- - **service_account_email**: your Google service account email (string, required when auth_method is private_key)
29
- - **p12_keyfile_path**: fullpath of private key in P12(PKCS12) format (string, required when auth_method is private_key)
30
- - **path_prefix**: (string, required)
31
- - **sequence_format**: (string, optional, default is %03d.%02d)
32
- - **file_ext**: (string, required)
33
- - **source_format**: file type (NEWLINE_DELIMITED_JSON or CSV) (string, required, default is CSV)
34
- - **project**: project_id (string, required)
35
- - **dataset**: dataset (string, required)
36
- - **table**: table name (string, required)
37
- - **auto_create_table**: (boolean, optional default is 0)
38
- - **schema_path**: (string, optional)
39
- - **prevent_duplicate_insert**: (boolean, optional default is 0)
40
- - **application_name**: application name anything you like (string, optional)
41
- - **delete_from_local_when_job_end**: (boolean, optional, default is 0)
42
- - **job_status_max_polling_time**: max job status polling time. (int, optional, default is 3600 sec)
43
- - **job_status_polling_interval**: job status polling interval. (int, optional, default is 10 sec)
44
- - **is_skip_job_result_check**: (boolean, optional, default is 0)
45
- - **field_delimiter**: (string, optional, default is ",")
46
- - **max_bad_records**: (int, optional, default is 0)
47
- - **encoding**: (UTF-8 or ISO-8859-1) (string, optional, default is UTF-8)
48
- - **ignore_unknown_values**: (boolean, optional, default is 0)
27
+ #### Original options
28
+
29
+ | name | type | required? | default | description |
30
+ |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
31
+ | auth_method | string | optional | "private_key" | `private_key` or `compute_engine` |
32
+ | service_account_email | string | required when auth_method is private_key | | Your Google service account email |
33
+ | p12_keyfile_path | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
34
+ | sequence_format | string | optional | %03d.%02d | |
35
+ | file_ext | string | optional | | e.g. ".csv.gz" ".json.gz" |
36
+ | project | string | required | | project_id |
37
+ | dataset | string | required | | dataset |
38
+ | table | string | required | | table name |
39
+ | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
40
+ | schema_path | string | optional | | /path/to/schema.json |
41
+ | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
42
+ | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete the local file when the job ends |
43
+ | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
44
+ | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
45
+ | is_skip_job_result_check | boolean | optional | 0 | |
46
+ | application_name | string | optional | "Embulk BigQuery plugin" | Anything you like |
47
+
48
+ #### Same options as bq command-line tools or BigQuery job's property
49
+
50
+ The following options are the same as [bq command-line tools](https://cloud.google.com/bigquery/bq-command-line-tool#creatingtablefromfile) or BigQuery [job's property](https://cloud.google.com/bigquery/docs/reference/v2/jobs#resource).
51
+
52
+ | name | type | required? | default | description |
53
+ |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
54
+ | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
55
+ | max_bad_records | int | optional | 0 | |
56
+ | field_delimiter | string | optional | "," | |
57
+ | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
58
+ | ignore_unknown_values | boolean | optional | 0 | |
59
+ | allow_quoted_newlines | boolean | optional | 0 | Set to true if the data contains newline characters. It may cause slow processing |
49
60
 
50
61
  ### Example
51
62
 
data/build.gradle CHANGED
@@ -15,11 +15,11 @@ configurations {
15
15
  sourceCompatibility = 1.7
16
16
  targetCompatibility = 1.7
17
17
 
18
- version = "0.1.6"
18
+ version = "0.1.7"
19
19
 
20
20
  dependencies {
21
- compile "org.embulk:embulk-core:0.5.1"
22
- provided "org.embulk:embulk-core:0.5.1"
21
+ compile "org.embulk:embulk-core:0.6.8"
22
+ provided "org.embulk:embulk-core:0.6.8"
23
23
 
24
24
  compile "com.google.http-client:google-http-client-jackson2:1.20.0"
25
25
  compile "com.google.apis:google-api-services-bigquery:v2-rev205-1.20.0"
@@ -116,6 +116,10 @@ public class BigqueryOutputPlugin
116
116
  @Config("ignore_unknown_values")
117
117
  @ConfigDefault("false")
118
118
  public boolean getIgnoreUnknownValues();
119
+
120
+ @Config("allow_quoted_newlines")
121
+ @ConfigDefault("false")
122
+ public boolean getAllowQuotedNewlines();
119
123
  }
120
124
 
121
125
  private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
@@ -145,6 +149,7 @@ public class BigqueryOutputPlugin
145
149
  .setJobStatusPollingInterval(task.getJobStatusPollingInterval())
146
150
  .setIsSkipJobResultCheck(task.getIsSkipJobResultCheck())
147
151
  .setIgnoreUnknownValues(task.getIgnoreUnknownValues())
152
+ .setAllowQuotedNewlines(task.getAllowQuotedNewlines())
148
153
  .build();
149
154
  } catch (FileNotFoundException ex) {
150
155
  throw new ConfigException(ex);
@@ -57,6 +57,7 @@ public class BigqueryWriter
57
57
  private final long jobStatusPollingInterval;
58
58
  private final boolean isSkipJobResultCheck;
59
59
  private final boolean ignoreUnknownValues;
60
+ private final boolean allowQuotedNewlines;
60
61
  private final Bigquery bigQueryClient;
61
62
 
62
63
  public BigqueryWriter(Builder builder) throws FileNotFoundException, IOException, GeneralSecurityException
@@ -75,6 +76,7 @@ public class BigqueryWriter
75
76
  this.jobStatusPollingInterval = builder.jobStatusPollingInterval;
76
77
  this.isSkipJobResultCheck = builder.isSkipJobResultCheck;
77
78
  this.ignoreUnknownValues = builder.ignoreUnknownValues;
79
+ this.allowQuotedNewlines = builder.allowQuotedNewlines;
78
80
 
79
81
  BigqueryAuthentication auth = new BigqueryAuthentication(builder.authMethod, builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
80
82
  this.bigQueryClient = auth.getBigqueryClient();
@@ -158,7 +160,7 @@ public class BigqueryWriter
158
160
  job.setJobReference(jobRef);
159
161
  }
160
162
 
161
- loadConfig.setAllowQuotedNewlines(false);
163
+ loadConfig.setAllowQuotedNewlines(allowQuotedNewlines);
162
164
  loadConfig.setEncoding(encoding);
163
165
  loadConfig.setMaxBadRecords(maxBadrecords);
164
166
  if (sourceFormat.equals("NEWLINE_DELIMITED_JSON")) {
@@ -353,6 +355,7 @@ public class BigqueryWriter
353
355
  private int jobStatusPollingInterval;
354
356
  private boolean isSkipJobResultCheck;
355
357
  private boolean ignoreUnknownValues;
358
+ private boolean allowQuotedNewlines;
356
359
 
357
360
  public Builder(String authMethod)
358
361
  {
@@ -461,6 +464,12 @@ public class BigqueryWriter
461
464
  return this;
462
465
  }
463
466
 
467
+ public Builder setAllowQuotedNewlines(boolean allowQuotedNewlines)
468
+ {
469
+ this.allowQuotedNewlines = allowQuotedNewlines;
470
+ return this;
471
+ }
472
+
464
473
  public BigqueryWriter build() throws IOException, GeneralSecurityException
465
474
  {
466
475
  return new BigqueryWriter(this);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-23 00:00:00.000000000 Z
11
+ date: 2015-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,7 +62,7 @@ files:
62
62
  - src/test/java/org/embulk/output/TestBigqueryWriter.java
63
63
  - classpath/commons-codec-1.3.jar
64
64
  - classpath/commons-logging-1.1.1.jar
65
- - classpath/embulk-output-bigquery-0.1.6.jar
65
+ - classpath/embulk-output-bigquery-0.1.7.jar
66
66
  - classpath/google-api-client-1.20.0.jar
67
67
  - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
68
68
  - classpath/google-http-client-1.20.0.jar