embulk-input-bigquery_extract_files 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0728b31117097ca0eef206be33646b0e210a85aa
|
4
|
+
data.tar.gz: 1ce4c3bd0eab4385250bfd7a9876866cd7f4f136
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 123ff54b8d18e9ecefaa1525a74792a98ec56537d5123f3e3a5180b62e92b84454fefe42813420fd4c16b5eddb1c300460918dae787b32291271edc2b28afb4c
|
7
|
+
data.tar.gz: 6c3872f92860a27bf42eb8ffc9ad71612c9defbd25f41a82234bb09fad62c5fb87492e6bd8bbf10d8b01ec4c928df4824fd8ffed941a7672b9f3e6b4cdcabd08
|
data/README.md
CHANGED
@@ -55,12 +55,19 @@ embulk gem install embulk-input-bigquery_extract_files
|
|
55
55
|
|
56
56
|
- **temp_schema_file_path**: bigquery result schema file for parser. (Optional) (string, default: `null`)
|
57
57
|
|
58
|
-
- **
|
58
|
+
- **bigquery_job_wait_second**: number of seconds to wait for the bigquery job. (Optional) (string, default: `600`)
|
59
|
+
|
60
|
+
- **cleanup_gcs_before_executing**: delete all files in the gcs temp path before the process starts (Optional) (string, default: `true`)
|
61
|
+
|
62
|
+
- **cleanup_gcs_files**: delete all files in the gcs temp path after the process ends (Optional) (string, default: `false`)
|
63
|
+
|
64
|
+
- **cleanup_temp_table**: delete the query result table after the process ends (Optional) (string, default: `true`)
|
65
|
+
|
66
|
+
- **cleanup_local_temp_files**: delete all files in the local temp dir (Optional) (string, default: `true`)
|
59
67
|
|
60
68
|
- **decoders**: embulk java-file-input plugin's default attribute. see : http://www.embulk.org/docs/built-in.html#gzip-decoder-plugin
|
61
69
|
- **parser**: embulk java-file-input plugin's default attribute. see : http://www.embulk.org/docs/built-in.html#csv-parser-plugin
|
62
70
|
|
63
|
-
|
64
71
|
## Example
|
65
72
|
|
66
73
|
```yaml
|
@@ -84,6 +91,51 @@ out:
|
|
84
91
|
type: stdout
|
85
92
|
```
|
86
93
|
|
94
|
+
### Advanced Example
|
95
|
+
|
96
|
+
#### bigquery to mysql with auto-schema
|
97
|
+
|
98
|
+
I have to batch-load a bigquery table into mysql every day for my job.
|
99
|
+
So I want to get an auto-schema for this file input plugin.
|
100
|
+
|
101
|
+
- see also
|
102
|
+
- https://github.com/jo8937/embulk-parser-csv_with_schema_file
|
103
|
+
- https://github.com/embulk/embulk-output-jdbc/tree/master/embulk-output-mysql
|
104
|
+
|
105
|
+
This is my best practice for a bigquery to mysql batch config.
|
106
|
+
|
107
|
+
```yaml
|
108
|
+
in:
|
109
|
+
type: bigquery_extract_files
|
110
|
+
project: my-google-project
|
111
|
+
json_keyfile: /tmp/embulk/google_service_account.json
|
112
|
+
query: 'select * from dataset.t_nitocris'
|
113
|
+
temp_dataset: temp_dataset
|
114
|
+
gcs_uri: gs://bucket/embulktemp/t_nitocris_*
|
115
|
+
temp_local_path: /tmp/embulk/data
|
116
|
+
file_format: 'CSV'
|
117
|
+
compression: 'GZIP'
|
118
|
+
temp_schema_file_path: /tmp/embulk/schema/csv_schema_nitocris.json
|
119
|
+
decoders:
|
120
|
+
- {type: gzip}
|
121
|
+
parser:
|
122
|
+
type: csv_with_schema_file
|
123
|
+
default_timestamp_format: '%Y-%m-%d %H:%M:%S %z'
|
124
|
+
schema_path: /tmp/embulk/schema/csv_schema_nitocris.json
|
125
|
+
out:
|
126
|
+
type: mysql
|
127
|
+
host: host
|
128
|
+
user: user
|
129
|
+
password: password
|
130
|
+
port: 3306
|
131
|
+
database: MY_DATABASE
|
132
|
+
table:
|
133
|
+
options: {connectTimeout: 0, waitTimeout: 0, enableQueryTimeouts: false, autoReconnect: true}
|
134
|
+
mode: insert_direct
|
135
|
+
retry_limit: 60
|
136
|
+
retry_wait: 3000
|
137
|
+
batch_size: 4096000
|
138
|
+
```
|
87
139
|
|
88
140
|
## Build
|
89
141
|
|
data/build.gradle
CHANGED
data/config.yml
CHANGED
@@ -26,6 +26,7 @@ out:
|
|
26
26
|
database: dbname
|
27
27
|
table: test_table
|
28
28
|
# https://dev.mysql.com/doc/connector-j/5.1/en/connector-j-reference-configuration-properties.html
|
29
|
-
options: {connectTimeout: 0, enableQueryTimeouts: false, waitTimeout: 0}
|
30
|
-
mode:
|
31
|
-
|
29
|
+
options: {connectTimeout: 0, waitTimeout: 0, enableQueryTimeouts: false, waitTimeout: 0, autoReconnect: true}
|
30
|
+
mode: insert
|
31
|
+
column_options:
|
32
|
+
log_id: {type: 'bigint not null PRIMARY KEY'}
|
data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java
CHANGED
@@ -127,12 +127,16 @@ public class BigqueryExportGcsFileInputPlugin
|
|
127
127
|
|
128
128
|
@Config("cleanup_gcs_files")
|
129
129
|
@ConfigDefault("false")
|
130
|
-
public boolean
|
130
|
+
public boolean getCleanupGcsTempFiles();
|
131
131
|
|
132
132
|
@Config("cleanup_temp_table")
|
133
133
|
@ConfigDefault("true")
|
134
134
|
public boolean getCleanupTempTable();
|
135
135
|
|
136
|
+
@Config("cleanup_local_temp_files")
|
137
|
+
@ConfigDefault("true")
|
138
|
+
public boolean getCleanupLocalTempFiles();
|
139
|
+
|
136
140
|
@Config("cleanup_gcs_before_executing")
|
137
141
|
@ConfigDefault("true")
|
138
142
|
public boolean getCleanupGcsBeforeExcuting();
|
@@ -328,12 +332,13 @@ public class BigqueryExportGcsFileInputPlugin
|
|
328
332
|
if( report.isEmpty() ){
|
329
333
|
String file = task.getFiles().get(i);
|
330
334
|
|
331
|
-
|
335
|
+
if(task.getCleanupLocalTempFiles()) {
|
336
|
+
Path p = BigqueryExportUtils.getFullPath(task,file);
|
337
|
+
log.info("delete temp file...{}",p);
|
338
|
+
p.toFile().delete();
|
339
|
+
}
|
332
340
|
|
333
|
-
|
334
|
-
p.toFile().delete();
|
335
|
-
|
336
|
-
if(task.getCleanupGcsTempFile()){
|
341
|
+
if(task.getCleanupGcsTempFiles()){
|
337
342
|
BigqueryExportUtils.removeTempGcsFiles(task, file);
|
338
343
|
}
|
339
344
|
|
@@ -486,7 +486,7 @@ public class BigqueryExportUtils
|
|
486
486
|
public static void removeTempGcsFiles(PluginTask task, String file){
|
487
487
|
try {
|
488
488
|
Storage gcs = BigqueryExportUtils.newGcsClient(task);
|
489
|
-
log.info("delete finish file gs://{}{}", task.getGcsBucket(), file);
|
489
|
+
log.info("delete finish file gs://{}/{}", task.getGcsBucket(), file);
|
490
490
|
gcs.objects().delete(task.getGcsBucket(), file).execute();
|
491
491
|
} catch (Exception e) {
|
492
492
|
log.error("# Remove temp gcs file FAIL : " + file,e);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-bigquery_extract_files
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jo8937
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-11-
|
11
|
+
date: 2017-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java
|
66
66
|
- classpath/commons-codec-1.3.jar
|
67
67
|
- classpath/commons-logging-1.1.1.jar
|
68
|
-
- classpath/embulk-input-bigquery_extract_files-0.0.
|
68
|
+
- classpath/embulk-input-bigquery_extract_files-0.0.7.jar
|
69
69
|
- classpath/google-api-client-1.23.0.jar
|
70
70
|
- classpath/google-api-services-bigquery-v2-rev363-1.23.0.jar
|
71
71
|
- classpath/google-api-services-storage-v1-rev59-1.21.0.jar
|