embulk-input-bigquery_extract_files 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e709d68054ae241b205a52159c85d2eaec65406a
4
- data.tar.gz: 88d85008b319fd53613d347dfaa101ad1a106f36
3
+ metadata.gz: 0728b31117097ca0eef206be33646b0e210a85aa
4
+ data.tar.gz: 1ce4c3bd0eab4385250bfd7a9876866cd7f4f136
5
5
  SHA512:
6
- metadata.gz: 951cf84517c6ffa2785c4a164354f2e3ca96faf0ce3ded183946726a54d6d780bd96245e78befc60ec9c14793bb922a6a526b4574dc6dda67a7bb93ee201b200
7
- data.tar.gz: 7321c8a02fdfe9fc9973f539806eca76c37c4548a12cd55475a36e1421f9b0d82e38941a64f61358f4ac420dab92f5fcbc3d4388be41360c16955b1cff0b2301
6
+ metadata.gz: 123ff54b8d18e9ecefaa1525a74792a98ec56537d5123f3e3a5180b62e92b84454fefe42813420fd4c16b5eddb1c300460918dae787b32291271edc2b28afb4c
7
+ data.tar.gz: 6c3872f92860a27bf42eb8ffc9ad71612c9defbd25f41a82234bb09fad62c5fb87492e6bd8bbf10d8b01ec4c928df4824fd8ffed941a7672b9f3e6b4cdcabd08
data/README.md CHANGED
@@ -55,12 +55,19 @@ embulk gem install embulk-input-bigquery_extract_files
55
55
 
56
56
  - **temp_schema_file_path**: bigquery result schema file for parser. (Optional) (string, default: `null`)
57
57
 
58
- - **temp_schema_file_type**: default is embulk's Schema object. (Optional) (string, default: `null`)
58
+ - **bigquery_job_wait_second**: number of seconds to wait for the bigquery job. (Optional) (string, default: `600`)
59
+
60
+ - **cleanup_gcs_before_executing**: delete all files in the gcs temp path before the process starts. (Optional) (boolean, default: `true`)
61
+
62
+ - **cleanup_gcs_files**: delete all files in the gcs temp path after the process ends. (Optional) (boolean, default: `false`)
63
+
64
+ - **cleanup_temp_table**: delete the query result table after the process ends. (Optional) (boolean, default: `true`)
65
+
66
+ - **cleanup_local_temp_files**: delete all files in the local temp dir. (Optional) (boolean, default: `true`)
59
67
 
60
68
  - **decoders**: embulk java-file-input plugin's default attribute. see : http://www.embulk.org/docs/built-in.html#gzip-decoder-plugin
61
69
  - **parser**: embulk java-file-input plugin's default attribute. see : http://www.embulk.org/docs/built-in.html#csv-parser-plugin
62
70
 
63
-
64
71
  ## Example
65
72
 
66
73
  ```yaml
@@ -84,6 +91,51 @@ out:
84
91
  type: stdout
85
92
  ```
86
93
 
94
+ ### Advanced Example
95
+
96
+ #### bigquery to mysql with auto-schema
97
+
98
+ I have to batch-load a bigquery table into mysql every day for my job.
99
+ then, I want to get auto-schema for this file input plugin.
100
+
101
+ - see also
102
+ - https://github.com/jo8937/embulk-parser-csv_with_schema_file
103
+ - https://github.com/embulk/embulk-output-jdbc/tree/master/embulk-output-mysql
104
+
105
+ this is my best practice for bigquery to mysql batch config.
106
+
107
+ ```yaml
108
+ in:
109
+ type: bigquery_extract_files
110
+ project: my-google-project
111
+ json_keyfile: /tmp/embulk/google_service_account.json
112
+ query: 'select * from dataset.t_nitocris'
113
+ temp_dataset: temp_dataset
114
+ gcs_uri: gs://bucket/embulktemp/t_nitocris_*
115
+ temp_local_path: /tmp/embulk/data
116
+ file_format: 'CSV'
117
+ compression: 'GZIP'
118
+ temp_schema_file_path: /tmp/embulk/schema/csv_schema_nitocris.json
119
+ decoders:
120
+ - {type: gzip}
121
+ parser:
122
+ type: csv_with_schema_file
123
+ default_timestamp_format: '%Y-%m-%d %H:%M:%S %z'
124
+ schema_path: /tmp/embulk/schema/csv_schema_nitocris.json
125
+ out:
126
+ type: mysql
127
+ host: host
128
+ user: user
129
+ password: password
130
+ port: 3306
131
+ database: MY_DATABASE
132
+ table:
133
+ options: {connectTimeout: 0, waitTimeout: 0, enableQueryTimeouts: false, autoReconnect: true}
134
+ mode: insert_direct
135
+ retry_limit: 60
136
+ retry_wait: 3000
137
+ batch_size: 4096000
138
+ ```
87
139
 
88
140
  ## Build
89
141
 
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.6"
16
+ version = "0.0.7"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
data/config.yml CHANGED
@@ -26,6 +26,7 @@ out:
26
26
  database: dbname
27
27
  table: test_table
28
28
  # https://dev.mysql.com/doc/connector-j/5.1/en/connector-j-reference-configuration-properties.html
29
- options: {connectTimeout: 0, enableQueryTimeouts: false, waitTimeout: 0}
30
- mode: insert_direct
31
-
29
+ options: {connectTimeout: 0, waitTimeout: 0, enableQueryTimeouts: false, waitTimeout: 0, autoReconnect: true}
30
+ mode: insert
31
+ column_options:
32
+ log_id: {type: 'bigint not null PRIMARY KEY'}
@@ -127,12 +127,16 @@ public class BigqueryExportGcsFileInputPlugin
127
127
 
128
128
  @Config("cleanup_gcs_files")
129
129
  @ConfigDefault("false")
130
- public boolean getCleanupGcsTempFile();
130
+ public boolean getCleanupGcsTempFiles();
131
131
 
132
132
  @Config("cleanup_temp_table")
133
133
  @ConfigDefault("true")
134
134
  public boolean getCleanupTempTable();
135
135
 
136
+ @Config("cleanup_local_temp_files")
137
+ @ConfigDefault("true")
138
+ public boolean getCleanupLocalTempFiles();
139
+
136
140
  @Config("cleanup_gcs_before_executing")
137
141
  @ConfigDefault("true")
138
142
  public boolean getCleanupGcsBeforeExcuting();
@@ -328,12 +332,13 @@ public class BigqueryExportGcsFileInputPlugin
328
332
  if( report.isEmpty() ){
329
333
  String file = task.getFiles().get(i);
330
334
 
331
- Path p = BigqueryExportUtils.getFullPath(task,file);
335
+ if(task.getCleanupLocalTempFiles()) {
336
+ Path p = BigqueryExportUtils.getFullPath(task,file);
337
+ log.info("delete temp file...{}",p);
338
+ p.toFile().delete();
339
+ }
332
340
 
333
- log.info("delete temp file...{}",p);
334
- p.toFile().delete();
335
-
336
- if(task.getCleanupGcsTempFile()){
341
+ if(task.getCleanupGcsTempFiles()){
337
342
  BigqueryExportUtils.removeTempGcsFiles(task, file);
338
343
  }
339
344
 
@@ -486,7 +486,7 @@ public class BigqueryExportUtils
486
486
  public static void removeTempGcsFiles(PluginTask task, String file){
487
487
  try {
488
488
  Storage gcs = BigqueryExportUtils.newGcsClient(task);
489
- log.info("delete finish file gs://{}{}", task.getGcsBucket(), file);
489
+ log.info("delete finish file gs://{}/{}", task.getGcsBucket(), file);
490
490
  gcs.objects().delete(task.getGcsBucket(), file).execute();
491
491
  } catch (Exception e) {
492
492
  log.error("# Remove temp gcs file FAIL : " + file,e);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-bigquery_extract_files
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - jo8937
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-21 00:00:00.000000000 Z
11
+ date: 2017-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java
66
66
  - classpath/commons-codec-1.3.jar
67
67
  - classpath/commons-logging-1.1.1.jar
68
- - classpath/embulk-input-bigquery_extract_files-0.0.6.jar
68
+ - classpath/embulk-input-bigquery_extract_files-0.0.7.jar
69
69
  - classpath/google-api-client-1.23.0.jar
70
70
  - classpath/google-api-services-bigquery-v2-rev363-1.23.0.jar
71
71
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar