embulk-output-jdbc 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed0f3d732ac1fa2ee1d660480be8587ac137d3f0
4
- data.tar.gz: c534795f00578dc8a30e580f9b34a8252e5cc5d4
3
+ metadata.gz: a3248904e8b007fc46013b4697d939d7dd3f55d8
4
+ data.tar.gz: f3eacb5f104d9ea4f474e14a7c0ea724c9831341
5
5
  SHA512:
6
- metadata.gz: 57157af5fd5be3ceeab92d8dbf36c9f031e2a1ed5bb8f2ae78bf0d91629efdcaf0523de04bbeed799633f40596c4159d8ef589c267b0693d6acd591973b6ee1c
7
- data.tar.gz: cbfa1e7646587f442a233f946bde40fe3525414713727333bbe98720408d71067b7e2e60e88358d9d86ac367ed58d1744f4f876e2e7357f5ae3f1009ac5c3786
6
+ metadata.gz: 5d8f93e2f5f014df77b5675ce58209e1c17a43b02e17a4dbe5fc57348461a90659a5aacd8c61f8f68bba94c08cab0da2facd6989c57b90e8357c810d2c3f84be
7
+ data.tar.gz: 6afcba3953c431e6c0fc4e5733a91c4dff9b4c04af6d5eb04869aeb7b8c2d432f2df8ddc6a96ae725bfeae18cb62f413be206f9dcad2246391de45cbfbe87c5b
@@ -1,2 +1,2 @@
1
- dependencies {
2
- }
1
+ dependencies {
2
+ }
@@ -1,3 +1,3 @@
1
- Embulk::JavaPlugin.register_output(
2
- :jdbc, "org.embulk.output.JdbcOutputPlugin",
3
- File.expand_path('../../../../classpath', __FILE__))
1
+ Embulk::JavaPlugin.register_output(
2
+ :jdbc, "org.embulk.output.JdbcOutputPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -1,120 +1,120 @@
1
- package org.embulk.output;
2
-
3
- import java.util.Properties;
4
- import java.sql.Driver;
5
- import java.io.IOException;
6
- import java.sql.Connection;
7
- import java.sql.SQLException;
8
- import com.google.common.base.Optional;
9
- import com.google.common.base.Throwables;
10
- import org.embulk.config.Config;
11
- import org.embulk.config.ConfigDefault;
12
- import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
13
- import org.embulk.output.jdbc.BatchInsert;
14
- import org.embulk.output.jdbc.StandardBatchInsert;
15
- import org.embulk.output.jdbc.JdbcOutputConnector;
16
- import org.embulk.output.jdbc.JdbcOutputConnection;
17
-
18
- public class JdbcOutputPlugin
19
- extends AbstractJdbcOutputPlugin
20
- {
21
- public interface GenericPluginTask extends PluginTask
22
- {
23
- @Config("driver_path")
24
- @ConfigDefault("null")
25
- public Optional<String> getDriverPath();
26
-
27
- @Config("driver_class")
28
- public String getDriverClass();
29
-
30
- @Config("url")
31
- public String getUrl();
32
-
33
- @Config("user")
34
- @ConfigDefault("null")
35
- public Optional<String> getUser();
36
-
37
- @Config("password")
38
- @ConfigDefault("null")
39
- public Optional<String> getPassword();
40
-
41
- @Config("schema")
42
- @ConfigDefault("null")
43
- public Optional<String> getSchema();
44
- }
45
-
46
- @Override
47
- protected Class<? extends PluginTask> getTaskClass()
48
- {
49
- return GenericPluginTask.class;
50
- }
51
-
52
- @Override
53
- protected GenericOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
54
- {
55
- GenericPluginTask t = (GenericPluginTask) task;
56
-
57
- if (t.getDriverPath().isPresent()) {
58
- loadDriverJar(t.getDriverPath().get());
59
- }
60
-
61
- Properties props = new Properties();
62
- if (t.getUser().isPresent()) {
63
- props.setProperty("user", t.getUser().get());
64
- }
65
- if (t.getPassword().isPresent()) {
66
- props.setProperty("password", t.getPassword().get());
67
- }
68
-
69
- props.putAll(t.getOptions());
70
-
71
- return new GenericOutputConnector(t.getUrl(), props, t.getDriverClass(),
72
- t.getSchema().orNull());
73
- }
74
-
75
- private static class GenericOutputConnector
76
- implements JdbcOutputConnector
77
- {
78
- private final Driver driver;
79
- private final String url;
80
- private final Properties properties;
81
- private final String schemaName;
82
-
83
- public GenericOutputConnector(String url, Properties properties, String driverClass,
84
- String schemaName)
85
- {
86
- try {
87
- // TODO check Class.forName(driverClass) is a Driver before newInstance
88
- // for security
89
- this.driver = (Driver) Class.forName(driverClass).newInstance();
90
- } catch (Exception ex) {
91
- throw Throwables.propagate(ex);
92
- }
93
- this.url = url;
94
- this.properties = properties;
95
- this.schemaName = schemaName;
96
- }
97
-
98
- @Override
99
- public JdbcOutputConnection connect(boolean autoCommit) throws SQLException
100
- {
101
- Connection c = driver.connect(url, properties);
102
- try {
103
- c.setAutoCommit(autoCommit);
104
- JdbcOutputConnection con = new JdbcOutputConnection(c, schemaName);
105
- c = null;
106
- return con;
107
- } finally {
108
- if (c != null) {
109
- c.close();
110
- }
111
- }
112
- }
113
- }
114
-
115
- @Override
116
- protected BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException
117
- {
118
- return new StandardBatchInsert(getConnector(task, true));
119
- }
120
- }
1
+ package org.embulk.output;
2
+
3
+ import java.util.Properties;
4
+ import java.sql.Driver;
5
+ import java.io.IOException;
6
+ import java.sql.Connection;
7
+ import java.sql.SQLException;
8
+ import com.google.common.base.Optional;
9
+ import com.google.common.base.Throwables;
10
+ import org.embulk.config.Config;
11
+ import org.embulk.config.ConfigDefault;
12
+ import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
13
+ import org.embulk.output.jdbc.BatchInsert;
14
+ import org.embulk.output.jdbc.StandardBatchInsert;
15
+ import org.embulk.output.jdbc.JdbcOutputConnector;
16
+ import org.embulk.output.jdbc.JdbcOutputConnection;
17
+
18
+ public class JdbcOutputPlugin
19
+ extends AbstractJdbcOutputPlugin
20
+ {
21
+ public interface GenericPluginTask extends PluginTask
22
+ {
23
+ @Config("driver_path")
24
+ @ConfigDefault("null")
25
+ public Optional<String> getDriverPath();
26
+
27
+ @Config("driver_class")
28
+ public String getDriverClass();
29
+
30
+ @Config("url")
31
+ public String getUrl();
32
+
33
+ @Config("user")
34
+ @ConfigDefault("null")
35
+ public Optional<String> getUser();
36
+
37
+ @Config("password")
38
+ @ConfigDefault("null")
39
+ public Optional<String> getPassword();
40
+
41
+ @Config("schema")
42
+ @ConfigDefault("null")
43
+ public Optional<String> getSchema();
44
+ }
45
+
46
+ @Override
47
+ protected Class<? extends PluginTask> getTaskClass()
48
+ {
49
+ return GenericPluginTask.class;
50
+ }
51
+
52
+ @Override
53
+ protected GenericOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
54
+ {
55
+ GenericPluginTask t = (GenericPluginTask) task;
56
+
57
+ if (t.getDriverPath().isPresent()) {
58
+ loadDriverJar(t.getDriverPath().get());
59
+ }
60
+
61
+ Properties props = new Properties();
62
+ if (t.getUser().isPresent()) {
63
+ props.setProperty("user", t.getUser().get());
64
+ }
65
+ if (t.getPassword().isPresent()) {
66
+ props.setProperty("password", t.getPassword().get());
67
+ }
68
+
69
+ props.putAll(t.getOptions());
70
+
71
+ return new GenericOutputConnector(t.getUrl(), props, t.getDriverClass(),
72
+ t.getSchema().orNull());
73
+ }
74
+
75
+ private static class GenericOutputConnector
76
+ implements JdbcOutputConnector
77
+ {
78
+ private final Driver driver;
79
+ private final String url;
80
+ private final Properties properties;
81
+ private final String schemaName;
82
+
83
+ public GenericOutputConnector(String url, Properties properties, String driverClass,
84
+ String schemaName)
85
+ {
86
+ try {
87
+ // TODO check Class.forName(driverClass) is a Driver before newInstance
88
+ // for security
89
+ this.driver = (Driver) Class.forName(driverClass).newInstance();
90
+ } catch (Exception ex) {
91
+ throw Throwables.propagate(ex);
92
+ }
93
+ this.url = url;
94
+ this.properties = properties;
95
+ this.schemaName = schemaName;
96
+ }
97
+
98
+ @Override
99
+ public JdbcOutputConnection connect(boolean autoCommit) throws SQLException
100
+ {
101
+ Connection c = driver.connect(url, properties);
102
+ try {
103
+ c.setAutoCommit(autoCommit);
104
+ JdbcOutputConnection con = new JdbcOutputConnection(c, schemaName);
105
+ c = null;
106
+ return con;
107
+ } finally {
108
+ if (c != null) {
109
+ c.close();
110
+ }
111
+ }
112
+ }
113
+ }
114
+
115
+ @Override
116
+ protected BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException
117
+ {
118
+ return new StandardBatchInsert(getConnector(task, true));
119
+ }
120
+ }
@@ -1,756 +1,755 @@
1
- package org.embulk.output.jdbc;
2
-
3
- import java.util.HashSet;
4
- import java.util.List;
5
- import java.util.Locale;
6
- import java.util.Properties;
7
- import java.util.Set;
8
- import java.util.concurrent.ExecutionException;
9
- import java.io.IOException;
10
- import java.nio.file.Paths;
11
- import java.sql.Types;
12
- import java.sql.ResultSet;
13
- import java.sql.DatabaseMetaData;
14
- import java.sql.SQLException;
15
-
16
- import org.slf4j.Logger;
17
-
18
- import com.google.common.base.Optional;
19
- import com.google.common.base.Throwables;
20
- import com.google.common.collect.ImmutableList;
21
-
22
- import org.embulk.config.CommitReport;
23
- import org.embulk.config.Config;
24
- import org.embulk.config.ConfigDefault;
25
- import org.embulk.config.ConfigDiff;
26
- import org.embulk.config.ConfigException;
27
- import org.embulk.config.ConfigSource;
28
- import org.embulk.config.Task;
29
- import org.embulk.config.TaskSource;
30
- import org.embulk.spi.Exec;
31
- import org.embulk.spi.Column;
32
- import org.embulk.spi.ColumnVisitor;
33
- import org.embulk.spi.OutputPlugin;
34
- import org.embulk.spi.PluginClassLoader;
35
- import org.embulk.spi.Schema;
36
- import org.embulk.spi.TransactionalPageOutput;
37
- import org.embulk.spi.Page;
38
- import org.embulk.spi.PageReader;
39
- import org.embulk.spi.time.Timestamp;
40
- import org.embulk.spi.time.TimestampFormatter;
41
- import org.embulk.output.jdbc.setter.ColumnSetter;
42
- import org.embulk.output.jdbc.setter.ColumnSetterFactory;
43
- import org.embulk.output.jdbc.RetryExecutor.IdempotentOperation;
44
-
45
- import static org.embulk.output.jdbc.RetryExecutor.retryExecutor;
46
-
47
- public abstract class AbstractJdbcOutputPlugin
48
- implements OutputPlugin
49
- {
50
- private final static Set<String> loadedJarGlobs = new HashSet<String>();
51
-
52
- private final Logger logger = Exec.getLogger(getClass());
53
-
54
- public interface PluginTask
55
- extends Task
56
- {
57
- @Config("options")
58
- @ConfigDefault("{}")
59
- public Properties getOptions();
60
-
61
- @Config("table")
62
- public String getTable();
63
-
64
- @Config("mode")
65
- public String getModeConfig();
66
-
67
- @Config("batch_size")
68
- @ConfigDefault("16777216")
69
- // TODO set minimum number
70
- public int getBatchSize();
71
-
72
- public void setMode(Mode mode);
73
- public Mode getMode();
74
-
75
- public JdbcSchema getLoadSchema();
76
- public void setLoadSchema(JdbcSchema schema);
77
-
78
- public Optional<String> getSwapTable();
79
- public void setSwapTable(Optional<String> name);
80
-
81
- public Optional<String> getMultipleLoadTablePrefix();
82
- public void setMultipleLoadTablePrefix(Optional<String> prefix);
83
- }
84
-
85
- protected void loadDriverJar(String glob)
86
- {
87
- synchronized (loadedJarGlobs) {
88
- if (!loadedJarGlobs.contains(glob)) {
89
- // TODO match glob
90
- PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
91
- loader.addPath(Paths.get(glob));
92
- loadedJarGlobs.add(glob);
93
- }
94
- }
95
- }
96
-
97
- // for subclasses to add @Config
98
- protected Class<? extends PluginTask> getTaskClass()
99
- {
100
- return PluginTask.class;
101
- }
102
-
103
- protected abstract JdbcOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation);
104
-
105
- protected abstract BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException;
106
-
107
- protected JdbcOutputConnection newConnection(PluginTask task, boolean retryableMetadataOperation,
108
- boolean autoCommit) throws SQLException
109
- {
110
- return getConnector(task, retryableMetadataOperation).connect(autoCommit);
111
- }
112
-
113
- public enum Mode {
114
- INSERT,
115
- INSERT_DIRECT,
116
- TRUNCATE_INSERT,
117
- MERGE,
118
- REPLACE,
119
- REPLACE_INPLACE;
120
- //REPLACE_PARTITIONING, // MySQL: partitioning, PostgreSQL: inheritance
121
-
122
- public boolean isDirectWrite()
123
- {
124
- return this == INSERT_DIRECT;
125
- }
126
-
127
- public boolean isInplace()
128
- {
129
- return this == INSERT_DIRECT || this == REPLACE_INPLACE || this == MERGE;
130
- }
131
-
132
- public boolean isMerge()
133
- {
134
- return this == MERGE;
135
- }
136
-
137
- public boolean usesMultipleLoadTables()
138
- {
139
- return !isInplace();
140
- }
141
-
142
- public boolean createAndSwapTable()
143
- {
144
- return this == REPLACE_INPLACE || this == REPLACE;
145
- }
146
- }
147
-
148
- public ConfigDiff transaction(ConfigSource config,
149
- Schema schema, int taskCount,
150
- OutputPlugin.Control control)
151
- {
152
- PluginTask task = config.loadConfig(getTaskClass());
153
-
154
- // TODO this is a temporary code. behavior will change in a future release.
155
- switch(task.getModeConfig()) {
156
- case "insert":
157
- task.setMode(Mode.INSERT_DIRECT);
158
- break;
159
- case "replace":
160
- task.setMode(Mode.REPLACE_INPLACE);
161
- break;
162
- case "merge":
163
- task.setMode(Mode.MERGE);
164
- break;
165
- default:
166
- throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are: insert, replace, merge", task.getModeConfig()));
167
- }
168
-
169
- //switch(task.getModeConfig()) {
170
- ////case "insert":
171
- //// task.setMode(Mode.INSERT);
172
- //// break;
173
- //case "insert_direct":
174
- // task.setMode(Mode.INSERT_DIRECT);
175
- // break;
176
- ////case "truncate_insert": // TODO
177
- //// task.setMode(Mode.TRUNCATE_INSERT);
178
- //// break;
179
- ////case "merge": // TODO
180
- //// task.setMode(Mode.MERGE);
181
- //// break;
182
- ////case "replace":
183
- //// task.setMode(Mode.REPLACE);
184
- //// break;
185
- //case "replace_inplace":
186
- // task.setMode(Mode.REPLACE_INPLACE);
187
- // break;
188
- //default:
189
- // new ConfigException(String.format("Unknown mode '%s'. Supported modes are: insert_direct, replace_inplace", task.getModeConfig()));
190
- //}
191
-
192
- task = begin(task, schema, taskCount);
193
- control.run(task.dump());
194
- return commit(task, schema, taskCount);
195
- }
196
-
197
- public ConfigDiff resume(TaskSource taskSource,
198
- Schema schema, int taskCount,
199
- OutputPlugin.Control control)
200
- {
201
- PluginTask task = taskSource.loadTask(getTaskClass());
202
-
203
- if (task.getMode().isInplace()) {
204
- throw new UnsupportedOperationException("inplace mode is not resumable. You need to delete partially-loaded records from the database and restart the entire transaction.");
205
- }
206
-
207
- task = begin(task, schema, taskCount);
208
- control.run(task.dump());
209
- return commit(task, schema, taskCount);
210
- }
211
-
212
- protected String getTransactionUniqueName()
213
- {
214
- // TODO use uuid?
215
- Timestamp t = Exec.session().getTransactionTime();
216
- return String.format("%016x%08x", t.getEpochSecond(), t.getNano());
217
- }
218
-
219
- private PluginTask begin(final PluginTask task,
220
- final Schema schema, int taskCount)
221
- {
222
- try {
223
- withRetry(new IdempotentSqlRunnable() { // no intermediate data if isDirectWrite == true
224
- public void run() throws SQLException
225
- {
226
- JdbcOutputConnection con = newConnection(task, true, false);
227
- try {
228
- doBegin(con, task, schema);
229
- } finally {
230
- con.close();
231
- }
232
- }
233
- });
234
- } catch (SQLException | InterruptedException ex) {
235
- throw new RuntimeException(ex);
236
- }
237
- return task;
238
- }
239
-
240
- private ConfigDiff commit(final PluginTask task,
241
- Schema schema, final int taskCount)
242
- {
243
- if (!task.getMode().isDirectWrite()) { // no intermediate data if isDirectWrite == true
244
- try {
245
- withRetry(new IdempotentSqlRunnable() {
246
- public void run() throws SQLException
247
- {
248
- JdbcOutputConnection con = newConnection(task, false, false);
249
- try {
250
- doCommit(con, task, taskCount);
251
- } finally {
252
- con.close();
253
- }
254
- }
255
- });
256
- } catch (SQLException | InterruptedException ex) {
257
- throw new RuntimeException(ex);
258
- }
259
- }
260
- return Exec.newConfigDiff();
261
- }
262
-
263
- public void cleanup(TaskSource taskSource,
264
- Schema schema, final int taskCount,
265
- final List<CommitReport> successCommitReports)
266
- {
267
- final PluginTask task = taskSource.loadTask(getTaskClass());
268
-
269
- if (!task.getMode().isDirectWrite()) { // no intermediate data if isDirectWrite == true
270
- try {
271
- withRetry(new IdempotentSqlRunnable() {
272
- public void run() throws SQLException
273
- {
274
- JdbcOutputConnection con = newConnection(task, true, true);
275
- try {
276
- doCleanup(con, task, taskCount, successCommitReports);
277
- } finally {
278
- con.close();
279
- }
280
- }
281
- });
282
- } catch (SQLException | InterruptedException ex) {
283
- throw new RuntimeException(ex);
284
- }
285
- }
286
- }
287
-
288
- protected void doBegin(JdbcOutputConnection con,
289
- PluginTask task, Schema schema) throws SQLException
290
- {
291
- Mode mode = task.getMode();
292
-
293
- JdbcSchema targetTableSchema;
294
- if (mode.createAndSwapTable()) {
295
- // DROP TABLE IF EXISTS xyz__0000000054d92dee1e452158_bulk_load_temp
296
- // CREATE TABLE IF NOT EXISTS xyz__0000000054d92dee1e452158_bulk_load_temp
297
- // swapTableName = "xyz__0000000054d92dee1e452158_bulk_load_temp"
298
- String swapTableName = generateSwapTableName(task);
299
- con.dropTableIfExists(swapTableName);
300
- con.createTableIfNotExists(swapTableName, newJdbcSchemaForNewTable(schema));
301
- targetTableSchema = newJdbcSchemaFromExistentTable(con, swapTableName);
302
- task.setSwapTable(Optional.of(swapTableName));
303
- } else {
304
- // CREATE TABLE IF NOT EXISTS xyz
305
- con.createTableIfNotExists(task.getTable(), newJdbcSchemaForNewTable(schema));
306
- targetTableSchema = newJdbcSchemaFromExistentTable(con, task.getTable());
307
- task.setSwapTable(Optional.<String>absent());
308
- }
309
-
310
- if (mode.usesMultipleLoadTables()) {
311
- // multipleLoadTablePrefix = "xyz__0000000054d92dee1e452158_"
312
- // workers run:
313
- // CREATE TABLE xyz__0000000054d92dee1e452158_%d
314
- String multipleLoadTablePrefix = task.getTable() + "_" + getTransactionUniqueName();
315
- task.setMultipleLoadTablePrefix(Optional.of(multipleLoadTablePrefix));
316
- } else {
317
- task.setMultipleLoadTablePrefix(Optional.<String>absent());
318
- }
319
-
320
- task.setLoadSchema(matchSchemaByColumnNames(schema, targetTableSchema));
321
- }
322
-
323
- protected String generateSwapTableName(PluginTask task) throws SQLException
324
- {
325
- return task.getTable() + "_" + getTransactionUniqueName() + "_bulk_load_temp";
326
- }
327
-
328
- protected void doCommit(JdbcOutputConnection con, PluginTask task, int taskCount)
329
- throws SQLException
330
- {
331
- switch (task.getMode()) {
332
- case INSERT:
333
- // aggregate insert into target
334
- //con.gatherInsertTables();
335
- throw new UnsupportedOperationException("not implemented yet"); // TODO
336
- case INSERT_DIRECT:
337
- // already done
338
- break;
339
- case TRUNCATE_INSERT:
340
- // truncate & aggregate insert into target
341
- throw new UnsupportedOperationException("not implemented yet");
342
- //break;
343
- case MERGE:
344
- // aggregate merge into target
345
- throw new UnsupportedOperationException("not implemented yet");
346
- //break;
347
- case REPLACE:
348
- if (taskCount == 1) {
349
- // swap table
350
- con.replaceTable(task.getSwapTable().get(), task.getLoadSchema(), task.getTable());
351
- } else {
352
- // aggregate insert into swap table & swap table
353
- throw new UnsupportedOperationException("not implemented yet");
354
- }
355
- break;
356
- case REPLACE_INPLACE:
357
- // swap table
358
- con.replaceTable(task.getSwapTable().get(), task.getLoadSchema(), task.getTable());
359
- break;
360
- }
361
- }
362
-
363
- protected void doCleanup(JdbcOutputConnection con, PluginTask task, int taskCount,
364
- List<CommitReport> successCommitReports)
365
- throws SQLException
366
- {
367
- if (task.getSwapTable().isPresent()) {
368
- con.dropTableIfExists(task.getSwapTable().get());
369
- }
370
- if (task.getMultipleLoadTablePrefix().isPresent()) {
371
- for (int i=0; i < taskCount; i++) {
372
- con.dropTableIfExists(formatMultipleLoadTableName(task, i));
373
- }
374
- }
375
- }
376
-
377
- static String formatMultipleLoadTableName(PluginTask task, int taskIndex)
378
- {
379
- return task.getMultipleLoadTablePrefix().get() + String.format("%04x", taskIndex);
380
- }
381
-
382
- protected JdbcSchema newJdbcSchemaForNewTable(Schema schema)
383
- {
384
- final ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
385
- for (Column c : schema.getColumns()) {
386
- final String columnName = c.getName();
387
- c.visit(new ColumnVisitor() {
388
- public void booleanColumn(Column column)
389
- {
390
- columns.add(new JdbcColumn(
391
- columnName, "BOOLEAN",
392
- Types.BOOLEAN, 1, 0, false));
393
- }
394
-
395
- public void longColumn(Column column)
396
- {
397
- columns.add(new JdbcColumn(
398
- columnName, "BIGINT",
399
- Types.BIGINT, 22, 0, false));
400
- }
401
-
402
- public void doubleColumn(Column column)
403
- {
404
- columns.add(new JdbcColumn(
405
- columnName, "DOUBLE PRECISION",
406
- Types.FLOAT, 24, 0, false));
407
- }
408
-
409
- public void stringColumn(Column column)
410
- {
411
- columns.add(new JdbcColumn(
412
- columnName, "CLOB",
413
- Types.CLOB, 4000, 0, false)); // TODO size type param
414
- }
415
-
416
- public void timestampColumn(Column column)
417
- {
418
- columns.add(new JdbcColumn(
419
- columnName, "TIMESTAMP",
420
- Types.TIMESTAMP, 26, 0, false)); // size type param is from postgresql.
421
- }
422
- });
423
- }
424
- return new JdbcSchema(columns.build());
425
- }
426
-
427
- public JdbcSchema newJdbcSchemaFromExistentTable(JdbcOutputConnection connection,
428
- String tableName) throws SQLException
429
- {
430
- DatabaseMetaData dbm = connection.getMetaData();
431
- String escape = dbm.getSearchStringEscape();
432
- String schemaNamePattern = JdbcUtils.escapeSearchString(connection.getSchemaName(), escape);
433
-
434
- ResultSet rs = dbm.getPrimaryKeys(null, schemaNamePattern, tableName);
435
- ImmutableList.Builder<String> primaryKeysBuilder = ImmutableList.builder();
436
- try {
437
- while(rs.next()) {
438
- primaryKeysBuilder.add(rs.getString("COLUMN_NAME"));
439
- }
440
- } finally {
441
- rs.close();
442
- }
443
- ImmutableList<String> primaryKeys = primaryKeysBuilder.build();
444
-
445
- String tableNamePattern = JdbcUtils.escapeSearchString(tableName, escape);
446
- ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
447
- rs = dbm.getColumns(null, schemaNamePattern, tableNamePattern, null);
448
- try {
449
- while(rs.next()) {
450
- String columnName = rs.getString("COLUMN_NAME");
451
- String typeName = rs.getString("TYPE_NAME");
452
- boolean isPrimaryKey = primaryKeys.contains(columnName);
453
- typeName = typeName.toUpperCase(Locale.ENGLISH);
454
- int sqlType = rs.getInt("DATA_TYPE");
455
- int colSize = rs.getInt("COLUMN_SIZE");
456
- int decDigit = rs.getInt("DECIMAL_DIGITS");
457
- if (rs.wasNull()) {
458
- decDigit = -1;
459
- }
460
- //rs.getString("IS_NULLABLE").equals("NO") // "YES" or "" // TODO
461
- //rs.getString("COLUMN_DEF") // or null // TODO
462
- columns.add(new JdbcColumn(
463
- columnName, typeName,
464
- sqlType, colSize, decDigit, isPrimaryKey));
465
- }
466
- } finally {
467
- rs.close();
468
- }
469
- return new JdbcSchema(columns.build());
470
- }
471
-
472
- private JdbcSchema matchSchemaByColumnNames(Schema inputSchema, JdbcSchema targetTableSchema)
473
- {
474
- ImmutableList.Builder<JdbcColumn> jdbcColumns = ImmutableList.builder();
475
-
476
- outer : for (Column column : inputSchema.getColumns()) {
477
- for (JdbcColumn jdbcColumn : targetTableSchema.getColumns()) {
478
- if (jdbcColumn.getName().equals(column.getName())) {
479
- jdbcColumns.add(jdbcColumn);
480
- continue outer;
481
- }
482
- }
483
-
484
- jdbcColumns.add(JdbcColumn.skipColumn());
485
- }
486
-
487
- return new JdbcSchema(jdbcColumns.build());
488
- }
489
-
490
- public TransactionalPageOutput open(TaskSource taskSource, Schema schema, final int taskIndex)
491
- {
492
- final PluginTask task = taskSource.loadTask(getTaskClass());
493
- final Mode mode = task.getMode();
494
-
495
- BatchInsert batch;
496
- try {
497
- batch = newBatchInsert(task);
498
- } catch (IOException | SQLException ex) {
499
- throw new RuntimeException(ex);
500
- }
501
- try {
502
- PageReader reader = new PageReader(schema);
503
- ColumnSetterFactory factory = newColumnSetterFactory(batch, reader, null); // TODO TimestampFormatter
504
-
505
- JdbcSchema loadSchema = task.getLoadSchema();
506
-
507
- ImmutableList.Builder<JdbcColumn> insertColumns = ImmutableList.builder();
508
- ImmutableList.Builder<ColumnSetter> columnSetters = ImmutableList.builder();
509
- for (JdbcColumn c : loadSchema.getColumns()) {
510
- if (c.isSkipColumn()) {
511
- columnSetters.add(factory.newSkipColumnSetter());
512
- } else {
513
- columnSetters.add(factory.newColumnSetter(c));
514
- insertColumns.add(c);
515
- }
516
- }
517
- final JdbcSchema insertSchema = new JdbcSchema(insertColumns.build());
518
-
519
- final BatchInsert b = batch;
520
- withRetry(new IdempotentSqlRunnable() {
521
- public void run() throws SQLException
522
- {
523
- String loadTable;
524
- boolean createTable;
525
- if (mode.usesMultipleLoadTables()) {
526
- // insert, truncate_insert, merge, replace
527
- loadTable = formatMultipleLoadTableName(task, taskIndex);
528
- JdbcOutputConnection con = newConnection(task, true, true);
529
- try {
530
- con.createTableIfNotExists(loadTable, insertSchema);
531
- } finally {
532
- con.close();
533
- }
534
-
535
- } else if (!mode.usesMultipleLoadTables() && mode.createAndSwapTable()) {
536
- // replace_inplace
537
- loadTable = task.getSwapTable().get();
538
-
539
- } else {
540
- // insert_direct
541
- loadTable = task.getTable();
542
- }
543
- b.prepare(loadTable, insertSchema);
544
- }
545
- });
546
-
547
- PluginPageOutput output = newPluginPageOutput(reader, batch, columnSetters.build(),
548
- task.getBatchSize());
549
- batch = null;
550
- return output;
551
-
552
- } catch (SQLException | InterruptedException ex) {
553
- throw new RuntimeException(ex);
554
-
555
- } finally {
556
- if (batch != null) {
557
- try {
558
- batch.close();
559
- } catch (IOException | SQLException ex) {
560
- throw new RuntimeException(ex);
561
- }
562
- }
563
- }
564
- }
565
-
566
- protected ColumnSetterFactory newColumnSetterFactory(BatchInsert batch, PageReader pageReader,
567
- TimestampFormatter timestampFormatter)
568
- {
569
- return new ColumnSetterFactory(batch, pageReader, timestampFormatter);
570
- }
571
-
572
- protected PluginPageOutput newPluginPageOutput(PageReader reader,
573
- BatchInsert batch, List<ColumnSetter> columnSetters,
574
- int batchSize)
575
- {
576
- return new PluginPageOutput(reader, batch, columnSetters, batchSize);
577
- }
578
-
579
- public static class PluginPageOutput
580
- implements TransactionalPageOutput
581
- {
582
- protected final List<Column> columns;
583
- protected final List<ColumnSetter> columnSetters;
584
- private final PageReader pageReader;
585
- private final BatchInsert batch;
586
- private final int batchSize;
587
- private final int foraceBatchFlushSize;
588
-
589
- public PluginPageOutput(PageReader pageReader,
590
- BatchInsert batch, List<ColumnSetter> columnSetters,
591
- int batchSize)
592
- {
593
- this.pageReader = pageReader;
594
- this.batch = batch;
595
- this.columns = pageReader.getSchema().getColumns();
596
- this.columnSetters = columnSetters;
597
- this.batchSize = batchSize;
598
- this.foraceBatchFlushSize = batchSize * 2;
599
- }
600
-
601
- @Override
602
- public void add(Page page)
603
- {
604
- try {
605
- pageReader.setPage(page);
606
- while (pageReader.nextRecord()) {
607
- if (batch.getBatchWeight() > foraceBatchFlushSize) {
608
- batch.flush();
609
- }
610
- handleColumnsSetters();
611
- batch.add();
612
- }
613
- if (batch.getBatchWeight() > batchSize) {
614
- batch.flush();
615
- }
616
- } catch (IOException | SQLException ex) {
617
- throw new RuntimeException(ex);
618
- }
619
- }
620
-
621
- @Override
622
- public void finish()
623
- {
624
- try {
625
- batch.finish();
626
- } catch (IOException | SQLException ex) {
627
- throw new RuntimeException(ex);
628
- }
629
- }
630
-
631
- @Override
632
- public void close()
633
- {
634
- try {
635
- batch.close();
636
- } catch (IOException | SQLException ex) {
637
- throw new RuntimeException(ex);
638
- }
639
- }
640
-
641
- @Override
642
- public void abort()
643
- {
644
- }
645
-
646
- @Override
647
- public CommitReport commit()
648
- {
649
- return Exec.newCommitReport();
650
- }
651
-
652
- protected void handleColumnsSetters()
653
- {
654
- int size = columnSetters.size();
655
- for (int i=0; i < size; i++) {
656
- columns.get(i).visit(columnSetters.get(i));
657
- }
658
- }
659
-
660
- }
661
-
662
- public static interface IdempotentSqlRunnable
663
- {
664
- public void run() throws SQLException;
665
- }
666
-
667
- protected void withRetry(IdempotentSqlRunnable op)
668
- throws SQLException, InterruptedException
669
- {
670
- withRetry(op, "Operation failed");
671
- }
672
-
673
- protected void withRetry(final IdempotentSqlRunnable op, final String errorMessage)
674
- throws SQLException, InterruptedException
675
- {
676
- try {
677
- retryExecutor()
678
- .setRetryLimit(12)
679
- .setInitialRetryWait(1000)
680
- .setMaxRetryWait(30 * 60 * 1000)
681
- .runInterruptible(new IdempotentOperation<Void>() {
682
- public Void call() throws Exception
683
- {
684
- op.run();
685
- return null;
686
- }
687
-
688
- public void onRetry(Throwable exception, int retryCount, int retryLimit, int retryWait)
689
- {
690
- if (exception instanceof SQLException) {
691
- SQLException ex = (SQLException) exception;
692
- String sqlState = ex.getSQLState();
693
- int errorCode = ex.getErrorCode();
694
- logger.warn("{} ({}:{}), retrying {}/{} after {} seconds. Message: {}",
695
- errorMessage, errorCode, sqlState, retryCount, retryLimit, retryWait/1000,
696
- buildExceptionMessage(exception));
697
- } else {
698
- logger.warn("{}, retrying {}/{} after {} seconds. Message: {}",
699
- errorMessage, retryCount, retryLimit, retryWait/1000,
700
- buildExceptionMessage(exception));
701
- }
702
- if (retryCount % 3 == 0) {
703
- logger.info("Error details:", exception);
704
- }
705
- }
706
-
707
- public void onGiveup(Throwable firstException, Throwable lastException)
708
- {
709
- if (firstException instanceof SQLException) {
710
- SQLException ex = (SQLException) firstException;
711
- String sqlState = ex.getSQLState();
712
- int errorCode = ex.getErrorCode();
713
- logger.error("{} ({}:{})", errorMessage, errorCode, sqlState);
714
- }
715
- }
716
-
717
- public boolean isRetryableException(Throwable exception)
718
- {
719
- //if (exception instanceof SQLException) {
720
- // SQLException ex = (SQLException) exception;
721
- // String sqlState = ex.getSQLState();
722
- // int errorCode = ex.getErrorCode();
723
- // return isRetryableSQLException(ex);
724
- //}
725
- return false; // TODO
726
- }
727
- });
728
-
729
- } catch (ExecutionException ex) {
730
- Throwable cause = ex.getCause();
731
- Throwables.propagateIfInstanceOf(cause, SQLException.class);
732
- throw Throwables.propagate(cause);
733
- }
734
- }
735
-
736
- private String buildExceptionMessage(Throwable ex) {
737
- StringBuilder sb = new StringBuilder();
738
- sb.append(ex.getMessage());
739
- if (ex.getCause() != null) {
740
- buildExceptionMessageCont(sb, ex.getCause(), ex.getMessage());
741
- }
742
- return sb.toString();
743
- }
744
-
745
- private void buildExceptionMessageCont(StringBuilder sb, Throwable ex, String lastMessage) {
746
- if (!lastMessage.equals(ex.getMessage())) {
747
- // suppress same messages
748
- sb.append(" < ");
749
- sb.append(ex.getMessage());
750
- }
751
- if (ex.getCause() == null) {
752
- return;
753
- }
754
- buildExceptionMessageCont(sb, ex.getCause(), ex.getMessage());
755
- }
756
- }
1
+ package org.embulk.output.jdbc;
2
+
3
+ import java.util.HashSet;
4
+ import java.util.List;
5
+ import java.util.Locale;
6
+ import java.util.Properties;
7
+ import java.util.Set;
8
+ import java.util.concurrent.ExecutionException;
9
+ import java.io.IOException;
10
+ import java.nio.file.Paths;
11
+ import java.sql.Types;
12
+ import java.sql.ResultSet;
13
+ import java.sql.DatabaseMetaData;
14
+ import java.sql.SQLException;
15
+
16
+ import org.slf4j.Logger;
17
+
18
+ import com.google.common.base.Optional;
19
+ import com.google.common.base.Throwables;
20
+ import com.google.common.collect.ImmutableList;
21
+
22
+ import org.embulk.config.CommitReport;
23
+ import org.embulk.config.Config;
24
+ import org.embulk.config.ConfigDefault;
25
+ import org.embulk.config.ConfigDiff;
26
+ import org.embulk.config.ConfigException;
27
+ import org.embulk.config.ConfigSource;
28
+ import org.embulk.config.Task;
29
+ import org.embulk.config.TaskSource;
30
+ import org.embulk.spi.Exec;
31
+ import org.embulk.spi.Column;
32
+ import org.embulk.spi.ColumnVisitor;
33
+ import org.embulk.spi.OutputPlugin;
34
+ import org.embulk.spi.PluginClassLoader;
35
+ import org.embulk.spi.Schema;
36
+ import org.embulk.spi.TransactionalPageOutput;
37
+ import org.embulk.spi.Page;
38
+ import org.embulk.spi.PageReader;
39
+ import org.embulk.spi.time.Timestamp;
40
+ import org.embulk.spi.time.TimestampFormatter;
41
+ import org.embulk.output.jdbc.setter.ColumnSetter;
42
+ import org.embulk.output.jdbc.setter.ColumnSetterFactory;
43
+ import org.embulk.output.jdbc.RetryExecutor.IdempotentOperation;
44
+
45
+ import static org.embulk.output.jdbc.RetryExecutor.retryExecutor;
46
+
47
+ public abstract class AbstractJdbcOutputPlugin
48
+ implements OutputPlugin
49
+ {
50
+ private final static Set<String> loadedJarGlobs = new HashSet<String>();
51
+
52
+ private final Logger logger = Exec.getLogger(getClass());
53
+
54
+ public interface PluginTask
55
+ extends Task
56
+ {
57
+ @Config("options")
58
+ @ConfigDefault("{}")
59
+ public Properties getOptions();
60
+
61
+ @Config("table")
62
+ public String getTable();
63
+
64
+ @Config("mode")
65
+ public String getModeConfig();
66
+
67
+ @Config("batch_size")
68
+ @ConfigDefault("16777216")
69
+ // TODO set minimum number
70
+ public int getBatchSize();
71
+
72
+ public void setMode(Mode mode);
73
+ public Mode getMode();
74
+
75
+ public JdbcSchema getLoadSchema();
76
+ public void setLoadSchema(JdbcSchema schema);
77
+
78
+ public Optional<String> getSwapTable();
79
+ public void setSwapTable(Optional<String> name);
80
+
81
+ public Optional<String> getMultipleLoadTablePrefix();
82
+ public void setMultipleLoadTablePrefix(Optional<String> prefix);
83
+ }
84
+
85
+ protected void loadDriverJar(String glob)
86
+ {
87
+ synchronized (loadedJarGlobs) {
88
+ if (!loadedJarGlobs.contains(glob)) {
89
+ // TODO match glob
90
+ PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
91
+ loader.addPath(Paths.get(glob));
92
+ loadedJarGlobs.add(glob);
93
+ }
94
+ }
95
+ }
96
+
97
+ // for subclasses to add @Config
98
+ protected Class<? extends PluginTask> getTaskClass()
99
+ {
100
+ return PluginTask.class;
101
+ }
102
+
103
+ protected abstract JdbcOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation);
104
+
105
+ protected abstract BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException;
106
+
107
+ protected JdbcOutputConnection newConnection(PluginTask task, boolean retryableMetadataOperation,
108
+ boolean autoCommit) throws SQLException
109
+ {
110
+ return getConnector(task, retryableMetadataOperation).connect(autoCommit);
111
+ }
112
+
113
/**
 * Write strategies known to the plugin, plus predicates that classify them.
 */
public enum Mode {
    INSERT,
    INSERT_DIRECT,
    TRUNCATE_INSERT,
    MERGE,
    REPLACE,
    REPLACE_INPLACE;
    //REPLACE_PARTITIONING,  // MySQL: partitioning, PostgreSQL: inheritance

    /** True only for {@code INSERT_DIRECT}. */
    public boolean isDirectWrite()
    {
        return INSERT_DIRECT == this;
    }

    /** True for {@code INSERT_DIRECT}, {@code REPLACE_INPLACE} and {@code MERGE}. */
    public boolean isInplace()
    {
        switch (this) {
        case INSERT_DIRECT:
        case REPLACE_INPLACE:
        case MERGE:
            return true;
        default:
            return false;
        }
    }

    /** True only for {@code MERGE}. */
    public boolean isMerge()
    {
        return MERGE == this;
    }

    /** True for every mode that is not in-place. */
    public boolean usesMultipleLoadTables()
    {
        if (isInplace()) {
            return false;
        }
        return true;
    }

    /** True for {@code REPLACE_INPLACE} and {@code REPLACE}. */
    public boolean createAndSwapTable()
    {
        switch (this) {
        case REPLACE_INPLACE:
        case REPLACE:
            return true;
        default:
            return false;
        }
    }
}
147
+
148
+ public ConfigDiff transaction(ConfigSource config,
149
+ Schema schema, int taskCount,
150
+ OutputPlugin.Control control)
151
+ {
152
+ PluginTask task = config.loadConfig(getTaskClass());
153
+
154
+ // TODO this is a temporary code. behavior will change in a future release.
155
+ switch(task.getModeConfig()) {
156
+ case "insert":
157
+ task.setMode(Mode.INSERT_DIRECT);
158
+ break;
159
+ case "replace":
160
+ task.setMode(Mode.REPLACE_INPLACE);
161
+ break;
162
+ case "merge":
163
+ task.setMode(Mode.MERGE);
164
+ break;
165
+ default:
166
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are: insert, replace, merge", task.getModeConfig()));
167
+ }
168
+
169
+ //switch(task.getModeConfig()) {
170
+ ////case "insert":
171
+ //// task.setMode(Mode.INSERT);
172
+ //// break;
173
+ //case "insert_direct":
174
+ // task.setMode(Mode.INSERT_DIRECT);
175
+ // break;
176
+ ////case "truncate_insert": // TODO
177
+ //// task.setMode(Mode.TRUNCATE_INSERT);
178
+ //// break;
179
+ ////case "merge": // TODO
180
+ //// task.setMode(Mode.MERGE);
181
+ //// break;
182
+ ////case "replace":
183
+ //// task.setMode(Mode.REPLACE);
184
+ //// break;
185
+ //case "replace_inplace":
186
+ // task.setMode(Mode.REPLACE_INPLACE);
187
+ // break;
188
+ //default:
189
+ // new ConfigException(String.format("Unknown mode '%s'. Supported modes are: insert_direct, replace_inplace", task.getModeConfig()));
190
+ //}
191
+
192
+ task = begin(task, schema, taskCount);
193
+ control.run(task.dump());
194
+ return commit(task, schema, taskCount);
195
+ }
196
+
197
+ public ConfigDiff resume(TaskSource taskSource,
198
+ Schema schema, int taskCount,
199
+ OutputPlugin.Control control)
200
+ {
201
+ PluginTask task = taskSource.loadTask(getTaskClass());
202
+
203
+ if (task.getMode().isInplace()) {
204
+ throw new UnsupportedOperationException("inplace mode is not resumable. You need to delete partially-loaded records from the database and restart the entire transaction.");
205
+ }
206
+
207
+ task = begin(task, schema, taskCount);
208
+ control.run(task.dump());
209
+ return commit(task, schema, taskCount);
210
+ }
211
+
212
+ protected String getTransactionUniqueName()
213
+ {
214
+ // TODO use uuid?
215
+ Timestamp t = Exec.session().getTransactionTime();
216
+ return String.format("%016x%08x", t.getEpochSecond(), t.getNano());
217
+ }
218
+
219
+ private PluginTask begin(final PluginTask task,
220
+ final Schema schema, int taskCount)
221
+ {
222
+ try {
223
+ withRetry(new IdempotentSqlRunnable() { // no intermediate data if isDirectWrite == true
224
+ public void run() throws SQLException
225
+ {
226
+ JdbcOutputConnection con = newConnection(task, true, false);
227
+ try {
228
+ doBegin(con, task, schema);
229
+ } finally {
230
+ con.close();
231
+ }
232
+ }
233
+ });
234
+ } catch (SQLException | InterruptedException ex) {
235
+ throw new RuntimeException(ex);
236
+ }
237
+ return task;
238
+ }
239
+
240
+ private ConfigDiff commit(final PluginTask task,
241
+ Schema schema, final int taskCount)
242
+ {
243
+ if (!task.getMode().isDirectWrite()) { // no intermediate data if isDirectWrite == true
244
+ try {
245
+ withRetry(new IdempotentSqlRunnable() {
246
+ public void run() throws SQLException
247
+ {
248
+ JdbcOutputConnection con = newConnection(task, false, false);
249
+ try {
250
+ doCommit(con, task, taskCount);
251
+ } finally {
252
+ con.close();
253
+ }
254
+ }
255
+ });
256
+ } catch (SQLException | InterruptedException ex) {
257
+ throw new RuntimeException(ex);
258
+ }
259
+ }
260
+ return Exec.newConfigDiff();
261
+ }
262
+
263
+ public void cleanup(TaskSource taskSource,
264
+ Schema schema, final int taskCount,
265
+ final List<CommitReport> successCommitReports)
266
+ {
267
+ final PluginTask task = taskSource.loadTask(getTaskClass());
268
+
269
+ if (!task.getMode().isDirectWrite()) { // no intermediate data if isDirectWrite == true
270
+ try {
271
+ withRetry(new IdempotentSqlRunnable() {
272
+ public void run() throws SQLException
273
+ {
274
+ JdbcOutputConnection con = newConnection(task, true, true);
275
+ try {
276
+ doCleanup(con, task, taskCount, successCommitReports);
277
+ } finally {
278
+ con.close();
279
+ }
280
+ }
281
+ });
282
+ } catch (SQLException | InterruptedException ex) {
283
+ throw new RuntimeException(ex);
284
+ }
285
+ }
286
+ }
287
+
288
+ protected void doBegin(JdbcOutputConnection con,
289
+ PluginTask task, Schema schema) throws SQLException
290
+ {
291
+ Mode mode = task.getMode();
292
+
293
+ JdbcSchema targetTableSchema;
294
+ if (mode.createAndSwapTable()) {
295
+ // DROP TABLE IF EXISTS xyz__0000000054d92dee1e452158_bulk_load_temp
296
+ // CREATE TABLE IF NOT EXISTS xyz__0000000054d92dee1e452158_bulk_load_temp
297
+ // swapTableName = "xyz__0000000054d92dee1e452158_bulk_load_temp"
298
+ String swapTableName = generateSwapTableName(task);
299
+ con.dropTableIfExists(swapTableName);
300
+ con.createTableIfNotExists(swapTableName, newJdbcSchemaForNewTable(schema));
301
+ targetTableSchema = newJdbcSchemaFromExistentTable(con, swapTableName);
302
+ task.setSwapTable(Optional.of(swapTableName));
303
+ } else {
304
+ // CREATE TABLE IF NOT EXISTS xyz
305
+ con.createTableIfNotExists(task.getTable(), newJdbcSchemaForNewTable(schema));
306
+ targetTableSchema = newJdbcSchemaFromExistentTable(con, task.getTable());
307
+ task.setSwapTable(Optional.<String>absent());
308
+ }
309
+
310
+ if (mode.usesMultipleLoadTables()) {
311
+ // multipleLoadTablePrefix = "xyz__0000000054d92dee1e452158_"
312
+ // workers run:
313
+ // CREATE TABLE xyz__0000000054d92dee1e452158_%d
314
+ String multipleLoadTablePrefix = task.getTable() + "_" + getTransactionUniqueName();
315
+ task.setMultipleLoadTablePrefix(Optional.of(multipleLoadTablePrefix));
316
+ } else {
317
+ task.setMultipleLoadTablePrefix(Optional.<String>absent());
318
+ }
319
+
320
+ task.setLoadSchema(matchSchemaByColumnNames(schema, targetTableSchema));
321
+ }
322
+
323
+ protected String generateSwapTableName(PluginTask task) throws SQLException
324
+ {
325
+ return task.getTable() + "_" + getTransactionUniqueName() + "_bulk_load_temp";
326
+ }
327
+
328
+ protected void doCommit(JdbcOutputConnection con, PluginTask task, int taskCount)
329
+ throws SQLException
330
+ {
331
+ switch (task.getMode()) {
332
+ case INSERT:
333
+ // aggregate insert into target
334
+ //con.gatherInsertTables();
335
+ throw new UnsupportedOperationException("not implemented yet"); // TODO
336
+ case INSERT_DIRECT:
337
+ // already done
338
+ break;
339
+ case TRUNCATE_INSERT:
340
+ // truncate & aggregate insert into target
341
+ throw new UnsupportedOperationException("not implemented yet");
342
+ //break;
343
+ case MERGE:
344
+ // aggregate merge into target
345
+ throw new UnsupportedOperationException("not implemented yet");
346
+ //break;
347
+ case REPLACE:
348
+ if (taskCount == 1) {
349
+ // swap table
350
+ con.replaceTable(task.getSwapTable().get(), task.getLoadSchema(), task.getTable());
351
+ } else {
352
+ // aggregate insert into swap table & swap table
353
+ throw new UnsupportedOperationException("not implemented yet");
354
+ }
355
+ break;
356
+ case REPLACE_INPLACE:
357
+ // swap table
358
+ con.replaceTable(task.getSwapTable().get(), task.getLoadSchema(), task.getTable());
359
+ break;
360
+ }
361
+ }
362
+
363
+ protected void doCleanup(JdbcOutputConnection con, PluginTask task, int taskCount,
364
+ List<CommitReport> successCommitReports)
365
+ throws SQLException
366
+ {
367
+ if (task.getSwapTable().isPresent()) {
368
+ con.dropTableIfExists(task.getSwapTable().get());
369
+ }
370
+ if (task.getMultipleLoadTablePrefix().isPresent()) {
371
+ for (int i=0; i < taskCount; i++) {
372
+ con.dropTableIfExists(formatMultipleLoadTableName(task, i));
373
+ }
374
+ }
375
+ }
376
+
377
+ static String formatMultipleLoadTableName(PluginTask task, int taskIndex)
378
+ {
379
+ return task.getMultipleLoadTablePrefix().get() + String.format("%04x", taskIndex);
380
+ }
381
+
382
+ protected JdbcSchema newJdbcSchemaForNewTable(Schema schema)
383
+ {
384
+ final ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
385
+ for (Column c : schema.getColumns()) {
386
+ final String columnName = c.getName();
387
+ c.visit(new ColumnVisitor() {
388
+ public void booleanColumn(Column column)
389
+ {
390
+ columns.add(new JdbcColumn(
391
+ columnName, "BOOLEAN",
392
+ Types.BOOLEAN, 1, 0, false));
393
+ }
394
+
395
+ public void longColumn(Column column)
396
+ {
397
+ columns.add(new JdbcColumn(
398
+ columnName, "BIGINT",
399
+ Types.BIGINT, 22, 0, false));
400
+ }
401
+
402
+ public void doubleColumn(Column column)
403
+ {
404
+ columns.add(new JdbcColumn(
405
+ columnName, "DOUBLE PRECISION",
406
+ Types.FLOAT, 24, 0, false));
407
+ }
408
+
409
+ public void stringColumn(Column column)
410
+ {
411
+ columns.add(new JdbcColumn(
412
+ columnName, "CLOB",
413
+ Types.CLOB, 4000, 0, false)); // TODO size type param
414
+ }
415
+
416
+ public void timestampColumn(Column column)
417
+ {
418
+ columns.add(new JdbcColumn(
419
+ columnName, "TIMESTAMP",
420
+ Types.TIMESTAMP, 26, 0, false)); // size type param is from postgresql.
421
+ }
422
+ });
423
+ }
424
+ return new JdbcSchema(columns.build());
425
+ }
426
+
427
+ public JdbcSchema newJdbcSchemaFromExistentTable(JdbcOutputConnection connection,
428
+ String tableName) throws SQLException
429
+ {
430
+ DatabaseMetaData dbm = connection.getMetaData();
431
+ String escape = dbm.getSearchStringEscape();
432
+ String schemaNamePattern = JdbcUtils.escapeSearchString(connection.getSchemaName(), escape);
433
+
434
+ ResultSet rs = dbm.getPrimaryKeys(null, schemaNamePattern, tableName);
435
+ ImmutableList.Builder<String> primaryKeysBuilder = ImmutableList.builder();
436
+ try {
437
+ while(rs.next()) {
438
+ primaryKeysBuilder.add(rs.getString("COLUMN_NAME"));
439
+ }
440
+ } finally {
441
+ rs.close();
442
+ }
443
+ ImmutableList<String> primaryKeys = primaryKeysBuilder.build();
444
+
445
+ String tableNamePattern = JdbcUtils.escapeSearchString(tableName, escape);
446
+ ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
447
+ rs = dbm.getColumns(null, schemaNamePattern, tableNamePattern, null);
448
+ try {
449
+ while(rs.next()) {
450
+ String columnName = rs.getString("COLUMN_NAME");
451
+ String typeName = rs.getString("TYPE_NAME");
452
+ boolean isPrimaryKey = primaryKeys.contains(columnName);
453
+ typeName = typeName.toUpperCase(Locale.ENGLISH);
454
+ int sqlType = rs.getInt("DATA_TYPE");
455
+ int colSize = rs.getInt("COLUMN_SIZE");
456
+ int decDigit = rs.getInt("DECIMAL_DIGITS");
457
+ if (rs.wasNull()) {
458
+ decDigit = -1;
459
+ }
460
+ //rs.getString("IS_NULLABLE").equals("NO") // "YES" or "" // TODO
461
+ //rs.getString("COLUMN_DEF") // or null // TODO
462
+ columns.add(new JdbcColumn(
463
+ columnName, typeName,
464
+ sqlType, colSize, decDigit, isPrimaryKey));
465
+ }
466
+ } finally {
467
+ rs.close();
468
+ }
469
+ return new JdbcSchema(columns.build());
470
+ }
471
+
472
+ private JdbcSchema matchSchemaByColumnNames(Schema inputSchema, JdbcSchema targetTableSchema)
473
+ {
474
+ ImmutableList.Builder<JdbcColumn> jdbcColumns = ImmutableList.builder();
475
+
476
+ outer : for (Column column : inputSchema.getColumns()) {
477
+ for (JdbcColumn jdbcColumn : targetTableSchema.getColumns()) {
478
+ if (jdbcColumn.getName().equals(column.getName())) {
479
+ jdbcColumns.add(jdbcColumn);
480
+ continue outer;
481
+ }
482
+ }
483
+
484
+ jdbcColumns.add(JdbcColumn.skipColumn());
485
+ }
486
+
487
+ return new JdbcSchema(jdbcColumns.build());
488
+ }
489
+
490
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, final int taskIndex)
491
+ {
492
+ final PluginTask task = taskSource.loadTask(getTaskClass());
493
+ final Mode mode = task.getMode();
494
+
495
+ BatchInsert batch;
496
+ try {
497
+ batch = newBatchInsert(task);
498
+ } catch (IOException | SQLException ex) {
499
+ throw new RuntimeException(ex);
500
+ }
501
+ try {
502
+ PageReader reader = new PageReader(schema);
503
+ ColumnSetterFactory factory = newColumnSetterFactory(batch, reader, null); // TODO TimestampFormatter
504
+
505
+ JdbcSchema loadSchema = task.getLoadSchema();
506
+
507
+ ImmutableList.Builder<JdbcColumn> insertColumns = ImmutableList.builder();
508
+ ImmutableList.Builder<ColumnSetter> columnSetters = ImmutableList.builder();
509
+ for (JdbcColumn c : loadSchema.getColumns()) {
510
+ if (c.isSkipColumn()) {
511
+ columnSetters.add(factory.newSkipColumnSetter());
512
+ } else {
513
+ columnSetters.add(factory.newColumnSetter(c));
514
+ insertColumns.add(c);
515
+ }
516
+ }
517
+ final JdbcSchema insertSchema = new JdbcSchema(insertColumns.build());
518
+
519
+ final BatchInsert b = batch;
520
+ withRetry(new IdempotentSqlRunnable() {
521
+ public void run() throws SQLException
522
+ {
523
+ String loadTable;
524
+ boolean createTable;
525
+ if (mode.usesMultipleLoadTables()) {
526
+ // insert, truncate_insert, merge, replace
527
+ loadTable = formatMultipleLoadTableName(task, taskIndex);
528
+ JdbcOutputConnection con = newConnection(task, true, true);
529
+ try {
530
+ con.createTableIfNotExists(loadTable, insertSchema);
531
+ } finally {
532
+ con.close();
533
+ }
534
+
535
+ } else if (!mode.usesMultipleLoadTables() && mode.createAndSwapTable()) {
536
+ // replace_inplace
537
+ loadTable = task.getSwapTable().get();
538
+
539
+ } else {
540
+ // insert_direct
541
+ loadTable = task.getTable();
542
+ }
543
+ b.prepare(loadTable, insertSchema);
544
+ }
545
+ });
546
+
547
+ PluginPageOutput output = newPluginPageOutput(reader, batch, columnSetters.build(), task);
548
+ batch = null;
549
+ return output;
550
+
551
+ } catch (SQLException | InterruptedException ex) {
552
+ throw new RuntimeException(ex);
553
+
554
+ } finally {
555
+ if (batch != null) {
556
+ try {
557
+ batch.close();
558
+ } catch (IOException | SQLException ex) {
559
+ throw new RuntimeException(ex);
560
+ }
561
+ }
562
+ }
563
+ }
564
+
565
+ protected ColumnSetterFactory newColumnSetterFactory(BatchInsert batch, PageReader pageReader,
566
+ TimestampFormatter timestampFormatter)
567
+ {
568
+ return new ColumnSetterFactory(batch, pageReader, timestampFormatter);
569
+ }
570
+
571
+ protected PluginPageOutput newPluginPageOutput(PageReader reader,
572
+ BatchInsert batch, List<ColumnSetter> columnSetters,
573
+ PluginTask task)
574
+ {
575
+ return new PluginPageOutput(reader, batch, columnSetters, task.getBatchSize());
576
+ }
577
+
578
+ public static class PluginPageOutput
579
+ implements TransactionalPageOutput
580
+ {
581
+ protected final List<Column> columns;
582
+ protected final List<ColumnSetter> columnSetters;
583
+ private final PageReader pageReader;
584
+ private final BatchInsert batch;
585
+ private final int batchSize;
586
+ private final int foraceBatchFlushSize;
587
+
588
+ public PluginPageOutput(PageReader pageReader,
589
+ BatchInsert batch, List<ColumnSetter> columnSetters,
590
+ int batchSize)
591
+ {
592
+ this.pageReader = pageReader;
593
+ this.batch = batch;
594
+ this.columns = pageReader.getSchema().getColumns();
595
+ this.columnSetters = columnSetters;
596
+ this.batchSize = batchSize;
597
+ this.foraceBatchFlushSize = batchSize * 2;
598
+ }
599
+
600
+ @Override
601
+ public void add(Page page)
602
+ {
603
+ try {
604
+ pageReader.setPage(page);
605
+ while (pageReader.nextRecord()) {
606
+ if (batch.getBatchWeight() > foraceBatchFlushSize) {
607
+ batch.flush();
608
+ }
609
+ handleColumnsSetters();
610
+ batch.add();
611
+ }
612
+ if (batch.getBatchWeight() > batchSize) {
613
+ batch.flush();
614
+ }
615
+ } catch (IOException | SQLException ex) {
616
+ throw new RuntimeException(ex);
617
+ }
618
+ }
619
+
620
+ @Override
621
+ public void finish()
622
+ {
623
+ try {
624
+ batch.finish();
625
+ } catch (IOException | SQLException ex) {
626
+ throw new RuntimeException(ex);
627
+ }
628
+ }
629
+
630
+ @Override
631
+ public void close()
632
+ {
633
+ try {
634
+ batch.close();
635
+ } catch (IOException | SQLException ex) {
636
+ throw new RuntimeException(ex);
637
+ }
638
+ }
639
+
640
+ @Override
641
+ public void abort()
642
+ {
643
+ }
644
+
645
+ @Override
646
+ public CommitReport commit()
647
+ {
648
+ return Exec.newCommitReport();
649
+ }
650
+
651
+ protected void handleColumnsSetters()
652
+ {
653
+ int size = columnSetters.size();
654
+ for (int i=0; i < size; i++) {
655
+ columns.get(i).visit(columnSetters.get(i));
656
+ }
657
+ }
658
+
659
+ }
660
+
661
/**
 * A unit of database work handed to {@code withRetry}. As the name says,
 * implementations are expected to be safe to run more than once.
 */
public interface IdempotentSqlRunnable
{
    /**
     * Performs the work.
     *
     * @throws SQLException if the database operation fails
     */
    void run() throws SQLException;
}
665
+
666
+ protected void withRetry(IdempotentSqlRunnable op)
667
+ throws SQLException, InterruptedException
668
+ {
669
+ withRetry(op, "Operation failed");
670
+ }
671
+
672
+ protected void withRetry(final IdempotentSqlRunnable op, final String errorMessage)
673
+ throws SQLException, InterruptedException
674
+ {
675
+ try {
676
+ retryExecutor()
677
+ .setRetryLimit(12)
678
+ .setInitialRetryWait(1000)
679
+ .setMaxRetryWait(30 * 60 * 1000)
680
+ .runInterruptible(new IdempotentOperation<Void>() {
681
+ public Void call() throws Exception
682
+ {
683
+ op.run();
684
+ return null;
685
+ }
686
+
687
+ public void onRetry(Throwable exception, int retryCount, int retryLimit, int retryWait)
688
+ {
689
+ if (exception instanceof SQLException) {
690
+ SQLException ex = (SQLException) exception;
691
+ String sqlState = ex.getSQLState();
692
+ int errorCode = ex.getErrorCode();
693
+ logger.warn("{} ({}:{}), retrying {}/{} after {} seconds. Message: {}",
694
+ errorMessage, errorCode, sqlState, retryCount, retryLimit, retryWait/1000,
695
+ buildExceptionMessage(exception));
696
+ } else {
697
+ logger.warn("{}, retrying {}/{} after {} seconds. Message: {}",
698
+ errorMessage, retryCount, retryLimit, retryWait/1000,
699
+ buildExceptionMessage(exception));
700
+ }
701
+ if (retryCount % 3 == 0) {
702
+ logger.info("Error details:", exception);
703
+ }
704
+ }
705
+
706
+ public void onGiveup(Throwable firstException, Throwable lastException)
707
+ {
708
+ if (firstException instanceof SQLException) {
709
+ SQLException ex = (SQLException) firstException;
710
+ String sqlState = ex.getSQLState();
711
+ int errorCode = ex.getErrorCode();
712
+ logger.error("{} ({}:{})", errorMessage, errorCode, sqlState);
713
+ }
714
+ }
715
+
716
+ public boolean isRetryableException(Throwable exception)
717
+ {
718
+ //if (exception instanceof SQLException) {
719
+ // SQLException ex = (SQLException) exception;
720
+ // String sqlState = ex.getSQLState();
721
+ // int errorCode = ex.getErrorCode();
722
+ // return isRetryableSQLException(ex);
723
+ //}
724
+ return false; // TODO
725
+ }
726
+ });
727
+
728
+ } catch (ExecutionException ex) {
729
+ Throwable cause = ex.getCause();
730
+ Throwables.propagateIfInstanceOf(cause, SQLException.class);
731
+ throw Throwables.propagate(cause);
732
+ }
733
+ }
734
+
735
+ private String buildExceptionMessage(Throwable ex) {
736
+ StringBuilder sb = new StringBuilder();
737
+ sb.append(ex.getMessage());
738
+ if (ex.getCause() != null) {
739
+ buildExceptionMessageCont(sb, ex.getCause(), ex.getMessage());
740
+ }
741
+ return sb.toString();
742
+ }
743
+
744
+ private void buildExceptionMessageCont(StringBuilder sb, Throwable ex, String lastMessage) {
745
+ if (!lastMessage.equals(ex.getMessage())) {
746
+ // suppress same messages
747
+ sb.append(" < ");
748
+ sb.append(ex.getMessage());
749
+ }
750
+ if (ex.getCause() == null) {
751
+ return;
752
+ }
753
+ buildExceptionMessageCont(sb, ex.getCause(), ex.getMessage());
754
+ }
755
+ }