embulk-output-jdbc 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. checksums.yaml +7 -0
  2. data/build.gradle +2 -0
  3. data/classpath/embulk-output-jdbc-0.1.0.jar +0 -0
  4. data/lib/embulk/output/jdbc.rb +3 -0
  5. data/src/main/java/org/embulk/output/JdbcOutputPlugin.java +104 -0
  6. data/src/main/java/org/embulk/output/jdbc/AbstractJdbcOutputPlugin.java +701 -0
  7. data/src/main/java/org/embulk/output/jdbc/BatchInsert.java +54 -0
  8. data/src/main/java/org/embulk/output/jdbc/JdbcColumn.java +71 -0
  9. data/src/main/java/org/embulk/output/jdbc/JdbcOutputConnection.java +423 -0
  10. data/src/main/java/org/embulk/output/jdbc/JdbcOutputConnector.java +8 -0
  11. data/src/main/java/org/embulk/output/jdbc/JdbcSchema.java +37 -0
  12. data/src/main/java/org/embulk/output/jdbc/JdbcUtils.java +155 -0
  13. data/src/main/java/org/embulk/output/jdbc/RetryExecutor.java +105 -0
  14. data/src/main/java/org/embulk/output/jdbc/StandardBatchInsert.java +180 -0
  15. data/src/main/java/org/embulk/output/jdbc/setter/BooleanColumnSetter.java +52 -0
  16. data/src/main/java/org/embulk/output/jdbc/setter/ColumnSetter.java +121 -0
  17. data/src/main/java/org/embulk/output/jdbc/setter/ColumnSetterFactory.java +137 -0
  18. data/src/main/java/org/embulk/output/jdbc/setter/DoubleColumnSetter.java +51 -0
  19. data/src/main/java/org/embulk/output/jdbc/setter/LongColumnSetter.java +62 -0
  20. data/src/main/java/org/embulk/output/jdbc/setter/NullColumnSetter.java +43 -0
  21. data/src/main/java/org/embulk/output/jdbc/setter/SkipColumnSetter.java +35 -0
  22. data/src/main/java/org/embulk/output/jdbc/setter/SqlTimestampColumnSetter.java +48 -0
  23. data/src/main/java/org/embulk/output/jdbc/setter/StringColumnSetter.java +48 -0
  24. data/src/test/java/org/embulk/output/TestJdbcOutputPlugin.java +5 -0
  25. metadata +67 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 99881449e41482ec67cfa089d352b3006aa7caeb
4
+ data.tar.gz: ae1d3d4f95451d5462a12f3a748fa7e0ea8784d6
5
+ SHA512:
6
+ metadata.gz: a7af9fcf1de60876cdebc755f8c3c67c562c3ca36059f8f346fd7a4ba2062657bb484dc9417eb928fdc9381e0097a52a27c03035458e92cdf14e05806bf08635
7
+ data.tar.gz: 8bd197e134df1eab9e4446984e70c1b8c8dc710a7920511ad3599471e30f3557bca518497280b7e7ad4bfc92c2e6e0657ff863032966749e84ff38f6027ec147
data/build.gradle ADDED
@@ -0,0 +1,2 @@
1
+ dependencies {
2
+ }
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_output(
2
+ :jdbc, "org.embulk.output.JdbcOutputPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1,104 @@
1
+ package org.embulk.output;
2
+
3
+ import java.util.Properties;
4
+ import java.sql.Driver;
5
+ import java.io.IOException;
6
+ import java.sql.Connection;
7
+ import java.sql.SQLException;
8
+ import com.google.common.base.Throwables;
9
+ import org.embulk.spi.Exec;
10
+ import org.embulk.config.Config;
11
+ import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
12
+ import org.embulk.output.jdbc.BatchInsert;
13
+ import org.embulk.output.jdbc.StandardBatchInsert;
14
+ import org.embulk.output.jdbc.JdbcOutputConnector;
15
+ import org.embulk.output.jdbc.JdbcOutputConnection;
16
+
17
+ public class JdbcOutputPlugin
18
+ extends AbstractJdbcOutputPlugin
19
+ {
20
+ public interface GenericPluginTask extends PluginTask
21
+ {
22
+ @Config("driver_name")
23
+ public String getDriverName();
24
+
25
+ @Config("driver_class")
26
+ public String getDriverClass();
27
+ }
28
+
29
+ @Override
30
+ protected Class<? extends PluginTask> getTaskClass()
31
+ {
32
+ return GenericPluginTask.class;
33
+ }
34
+
35
+ @Override
36
+ protected GenericOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
37
+ {
38
+ GenericPluginTask g = (GenericPluginTask) task;
39
+
40
+ String url;
41
+ if (g.getPort().isPresent()) {
42
+ url = String.format("jdbc:%s://%s:%d/%s",
43
+ g.getDriverName(), g.getHost(), g.getPort().get(), g.getDatabase());
44
+ } else {
45
+ url = String.format("jdbc:%s://%s:%d/%s",
46
+ g.getDriverName(), g.getHost(), g.getDatabase());
47
+ }
48
+
49
+ Properties props = new Properties();
50
+ props.setProperty("user", g.getUser());
51
+ props.setProperty("password", g.getPassword());
52
+
53
+ props.putAll(g.getOptions());
54
+
55
+ return new GenericOutputConnector(url, props, g.getDriverClass(),
56
+ g.getSchema().orNull());
57
+ }
58
+
59
+ private static class GenericOutputConnector
60
+ implements JdbcOutputConnector
61
+ {
62
+ private final Driver driver;
63
+ private final String url;
64
+ private final Properties properties;
65
+ private final String schemaName;
66
+
67
+ public GenericOutputConnector(String url, Properties properties, String driverClass,
68
+ String schemaName)
69
+ {
70
+ try {
71
+ // TODO check Class.forName(driverClass) is a Driver before newInstance
72
+ // for security
73
+ this.driver = (Driver) Class.forName(driverClass).newInstance();
74
+ } catch (Exception ex) {
75
+ throw Throwables.propagate(ex);
76
+ }
77
+ this.url = url;
78
+ this.properties = properties;
79
+ this.schemaName = schemaName;
80
+ }
81
+
82
+ @Override
83
+ public JdbcOutputConnection connect(boolean autoCommit) throws SQLException
84
+ {
85
+ Connection c = driver.connect(url, properties);
86
+ try {
87
+ c.setAutoCommit(autoCommit);
88
+ JdbcOutputConnection con = new JdbcOutputConnection(c, schemaName);
89
+ c = null;
90
+ return con;
91
+ } finally {
92
+ if (c != null) {
93
+ c.close();
94
+ }
95
+ }
96
+ }
97
+ }
98
+
99
+ @Override
100
+ protected BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException
101
+ {
102
+ return new StandardBatchInsert(getConnector(task, true));
103
+ }
104
+ }
@@ -0,0 +1,701 @@
1
+ package org.embulk.output.jdbc;
2
+
3
+ import java.util.List;
4
+ import java.util.Locale;
5
+ import java.util.Properties;
6
+ import java.util.concurrent.ExecutionException;
7
+ import java.io.IOException;
8
+ import java.sql.Types;
9
+ import java.sql.Connection;
10
+ import java.sql.ResultSet;
11
+ import java.sql.DatabaseMetaData;
12
+ import java.sql.SQLException;
13
+ import org.slf4j.Logger;
14
+ import com.google.common.base.Optional;
15
+ import com.google.common.base.Throwables;
16
+ import com.google.common.collect.ImmutableList;
17
+ import org.embulk.config.CommitReport;
18
+ import org.embulk.config.Config;
19
+ import org.embulk.config.ConfigDefault;
20
+ import org.embulk.config.ConfigDiff;
21
+ import org.embulk.config.ConfigException;
22
+ import org.embulk.config.ConfigSource;
23
+ import org.embulk.config.Task;
24
+ import org.embulk.config.TaskSource;
25
+ import org.embulk.spi.Exec;
26
+ import org.embulk.spi.Column;
27
+ import org.embulk.spi.ColumnVisitor;
28
+ import org.embulk.spi.OutputPlugin;
29
+ import org.embulk.spi.PageOutput;
30
+ import org.embulk.spi.Schema;
31
+ import org.embulk.spi.TransactionalPageOutput;
32
+ import org.embulk.spi.Page;
33
+ import org.embulk.spi.PageReader;
34
+ import org.embulk.spi.time.Timestamp;
35
+ import org.embulk.output.jdbc.setter.ColumnSetter;
36
+ import org.embulk.output.jdbc.setter.ColumnSetterFactory;
37
+ import org.embulk.output.jdbc.RetryExecutor.IdempotentOperation;
38
+ import static org.embulk.output.jdbc.RetryExecutor.retryExecutor;
39
+
40
+ public abstract class AbstractJdbcOutputPlugin
41
+ implements OutputPlugin
42
+ {
43
+ private final Logger logger = Exec.getLogger(getClass());
44
+
45
+ public interface PluginTask
46
+ extends Task
47
+ {
48
+ @Config("host")
49
+ public String getHost();
50
+
51
+ @Config("port")
52
+ @ConfigDefault("null")
53
+ public Optional<Integer> getPort();
54
+
55
+ @Config("user")
56
+ public String getUser();
57
+
58
+ @Config("password")
59
+ @ConfigDefault("\"\"")
60
+ public String getPassword();
61
+
62
+ @Config("options")
63
+ @ConfigDefault("{}")
64
+ public Properties getOptions();
65
+
66
+ @Config("database")
67
+ public String getDatabase();
68
+
69
+ @Config("schema")
70
+ @ConfigDefault("null")
71
+ public Optional<String> getSchema();
72
+
73
+ @Config("table")
74
+ public String getTable();
75
+
76
+ @Config("mode")
77
+ public String getModeConfig();
78
+
79
+ @Config("batch_size")
80
+ @ConfigDefault("16777216")
81
+ // TODO set minimum number
82
+ public int getBatchSize();
83
+
84
+ public void setMode(Mode mode);
85
+ public Mode getMode();
86
+
87
+ public JdbcSchema getLoadSchema();
88
+ public void setLoadSchema(JdbcSchema schema);
89
+
90
+ public Optional<String> getSwapTable();
91
+ public void setSwapTable(Optional<String> name);
92
+
93
+ public Optional<String> getMultipleLoadTablePrefix();
94
+ public void setMultipleLoadTablePrefix(Optional<String> prefix);
95
+ }
96
+
97
+ // for subclasses to add @Config
98
+ protected Class<? extends PluginTask> getTaskClass()
99
+ {
100
+ return PluginTask.class;
101
+ }
102
+
103
+ protected abstract JdbcOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation);
104
+
105
+ protected abstract BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException;
106
+
107
+ protected JdbcOutputConnection newConnection(PluginTask task, boolean retryableMetadataOperation,
108
+ boolean autoCommit) throws SQLException
109
+ {
110
+ return getConnector(task, retryableMetadataOperation).connect(autoCommit);
111
+ }
112
+
113
/** Loading strategies; the predicates below describe how each one stages its data. */
public enum Mode {
    INSERT,
    INSERT_DIRECT,
    TRUNCATE_INSERT,
    MERGE,
    REPLACE,
    REPLACE_INPLACE;
    //REPLACE_PARTITIONING,  // MySQL: partitioning, PostgreSQL: inheritance

    /** True only for INSERT_DIRECT. */
    public boolean isDirectWrite()
    {
        switch (this) {
        case INSERT_DIRECT:
            return true;
        default:
            return false;
        }
    }

    /** True for INSERT_DIRECT and REPLACE_INPLACE. */
    public boolean isInplace()
    {
        switch (this) {
        case INSERT_DIRECT:
        case REPLACE_INPLACE:
            return true;
        default:
            return false;
        }
    }

    /** True for every mode that is not in-place. */
    public boolean usesMultipleLoadTables()
    {
        return this != INSERT_DIRECT && this != REPLACE_INPLACE;
    }

    /** True for REPLACE and REPLACE_INPLACE. */
    public boolean createAndSwapTable()
    {
        switch (this) {
        case REPLACE:
        case REPLACE_INPLACE:
            return true;
        default:
            return false;
        }
    }
}
142
+
143
+ public ConfigDiff transaction(ConfigSource config,
144
+ Schema schema, int processorCount,
145
+ OutputPlugin.Control control)
146
+ {
147
+ PluginTask task = config.loadConfig(getTaskClass());
148
+
149
+ // TODO this is a temporary code. behavior will change in a future release.
150
+ switch(task.getModeConfig()) {
151
+ case "insert":
152
+ task.setMode(Mode.INSERT_DIRECT);
153
+ break;
154
+ case "replace":
155
+ task.setMode(Mode.REPLACE_INPLACE);
156
+ break;
157
+ default:
158
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are: insert, replace", task.getModeConfig()));
159
+ }
160
+
161
+ //switch(task.getModeConfig()) {
162
+ ////case "insert":
163
+ //// task.setMode(Mode.INSERT);
164
+ //// break;
165
+ //case "insert_direct":
166
+ // task.setMode(Mode.INSERT_DIRECT);
167
+ // break;
168
+ ////case "truncate_insert": // TODO
169
+ //// task.setMode(Mode.TRUNCATE_INSERT);
170
+ //// break;
171
+ ////case "merge": // TODO
172
+ //// task.setMode(Mode.MERGE);
173
+ //// break;
174
+ ////case "replace":
175
+ //// task.setMode(Mode.REPLACE);
176
+ //// break;
177
+ //case "replace_inplace":
178
+ // task.setMode(Mode.REPLACE_INPLACE);
179
+ // break;
180
+ //default:
181
+ // new ConfigException(String.format("Unknown mode '%s'. Supported modes are: insert_direct, replace_inplace", task.getModeConfig()));
182
+ //}
183
+
184
+ task = begin(task, schema, processorCount);
185
+ control.run(task.dump());
186
+ return commit(task, schema, processorCount);
187
+ }
188
+
189
+ public ConfigDiff resume(TaskSource taskSource,
190
+ Schema schema, int processorCount,
191
+ OutputPlugin.Control control)
192
+ {
193
+ PluginTask task = taskSource.loadTask(getTaskClass());
194
+
195
+ if (task.getMode().isInplace()) {
196
+ throw new UnsupportedOperationException("inplace mode is not resumable. You need to delete partially-loaded records from the database and restart the entire transaction.");
197
+ }
198
+
199
+ task = begin(task, schema, processorCount);
200
+ control.run(task.dump());
201
+ return commit(task, schema, processorCount);
202
+ }
203
+
204
+ private String getTransactionUniqueName()
205
+ {
206
+ // TODO use uuid?
207
+ Timestamp t = Exec.session().getTransactionTime();
208
+ return String.format("%016x%08x", t.getEpochSecond(), t.getNano());
209
+ }
210
+
211
+ private PluginTask begin(final PluginTask task,
212
+ final Schema schema, int processorCount)
213
+ {
214
+ try {
215
+ withRetry(new IdempotentSqlRunnable() { // no intermediate data if isDirectWrite == true
216
+ public void run() throws SQLException
217
+ {
218
+ JdbcOutputConnection con = newConnection(task, true, false);
219
+ try {
220
+ doBegin(con, task, schema);
221
+ } finally {
222
+ con.close();
223
+ }
224
+ }
225
+ });
226
+ } catch (SQLException | InterruptedException ex) {
227
+ throw new RuntimeException(ex);
228
+ }
229
+ return task;
230
+ }
231
+
232
+ private ConfigDiff commit(final PluginTask task,
233
+ Schema schema, final int processorCount)
234
+ {
235
+ if (!task.getMode().isDirectWrite()) { // no intermediate data if isDirectWrite == true
236
+ try {
237
+ withRetry(new IdempotentSqlRunnable() {
238
+ public void run() throws SQLException
239
+ {
240
+ JdbcOutputConnection con = newConnection(task, false, false);
241
+ try {
242
+ doCommit(con, task, processorCount);
243
+ } finally {
244
+ con.close();
245
+ }
246
+ }
247
+ });
248
+ } catch (SQLException | InterruptedException ex) {
249
+ throw new RuntimeException(ex);
250
+ }
251
+ }
252
+ return Exec.newConfigDiff();
253
+ }
254
+
255
+ public void cleanup(TaskSource taskSource,
256
+ Schema schema, final int processorCount,
257
+ final List<CommitReport> successCommitReports)
258
+ {
259
+ final PluginTask task = taskSource.loadTask(getTaskClass());
260
+
261
+ if (!task.getMode().isDirectWrite()) { // no intermediate data if isDirectWrite == true
262
+ try {
263
+ withRetry(new IdempotentSqlRunnable() {
264
+ public void run() throws SQLException
265
+ {
266
+ JdbcOutputConnection con = newConnection(task, true, true);
267
+ try {
268
+ doCleanup(con, task, processorCount, successCommitReports);
269
+ } finally {
270
+ con.close();
271
+ }
272
+ }
273
+ });
274
+ } catch (SQLException | InterruptedException ex) {
275
+ throw new RuntimeException(ex);
276
+ }
277
+ }
278
+ }
279
+
280
+ protected void doBegin(JdbcOutputConnection con,
281
+ PluginTask task, Schema schema) throws SQLException
282
+ {
283
+ Mode mode = task.getMode();
284
+
285
+ JdbcSchema targetTableSchema;
286
+ if (mode.createAndSwapTable()) {
287
+ // DROP TABLE IF EXISTS xyz__0000000054d92dee1e452158_bulk_load_temp
288
+ // CREATE TABLE IF NOT EXISTS xyz__0000000054d92dee1e452158_bulk_load_temp
289
+ // swapTableName = "xyz__0000000054d92dee1e452158_bulk_load_temp"
290
+ String swapTableName = task.getTable() + "_" + getTransactionUniqueName() + "_bulk_load_temp";
291
+ con.dropTableIfExists(swapTableName);
292
+ con.createTableIfNotExists(swapTableName, newJdbcSchemaForNewTable(schema));
293
+ targetTableSchema = newJdbcSchemaFromExistentTable(con, swapTableName);
294
+ task.setSwapTable(Optional.of(swapTableName));
295
+ } else {
296
+ // CREATE TABLE IF NOT EXISTS xyz
297
+ con.createTableIfNotExists(task.getTable(), newJdbcSchemaForNewTable(schema));
298
+ targetTableSchema = newJdbcSchemaFromExistentTable(con, task.getTable());
299
+ task.setSwapTable(Optional.<String>absent());
300
+ }
301
+
302
+ if (mode.usesMultipleLoadTables()) {
303
+ // multipleLoadTablePrefix = "xyz__0000000054d92dee1e452158_"
304
+ // workers run:
305
+ // CREATE TABLE xyz__0000000054d92dee1e452158_%d
306
+ String multipleLoadTablePrefix = task.getTable() + "_" + getTransactionUniqueName();
307
+ task.setMultipleLoadTablePrefix(Optional.of(multipleLoadTablePrefix));
308
+ } else {
309
+ task.setMultipleLoadTablePrefix(Optional.<String>absent());
310
+ }
311
+
312
+ task.setLoadSchema(matchSchemaByColumnNames(schema, targetTableSchema));
313
+ }
314
+
315
+ protected void doCommit(JdbcOutputConnection con, PluginTask task, int processorCount)
316
+ throws SQLException
317
+ {
318
+ switch (task.getMode()) {
319
+ case INSERT:
320
+ // aggregate insert into target
321
+ //con.gatherInsertTables();
322
+ throw new UnsupportedOperationException("not implemented yet"); // TODO
323
+ case INSERT_DIRECT:
324
+ // already done
325
+ break;
326
+ case TRUNCATE_INSERT:
327
+ // truncate & aggregate insert into target
328
+ throw new UnsupportedOperationException("not implemented yet");
329
+ //break;
330
+ case MERGE:
331
+ // aggregate merge into target
332
+ throw new UnsupportedOperationException("not implemented yet");
333
+ //break;
334
+ case REPLACE:
335
+ if (processorCount == 1) {
336
+ // swap table
337
+ con.replaceTable(task.getSwapTable().get(), task.getLoadSchema(), task.getTable());
338
+ } else {
339
+ // aggregate insert into swap table & swap table
340
+ throw new UnsupportedOperationException("not implemented yet");
341
+ }
342
+ break;
343
+ case REPLACE_INPLACE:
344
+ // swap table
345
+ con.replaceTable(task.getSwapTable().get(), task.getLoadSchema(), task.getTable());
346
+ break;
347
+ }
348
+ }
349
+
350
+ protected void doCleanup(JdbcOutputConnection con, PluginTask task, int processorCount,
351
+ List<CommitReport> successCommitReports)
352
+ throws SQLException
353
+ {
354
+ if (task.getSwapTable().isPresent()) {
355
+ con.dropTableIfExists(task.getSwapTable().get());
356
+ }
357
+ if (task.getMultipleLoadTablePrefix().isPresent()) {
358
+ for (int i=0; i < processorCount; i++) {
359
+ con.dropTableIfExists(formatMultipleLoadTableName(task, i));
360
+ }
361
+ }
362
+ }
363
+
364
+ static String formatMultipleLoadTableName(PluginTask task, int processorIndex)
365
+ {
366
+ return task.getMultipleLoadTablePrefix().get() + String.format("%04x", processorIndex);
367
+ }
368
+
369
+ protected JdbcSchema newJdbcSchemaForNewTable(Schema schema)
370
+ {
371
+ final ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
372
+ for (Column c : schema.getColumns()) {
373
+ final String columnName = c.getName();
374
+ c.visit(new ColumnVisitor() {
375
+ public void booleanColumn(Column column)
376
+ {
377
+ columns.add(new JdbcColumn(
378
+ columnName, "BOOLEAN",
379
+ Types.BOOLEAN, 1, 0));
380
+ }
381
+
382
+ public void longColumn(Column column)
383
+ {
384
+ columns.add(new JdbcColumn(
385
+ columnName, "BIGINT",
386
+ Types.BIGINT, 22, 0));
387
+ }
388
+
389
+ public void doubleColumn(Column column)
390
+ {
391
+ columns.add(new JdbcColumn(
392
+ columnName, "DOUBLE PRECISION",
393
+ Types.FLOAT, 24, 0));
394
+ }
395
+
396
+ public void stringColumn(Column column)
397
+ {
398
+ columns.add(new JdbcColumn(
399
+ columnName, "CLOB",
400
+ Types.CLOB, 4000, 0)); // TODO size type param
401
+ }
402
+
403
+ public void timestampColumn(Column column)
404
+ {
405
+ columns.add(new JdbcColumn(
406
+ columnName, "TIMESTAMP",
407
+ Types.TIMESTAMP, 26, 0)); // size type param is from postgresql.
408
+ }
409
+ });
410
+ }
411
+ return new JdbcSchema(columns.build());
412
+ }
413
+
414
+ public JdbcSchema newJdbcSchemaFromExistentTable(JdbcOutputConnection connection,
415
+ String tableName) throws SQLException
416
+ {
417
+ DatabaseMetaData dbm = connection.getMetaData();
418
+ String escape = dbm.getSearchStringEscape();
419
+
420
+ ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
421
+ String schemaNamePattern = JdbcUtils.escapeSearchString(connection.getSchemaName(), escape);
422
+ String tableNamePattern = JdbcUtils.escapeSearchString(tableName, escape);
423
+ ResultSet rs = dbm.getColumns(null, schemaNamePattern, tableNamePattern, null);
424
+ try {
425
+ while(rs.next()) {
426
+ String columnName = rs.getString("COLUMN_NAME");
427
+ String typeName = rs.getString("TYPE_NAME");
428
+ typeName = typeName.toUpperCase(Locale.ENGLISH);
429
+ int sqlType = rs.getInt("DATA_TYPE");
430
+ int colSize = rs.getInt("COLUMN_SIZE");
431
+ int decDigit = rs.getInt("DECIMAL_DIGITS");
432
+ if (rs.wasNull()) {
433
+ decDigit = -1;
434
+ }
435
+ //rs.getString("IS_NULLABLE").equals("NO") // "YES" or "" // TODO
436
+ //rs.getString("COLUMN_DEF") // or null // TODO
437
+ columns.add(new JdbcColumn(
438
+ columnName, typeName,
439
+ sqlType, colSize, decDigit));
440
+ }
441
+ } finally {
442
+ rs.close();
443
+ }
444
+ return new JdbcSchema(columns.build());
445
+ }
446
+
447
+ private JdbcSchema matchSchemaByColumnNames(Schema inputSchema, JdbcSchema targetTableSchema)
448
+ {
449
+ // TODO for each inputSchema.getColumns(), search a column whose name
450
+ // matches with targetTableSchema. if not match, create JdbcSchema.skipColumn().
451
+ return targetTableSchema;
452
+ }
453
+
454
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, final int processorIndex)
455
+ {
456
+ final PluginTask task = taskSource.loadTask(getTaskClass());
457
+ final Mode mode = task.getMode();
458
+
459
+ BatchInsert batch;
460
+ try {
461
+ batch = newBatchInsert(task);
462
+ } catch (IOException | SQLException ex) {
463
+ throw new RuntimeException(ex);
464
+ }
465
+ try {
466
+ PageReader reader = new PageReader(schema);
467
+ ColumnSetterFactory factory = new ColumnSetterFactory(batch, reader, null); // TODO TimestampFormatter
468
+
469
+ JdbcSchema loadSchema = task.getLoadSchema();
470
+
471
+ ImmutableList.Builder<JdbcColumn> insertColumns = ImmutableList.builder();
472
+ ImmutableList.Builder<ColumnSetter> columnSetters = ImmutableList.builder();
473
+ for (JdbcColumn c : loadSchema.getColumns()) {
474
+ if (c.isSkipColumn()) {
475
+ columnSetters.add(factory.newSkipColumnSetter());
476
+ } else {
477
+ columnSetters.add(factory.newColumnSetter(c));
478
+ insertColumns.add(c);
479
+ }
480
+ }
481
+ final JdbcSchema insertSchema = new JdbcSchema(insertColumns.build());
482
+
483
+ final BatchInsert b = batch;
484
+ withRetry(new IdempotentSqlRunnable() {
485
+ public void run() throws SQLException
486
+ {
487
+ String loadTable;
488
+ boolean createTable;
489
+ if (mode.usesMultipleLoadTables()) {
490
+ // insert, truncate_insert, merge, replace
491
+ loadTable = formatMultipleLoadTableName(task, processorIndex);
492
+ JdbcOutputConnection con = newConnection(task, true, true);
493
+ try {
494
+ con.createTableIfNotExists(loadTable, insertSchema);
495
+ } finally {
496
+ con.close();
497
+ }
498
+
499
+ } else if (!mode.usesMultipleLoadTables() && mode.createAndSwapTable()) {
500
+ // replace_inplace
501
+ loadTable = task.getSwapTable().get();
502
+
503
+ } else {
504
+ // insert_direct
505
+ loadTable = task.getTable();
506
+ }
507
+
508
+ b.prepare(loadTable, insertSchema);
509
+ }
510
+ });
511
+
512
+ PluginPageOutput output = new PluginPageOutput(reader, batch, columnSetters.build(),
513
+ task.getBatchSize());
514
+ batch = null;
515
+ return output;
516
+
517
+ } catch (SQLException | InterruptedException ex) {
518
+ throw new RuntimeException(ex);
519
+
520
+ } finally {
521
+ if (batch != null) {
522
+ try {
523
+ batch.close();
524
+ } catch (IOException | SQLException ex) {
525
+ throw new RuntimeException(ex);
526
+ }
527
+ }
528
+ }
529
+ }
530
+
531
+ public static class PluginPageOutput
532
+ implements TransactionalPageOutput
533
+ {
534
+ private final PageReader pageReader;
535
+ private final BatchInsert batch;
536
+ private final List<Column> columns;
537
+ private final List<ColumnSetter> columnSetters;
538
+ private final int batchSize;
539
+ private final int foraceBatchFlushSize;
540
+
541
+ public PluginPageOutput(PageReader pageReader,
542
+ BatchInsert batch, List<ColumnSetter> columnSetters,
543
+ int batchSize)
544
+ {
545
+ this.pageReader = pageReader;
546
+ this.batch = batch;
547
+ this.columns = pageReader.getSchema().getColumns();
548
+ this.columnSetters = columnSetters;
549
+ this.batchSize = batchSize;
550
+ this.foraceBatchFlushSize = batchSize * 2;
551
+ }
552
+
553
+ @Override
554
+ public void add(Page page)
555
+ {
556
+ try {
557
+ pageReader.setPage(page);
558
+ while (pageReader.nextRecord()) {
559
+ if (batch.getBatchWeight() > foraceBatchFlushSize) {
560
+ batch.flush();
561
+ }
562
+ for (int i=0; i < columnSetters.size(); i++) {
563
+ columns.get(i).visit(columnSetters.get(i));
564
+ }
565
+ batch.add();
566
+ }
567
+ if (batch.getBatchWeight() > batchSize) {
568
+ batch.flush();
569
+ }
570
+ } catch (IOException | SQLException ex) {
571
+ throw new RuntimeException(ex);
572
+ }
573
+ }
574
+
575
+ @Override
576
+ public void finish()
577
+ {
578
+ try {
579
+ batch.finish();
580
+ } catch (IOException | SQLException ex) {
581
+ throw new RuntimeException(ex);
582
+ }
583
+ }
584
+
585
+ @Override
586
+ public void close()
587
+ {
588
+ try {
589
+ batch.close();
590
+ } catch (IOException | SQLException ex) {
591
+ throw new RuntimeException(ex);
592
+ }
593
+ }
594
+
595
+ @Override
596
+ public void abort()
597
+ {
598
+ }
599
+
600
+ @Override
601
+ public CommitReport commit()
602
+ {
603
+ return Exec.newCommitReport();
604
+ }
605
+ }
606
+
607
/**
 * A database action handed to {@code withRetry}, which may execute it more than
 * once; implementations are therefore expected to be safe to repeat.
 */
public interface IdempotentSqlRunnable
{
    void run() throws SQLException;
}
611
+
612
+ protected void withRetry(IdempotentSqlRunnable op)
613
+ throws SQLException, InterruptedException
614
+ {
615
+ withRetry(op, "Operation failed");
616
+ }
617
+
618
+ protected void withRetry(final IdempotentSqlRunnable op, final String errorMessage)
619
+ throws SQLException, InterruptedException
620
+ {
621
+ try {
622
+ retryExecutor()
623
+ .setRetryLimit(12)
624
+ .setInitialRetryWait(1000)
625
+ .setMaxRetryWait(30 * 60 * 1000)
626
+ .runInterruptible(new IdempotentOperation<Void>() {
627
+ public Void call() throws Exception
628
+ {
629
+ op.run();
630
+ return null;
631
+ }
632
+
633
+ public void onRetry(Throwable exception, int retryCount, int retryLimit, int retryWait)
634
+ {
635
+ if (exception instanceof SQLException) {
636
+ SQLException ex = (SQLException) exception;
637
+ String sqlState = ex.getSQLState();
638
+ int errorCode = ex.getErrorCode();
639
+ logger.warn("{} ({}:{}), retrying {}/{} after {} seconds. Message: {}",
640
+ errorMessage, errorCode, sqlState, retryCount, retryLimit, retryWait/1000,
641
+ buildExceptionMessage(exception));
642
+ } else {
643
+ logger.warn("{}, retrying {}/{} after {} seconds. Message: {}",
644
+ errorMessage, retryCount, retryLimit, retryWait/1000,
645
+ buildExceptionMessage(exception));
646
+ }
647
+ if (retryCount % 3 == 0) {
648
+ logger.info("Error details:", exception);
649
+ }
650
+ }
651
+
652
+ public void onGiveup(Throwable firstException, Throwable lastException)
653
+ {
654
+ if (firstException instanceof SQLException) {
655
+ SQLException ex = (SQLException) firstException;
656
+ String sqlState = ex.getSQLState();
657
+ int errorCode = ex.getErrorCode();
658
+ logger.error("{} ({}:{})", errorMessage, errorCode, sqlState);
659
+ }
660
+ }
661
+
662
+ public boolean isRetryableException(Throwable exception)
663
+ {
664
+ if (exception instanceof SQLException) {
665
+ SQLException ex = (SQLException) exception;
666
+ String sqlState = ex.getSQLState();
667
+ int errorCode = ex.getErrorCode();
668
+ return isRetryableException(ex);
669
+ }
670
+ return false; // TODO
671
+ }
672
+ });
673
+
674
+ } catch (ExecutionException ex) {
675
+ Throwable cause = ex.getCause();
676
+ Throwables.propagateIfInstanceOf(cause, SQLException.class);
677
+ throw Throwables.propagate(cause);
678
+ }
679
+ }
680
+
681
+ private String buildExceptionMessage(Throwable ex) {
682
+ StringBuilder sb = new StringBuilder();
683
+ sb.append(ex.getMessage());
684
+ if (ex.getCause() != null) {
685
+ buildExceptionMessageCont(sb, ex.getCause(), ex.getMessage());
686
+ }
687
+ return sb.toString();
688
+ }
689
+
690
+ private void buildExceptionMessageCont(StringBuilder sb, Throwable ex, String lastMessage) {
691
+ if (!lastMessage.equals(ex.getMessage())) {
692
+ // suppress same messages
693
+ sb.append(" < ");
694
+ sb.append(ex.getMessage());
695
+ }
696
+ if (ex.getCause() == null) {
697
+ return;
698
+ }
699
+ buildExceptionMessageCont(sb, ex.getCause(), ex.getMessage());
700
+ }
701
+ }