embulk-output-jdbc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. checksums.yaml +7 -0
  2. data/build.gradle +2 -0
  3. data/classpath/embulk-output-jdbc-0.1.0.jar +0 -0
  4. data/lib/embulk/output/jdbc.rb +3 -0
  5. data/src/main/java/org/embulk/output/JdbcOutputPlugin.java +104 -0
  6. data/src/main/java/org/embulk/output/jdbc/AbstractJdbcOutputPlugin.java +701 -0
  7. data/src/main/java/org/embulk/output/jdbc/BatchInsert.java +54 -0
  8. data/src/main/java/org/embulk/output/jdbc/JdbcColumn.java +71 -0
  9. data/src/main/java/org/embulk/output/jdbc/JdbcOutputConnection.java +423 -0
  10. data/src/main/java/org/embulk/output/jdbc/JdbcOutputConnector.java +8 -0
  11. data/src/main/java/org/embulk/output/jdbc/JdbcSchema.java +37 -0
  12. data/src/main/java/org/embulk/output/jdbc/JdbcUtils.java +155 -0
  13. data/src/main/java/org/embulk/output/jdbc/RetryExecutor.java +105 -0
  14. data/src/main/java/org/embulk/output/jdbc/StandardBatchInsert.java +180 -0
  15. data/src/main/java/org/embulk/output/jdbc/setter/BooleanColumnSetter.java +52 -0
  16. data/src/main/java/org/embulk/output/jdbc/setter/ColumnSetter.java +121 -0
  17. data/src/main/java/org/embulk/output/jdbc/setter/ColumnSetterFactory.java +137 -0
  18. data/src/main/java/org/embulk/output/jdbc/setter/DoubleColumnSetter.java +51 -0
  19. data/src/main/java/org/embulk/output/jdbc/setter/LongColumnSetter.java +62 -0
  20. data/src/main/java/org/embulk/output/jdbc/setter/NullColumnSetter.java +43 -0
  21. data/src/main/java/org/embulk/output/jdbc/setter/SkipColumnSetter.java +35 -0
  22. data/src/main/java/org/embulk/output/jdbc/setter/SqlTimestampColumnSetter.java +48 -0
  23. data/src/main/java/org/embulk/output/jdbc/setter/StringColumnSetter.java +48 -0
  24. data/src/test/java/org/embulk/output/TestJdbcOutputPlugin.java +5 -0
  25. metadata +67 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 99881449e41482ec67cfa089d352b3006aa7caeb
4
+ data.tar.gz: ae1d3d4f95451d5462a12f3a748fa7e0ea8784d6
5
+ SHA512:
6
+ metadata.gz: a7af9fcf1de60876cdebc755f8c3c67c562c3ca36059f8f346fd7a4ba2062657bb484dc9417eb928fdc9381e0097a52a27c03035458e92cdf14e05806bf08635
7
+ data.tar.gz: 8bd197e134df1eab9e4446984e70c1b8c8dc710a7920511ad3599471e30f3557bca518497280b7e7ad4bfc92c2e6e0657ff863032966749e84ff38f6027ec147
data/build.gradle ADDED
@@ -0,0 +1,2 @@
1
+ dependencies {
2
+ }
@@ -0,0 +1,3 @@
1
# Register the Java class below as the Embulk output plugin named :jdbc.
# The classpath directory is resolved relative to this file.
classpath_dir = File.expand_path('../../../../classpath', __FILE__)
Embulk::JavaPlugin.register_output(:jdbc, "org.embulk.output.JdbcOutputPlugin", classpath_dir)
@@ -0,0 +1,104 @@
1
+ package org.embulk.output;
2
+
3
+ import java.util.Properties;
4
+ import java.sql.Driver;
5
+ import java.io.IOException;
6
+ import java.sql.Connection;
7
+ import java.sql.SQLException;
8
+ import com.google.common.base.Throwables;
9
+ import org.embulk.spi.Exec;
10
+ import org.embulk.config.Config;
11
+ import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
12
+ import org.embulk.output.jdbc.BatchInsert;
13
+ import org.embulk.output.jdbc.StandardBatchInsert;
14
+ import org.embulk.output.jdbc.JdbcOutputConnector;
15
+ import org.embulk.output.jdbc.JdbcOutputConnection;
16
+
17
+ public class JdbcOutputPlugin
18
+ extends AbstractJdbcOutputPlugin
19
+ {
20
+ public interface GenericPluginTask extends PluginTask
21
+ {
22
+ @Config("driver_name")
23
+ public String getDriverName();
24
+
25
+ @Config("driver_class")
26
+ public String getDriverClass();
27
+ }
28
+
29
+ @Override
30
+ protected Class<? extends PluginTask> getTaskClass()
31
+ {
32
+ return GenericPluginTask.class;
33
+ }
34
+
35
+ @Override
36
+ protected GenericOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
37
+ {
38
+ GenericPluginTask g = (GenericPluginTask) task;
39
+
40
+ String url;
41
+ if (g.getPort().isPresent()) {
42
+ url = String.format("jdbc:%s://%s:%d/%s",
43
+ g.getDriverName(), g.getHost(), g.getPort().get(), g.getDatabase());
44
+ } else {
45
+ url = String.format("jdbc:%s://%s:%d/%s",
46
+ g.getDriverName(), g.getHost(), g.getDatabase());
47
+ }
48
+
49
+ Properties props = new Properties();
50
+ props.setProperty("user", g.getUser());
51
+ props.setProperty("password", g.getPassword());
52
+
53
+ props.putAll(g.getOptions());
54
+
55
+ return new GenericOutputConnector(url, props, g.getDriverClass(),
56
+ g.getSchema().orNull());
57
+ }
58
+
59
+ private static class GenericOutputConnector
60
+ implements JdbcOutputConnector
61
+ {
62
+ private final Driver driver;
63
+ private final String url;
64
+ private final Properties properties;
65
+ private final String schemaName;
66
+
67
+ public GenericOutputConnector(String url, Properties properties, String driverClass,
68
+ String schemaName)
69
+ {
70
+ try {
71
+ // TODO check Class.forName(driverClass) is a Driver before newInstance
72
+ // for security
73
+ this.driver = (Driver) Class.forName(driverClass).newInstance();
74
+ } catch (Exception ex) {
75
+ throw Throwables.propagate(ex);
76
+ }
77
+ this.url = url;
78
+ this.properties = properties;
79
+ this.schemaName = schemaName;
80
+ }
81
+
82
+ @Override
83
+ public JdbcOutputConnection connect(boolean autoCommit) throws SQLException
84
+ {
85
+ Connection c = driver.connect(url, properties);
86
+ try {
87
+ c.setAutoCommit(autoCommit);
88
+ JdbcOutputConnection con = new JdbcOutputConnection(c, schemaName);
89
+ c = null;
90
+ return con;
91
+ } finally {
92
+ if (c != null) {
93
+ c.close();
94
+ }
95
+ }
96
+ }
97
+ }
98
+
99
+ @Override
100
+ protected BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException
101
+ {
102
+ return new StandardBatchInsert(getConnector(task, true));
103
+ }
104
+ }
@@ -0,0 +1,701 @@
1
+ package org.embulk.output.jdbc;
2
+
3
+ import java.util.List;
4
+ import java.util.Locale;
5
+ import java.util.Properties;
6
+ import java.util.concurrent.ExecutionException;
7
+ import java.io.IOException;
8
+ import java.sql.Types;
9
+ import java.sql.Connection;
10
+ import java.sql.ResultSet;
11
+ import java.sql.DatabaseMetaData;
12
+ import java.sql.SQLException;
13
+ import org.slf4j.Logger;
14
+ import com.google.common.base.Optional;
15
+ import com.google.common.base.Throwables;
16
+ import com.google.common.collect.ImmutableList;
17
+ import org.embulk.config.CommitReport;
18
+ import org.embulk.config.Config;
19
+ import org.embulk.config.ConfigDefault;
20
+ import org.embulk.config.ConfigDiff;
21
+ import org.embulk.config.ConfigException;
22
+ import org.embulk.config.ConfigSource;
23
+ import org.embulk.config.Task;
24
+ import org.embulk.config.TaskSource;
25
+ import org.embulk.spi.Exec;
26
+ import org.embulk.spi.Column;
27
+ import org.embulk.spi.ColumnVisitor;
28
+ import org.embulk.spi.OutputPlugin;
29
+ import org.embulk.spi.PageOutput;
30
+ import org.embulk.spi.Schema;
31
+ import org.embulk.spi.TransactionalPageOutput;
32
+ import org.embulk.spi.Page;
33
+ import org.embulk.spi.PageReader;
34
+ import org.embulk.spi.time.Timestamp;
35
+ import org.embulk.output.jdbc.setter.ColumnSetter;
36
+ import org.embulk.output.jdbc.setter.ColumnSetterFactory;
37
+ import org.embulk.output.jdbc.RetryExecutor.IdempotentOperation;
38
+ import static org.embulk.output.jdbc.RetryExecutor.retryExecutor;
39
+
40
+ public abstract class AbstractJdbcOutputPlugin
41
+ implements OutputPlugin
42
+ {
43
+ private final Logger logger = Exec.getLogger(getClass());
44
+
45
+ public interface PluginTask
46
+ extends Task
47
+ {
48
+ @Config("host")
49
+ public String getHost();
50
+
51
+ @Config("port")
52
+ @ConfigDefault("null")
53
+ public Optional<Integer> getPort();
54
+
55
+ @Config("user")
56
+ public String getUser();
57
+
58
+ @Config("password")
59
+ @ConfigDefault("\"\"")
60
+ public String getPassword();
61
+
62
+ @Config("options")
63
+ @ConfigDefault("{}")
64
+ public Properties getOptions();
65
+
66
+ @Config("database")
67
+ public String getDatabase();
68
+
69
+ @Config("schema")
70
+ @ConfigDefault("null")
71
+ public Optional<String> getSchema();
72
+
73
+ @Config("table")
74
+ public String getTable();
75
+
76
+ @Config("mode")
77
+ public String getModeConfig();
78
+
79
+ @Config("batch_size")
80
+ @ConfigDefault("16777216")
81
+ // TODO set minimum number
82
+ public int getBatchSize();
83
+
84
+ public void setMode(Mode mode);
85
+ public Mode getMode();
86
+
87
+ public JdbcSchema getLoadSchema();
88
+ public void setLoadSchema(JdbcSchema schema);
89
+
90
+ public Optional<String> getSwapTable();
91
+ public void setSwapTable(Optional<String> name);
92
+
93
+ public Optional<String> getMultipleLoadTablePrefix();
94
+ public void setMultipleLoadTablePrefix(Optional<String> prefix);
95
+ }
96
+
97
+ // for subclasses to add @Config
98
+ protected Class<? extends PluginTask> getTaskClass()
99
+ {
100
+ return PluginTask.class;
101
+ }
102
+
103
+ protected abstract JdbcOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation);
104
+
105
+ protected abstract BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException;
106
+
107
+ protected JdbcOutputConnection newConnection(PluginTask task, boolean retryableMetadataOperation,
108
+ boolean autoCommit) throws SQLException
109
+ {
110
+ return getConnector(task, retryableMetadataOperation).connect(autoCommit);
111
+ }
112
+
113
/** Load strategies; the predicate methods describe how each one stages and publishes data. */
public enum Mode {
    INSERT,
    INSERT_DIRECT,
    TRUNCATE_INSERT,
    MERGE,
    REPLACE,
    REPLACE_INPLACE;
    //REPLACE_PARTITIONING,  // MySQL: partitioning, PostgreSQL: inheritance

    /** True only for {@link #INSERT_DIRECT}. */
    public boolean isDirectWrite()
    {
        return INSERT_DIRECT.equals(this);
    }

    /** True for {@link #INSERT_DIRECT} and {@link #REPLACE_INPLACE}. */
    public boolean isInplace()
    {
        switch (this) {
        case INSERT_DIRECT:
        case REPLACE_INPLACE:
            return true;
        default:
            return false;
        }
    }

    /** True for every mode that is not in-place. */
    public boolean usesMultipleLoadTables()
    {
        if (isInplace()) {
            return false;
        }
        return true;
    }

    /** True for {@link #REPLACE} and {@link #REPLACE_INPLACE}. */
    public boolean createAndSwapTable()
    {
        switch (this) {
        case REPLACE:
        case REPLACE_INPLACE:
            return true;
        default:
            return false;
        }
    }
}
142
+
143
+ public ConfigDiff transaction(ConfigSource config,
144
+ Schema schema, int processorCount,
145
+ OutputPlugin.Control control)
146
+ {
147
+ PluginTask task = config.loadConfig(getTaskClass());
148
+
149
+ // TODO this is a temporary code. behavior will change in a future release.
150
+ switch(task.getModeConfig()) {
151
+ case "insert":
152
+ task.setMode(Mode.INSERT_DIRECT);
153
+ break;
154
+ case "replace":
155
+ task.setMode(Mode.REPLACE_INPLACE);
156
+ break;
157
+ default:
158
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are: insert, replace", task.getModeConfig()));
159
+ }
160
+
161
+ //switch(task.getModeConfig()) {
162
+ ////case "insert":
163
+ //// task.setMode(Mode.INSERT);
164
+ //// break;
165
+ //case "insert_direct":
166
+ // task.setMode(Mode.INSERT_DIRECT);
167
+ // break;
168
+ ////case "truncate_insert": // TODO
169
+ //// task.setMode(Mode.TRUNCATE_INSERT);
170
+ //// break;
171
+ ////case "merge": // TODO
172
+ //// task.setMode(Mode.MERGE);
173
+ //// break;
174
+ ////case "replace":
175
+ //// task.setMode(Mode.REPLACE);
176
+ //// break;
177
+ //case "replace_inplace":
178
+ // task.setMode(Mode.REPLACE_INPLACE);
179
+ // break;
180
+ //default:
181
+ // new ConfigException(String.format("Unknown mode '%s'. Supported modes are: insert_direct, replace_inplace", task.getModeConfig()));
182
+ //}
183
+
184
+ task = begin(task, schema, processorCount);
185
+ control.run(task.dump());
186
+ return commit(task, schema, processorCount);
187
+ }
188
+
189
+ public ConfigDiff resume(TaskSource taskSource,
190
+ Schema schema, int processorCount,
191
+ OutputPlugin.Control control)
192
+ {
193
+ PluginTask task = taskSource.loadTask(getTaskClass());
194
+
195
+ if (task.getMode().isInplace()) {
196
+ throw new UnsupportedOperationException("inplace mode is not resumable. You need to delete partially-loaded records from the database and restart the entire transaction.");
197
+ }
198
+
199
+ task = begin(task, schema, processorCount);
200
+ control.run(task.dump());
201
+ return commit(task, schema, processorCount);
202
+ }
203
+
204
+ private String getTransactionUniqueName()
205
+ {
206
+ // TODO use uuid?
207
+ Timestamp t = Exec.session().getTransactionTime();
208
+ return String.format("%016x%08x", t.getEpochSecond(), t.getNano());
209
+ }
210
+
211
+ private PluginTask begin(final PluginTask task,
212
+ final Schema schema, int processorCount)
213
+ {
214
+ try {
215
+ withRetry(new IdempotentSqlRunnable() { // no intermediate data if isDirectWrite == true
216
+ public void run() throws SQLException
217
+ {
218
+ JdbcOutputConnection con = newConnection(task, true, false);
219
+ try {
220
+ doBegin(con, task, schema);
221
+ } finally {
222
+ con.close();
223
+ }
224
+ }
225
+ });
226
+ } catch (SQLException | InterruptedException ex) {
227
+ throw new RuntimeException(ex);
228
+ }
229
+ return task;
230
+ }
231
+
232
+ private ConfigDiff commit(final PluginTask task,
233
+ Schema schema, final int processorCount)
234
+ {
235
+ if (!task.getMode().isDirectWrite()) { // no intermediate data if isDirectWrite == true
236
+ try {
237
+ withRetry(new IdempotentSqlRunnable() {
238
+ public void run() throws SQLException
239
+ {
240
+ JdbcOutputConnection con = newConnection(task, false, false);
241
+ try {
242
+ doCommit(con, task, processorCount);
243
+ } finally {
244
+ con.close();
245
+ }
246
+ }
247
+ });
248
+ } catch (SQLException | InterruptedException ex) {
249
+ throw new RuntimeException(ex);
250
+ }
251
+ }
252
+ return Exec.newConfigDiff();
253
+ }
254
+
255
+ public void cleanup(TaskSource taskSource,
256
+ Schema schema, final int processorCount,
257
+ final List<CommitReport> successCommitReports)
258
+ {
259
+ final PluginTask task = taskSource.loadTask(getTaskClass());
260
+
261
+ if (!task.getMode().isDirectWrite()) { // no intermediate data if isDirectWrite == true
262
+ try {
263
+ withRetry(new IdempotentSqlRunnable() {
264
+ public void run() throws SQLException
265
+ {
266
+ JdbcOutputConnection con = newConnection(task, true, true);
267
+ try {
268
+ doCleanup(con, task, processorCount, successCommitReports);
269
+ } finally {
270
+ con.close();
271
+ }
272
+ }
273
+ });
274
+ } catch (SQLException | InterruptedException ex) {
275
+ throw new RuntimeException(ex);
276
+ }
277
+ }
278
+ }
279
+
280
+ protected void doBegin(JdbcOutputConnection con,
281
+ PluginTask task, Schema schema) throws SQLException
282
+ {
283
+ Mode mode = task.getMode();
284
+
285
+ JdbcSchema targetTableSchema;
286
+ if (mode.createAndSwapTable()) {
287
+ // DROP TABLE IF EXISTS xyz__0000000054d92dee1e452158_bulk_load_temp
288
+ // CREATE TABLE IF NOT EXISTS xyz__0000000054d92dee1e452158_bulk_load_temp
289
+ // swapTableName = "xyz__0000000054d92dee1e452158_bulk_load_temp"
290
+ String swapTableName = task.getTable() + "_" + getTransactionUniqueName() + "_bulk_load_temp";
291
+ con.dropTableIfExists(swapTableName);
292
+ con.createTableIfNotExists(swapTableName, newJdbcSchemaForNewTable(schema));
293
+ targetTableSchema = newJdbcSchemaFromExistentTable(con, swapTableName);
294
+ task.setSwapTable(Optional.of(swapTableName));
295
+ } else {
296
+ // CREATE TABLE IF NOT EXISTS xyz
297
+ con.createTableIfNotExists(task.getTable(), newJdbcSchemaForNewTable(schema));
298
+ targetTableSchema = newJdbcSchemaFromExistentTable(con, task.getTable());
299
+ task.setSwapTable(Optional.<String>absent());
300
+ }
301
+
302
+ if (mode.usesMultipleLoadTables()) {
303
+ // multipleLoadTablePrefix = "xyz__0000000054d92dee1e452158_"
304
+ // workers run:
305
+ // CREATE TABLE xyz__0000000054d92dee1e452158_%d
306
+ String multipleLoadTablePrefix = task.getTable() + "_" + getTransactionUniqueName();
307
+ task.setMultipleLoadTablePrefix(Optional.of(multipleLoadTablePrefix));
308
+ } else {
309
+ task.setMultipleLoadTablePrefix(Optional.<String>absent());
310
+ }
311
+
312
+ task.setLoadSchema(matchSchemaByColumnNames(schema, targetTableSchema));
313
+ }
314
+
315
+ protected void doCommit(JdbcOutputConnection con, PluginTask task, int processorCount)
316
+ throws SQLException
317
+ {
318
+ switch (task.getMode()) {
319
+ case INSERT:
320
+ // aggregate insert into target
321
+ //con.gatherInsertTables();
322
+ throw new UnsupportedOperationException("not implemented yet"); // TODO
323
+ case INSERT_DIRECT:
324
+ // already done
325
+ break;
326
+ case TRUNCATE_INSERT:
327
+ // truncate & aggregate insert into target
328
+ throw new UnsupportedOperationException("not implemented yet");
329
+ //break;
330
+ case MERGE:
331
+ // aggregate merge into target
332
+ throw new UnsupportedOperationException("not implemented yet");
333
+ //break;
334
+ case REPLACE:
335
+ if (processorCount == 1) {
336
+ // swap table
337
+ con.replaceTable(task.getSwapTable().get(), task.getLoadSchema(), task.getTable());
338
+ } else {
339
+ // aggregate insert into swap table & swap table
340
+ throw new UnsupportedOperationException("not implemented yet");
341
+ }
342
+ break;
343
+ case REPLACE_INPLACE:
344
+ // swap table
345
+ con.replaceTable(task.getSwapTable().get(), task.getLoadSchema(), task.getTable());
346
+ break;
347
+ }
348
+ }
349
+
350
+ protected void doCleanup(JdbcOutputConnection con, PluginTask task, int processorCount,
351
+ List<CommitReport> successCommitReports)
352
+ throws SQLException
353
+ {
354
+ if (task.getSwapTable().isPresent()) {
355
+ con.dropTableIfExists(task.getSwapTable().get());
356
+ }
357
+ if (task.getMultipleLoadTablePrefix().isPresent()) {
358
+ for (int i=0; i < processorCount; i++) {
359
+ con.dropTableIfExists(formatMultipleLoadTableName(task, i));
360
+ }
361
+ }
362
+ }
363
+
364
+ static String formatMultipleLoadTableName(PluginTask task, int processorIndex)
365
+ {
366
+ return task.getMultipleLoadTablePrefix().get() + String.format("%04x", processorIndex);
367
+ }
368
+
369
+ protected JdbcSchema newJdbcSchemaForNewTable(Schema schema)
370
+ {
371
+ final ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
372
+ for (Column c : schema.getColumns()) {
373
+ final String columnName = c.getName();
374
+ c.visit(new ColumnVisitor() {
375
+ public void booleanColumn(Column column)
376
+ {
377
+ columns.add(new JdbcColumn(
378
+ columnName, "BOOLEAN",
379
+ Types.BOOLEAN, 1, 0));
380
+ }
381
+
382
+ public void longColumn(Column column)
383
+ {
384
+ columns.add(new JdbcColumn(
385
+ columnName, "BIGINT",
386
+ Types.BIGINT, 22, 0));
387
+ }
388
+
389
+ public void doubleColumn(Column column)
390
+ {
391
+ columns.add(new JdbcColumn(
392
+ columnName, "DOUBLE PRECISION",
393
+ Types.FLOAT, 24, 0));
394
+ }
395
+
396
+ public void stringColumn(Column column)
397
+ {
398
+ columns.add(new JdbcColumn(
399
+ columnName, "CLOB",
400
+ Types.CLOB, 4000, 0)); // TODO size type param
401
+ }
402
+
403
+ public void timestampColumn(Column column)
404
+ {
405
+ columns.add(new JdbcColumn(
406
+ columnName, "TIMESTAMP",
407
+ Types.TIMESTAMP, 26, 0)); // size type param is from postgresql.
408
+ }
409
+ });
410
+ }
411
+ return new JdbcSchema(columns.build());
412
+ }
413
+
414
+ public JdbcSchema newJdbcSchemaFromExistentTable(JdbcOutputConnection connection,
415
+ String tableName) throws SQLException
416
+ {
417
+ DatabaseMetaData dbm = connection.getMetaData();
418
+ String escape = dbm.getSearchStringEscape();
419
+
420
+ ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
421
+ String schemaNamePattern = JdbcUtils.escapeSearchString(connection.getSchemaName(), escape);
422
+ String tableNamePattern = JdbcUtils.escapeSearchString(tableName, escape);
423
+ ResultSet rs = dbm.getColumns(null, schemaNamePattern, tableNamePattern, null);
424
+ try {
425
+ while(rs.next()) {
426
+ String columnName = rs.getString("COLUMN_NAME");
427
+ String typeName = rs.getString("TYPE_NAME");
428
+ typeName = typeName.toUpperCase(Locale.ENGLISH);
429
+ int sqlType = rs.getInt("DATA_TYPE");
430
+ int colSize = rs.getInt("COLUMN_SIZE");
431
+ int decDigit = rs.getInt("DECIMAL_DIGITS");
432
+ if (rs.wasNull()) {
433
+ decDigit = -1;
434
+ }
435
+ //rs.getString("IS_NULLABLE").equals("NO") // "YES" or "" // TODO
436
+ //rs.getString("COLUMN_DEF") // or null // TODO
437
+ columns.add(new JdbcColumn(
438
+ columnName, typeName,
439
+ sqlType, colSize, decDigit));
440
+ }
441
+ } finally {
442
+ rs.close();
443
+ }
444
+ return new JdbcSchema(columns.build());
445
+ }
446
+
447
+ private JdbcSchema matchSchemaByColumnNames(Schema inputSchema, JdbcSchema targetTableSchema)
448
+ {
449
+ // TODO for each inputSchema.getColumns(), search a column whose name
450
+ // matches with targetTableSchema. if not match, create JdbcSchema.skipColumn().
451
+ return targetTableSchema;
452
+ }
453
+
454
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, final int processorIndex)
455
+ {
456
+ final PluginTask task = taskSource.loadTask(getTaskClass());
457
+ final Mode mode = task.getMode();
458
+
459
+ BatchInsert batch;
460
+ try {
461
+ batch = newBatchInsert(task);
462
+ } catch (IOException | SQLException ex) {
463
+ throw new RuntimeException(ex);
464
+ }
465
+ try {
466
+ PageReader reader = new PageReader(schema);
467
+ ColumnSetterFactory factory = new ColumnSetterFactory(batch, reader, null); // TODO TimestampFormatter
468
+
469
+ JdbcSchema loadSchema = task.getLoadSchema();
470
+
471
+ ImmutableList.Builder<JdbcColumn> insertColumns = ImmutableList.builder();
472
+ ImmutableList.Builder<ColumnSetter> columnSetters = ImmutableList.builder();
473
+ for (JdbcColumn c : loadSchema.getColumns()) {
474
+ if (c.isSkipColumn()) {
475
+ columnSetters.add(factory.newSkipColumnSetter());
476
+ } else {
477
+ columnSetters.add(factory.newColumnSetter(c));
478
+ insertColumns.add(c);
479
+ }
480
+ }
481
+ final JdbcSchema insertSchema = new JdbcSchema(insertColumns.build());
482
+
483
+ final BatchInsert b = batch;
484
+ withRetry(new IdempotentSqlRunnable() {
485
+ public void run() throws SQLException
486
+ {
487
+ String loadTable;
488
+ boolean createTable;
489
+ if (mode.usesMultipleLoadTables()) {
490
+ // insert, truncate_insert, merge, replace
491
+ loadTable = formatMultipleLoadTableName(task, processorIndex);
492
+ JdbcOutputConnection con = newConnection(task, true, true);
493
+ try {
494
+ con.createTableIfNotExists(loadTable, insertSchema);
495
+ } finally {
496
+ con.close();
497
+ }
498
+
499
+ } else if (!mode.usesMultipleLoadTables() && mode.createAndSwapTable()) {
500
+ // replace_inplace
501
+ loadTable = task.getSwapTable().get();
502
+
503
+ } else {
504
+ // insert_direct
505
+ loadTable = task.getTable();
506
+ }
507
+
508
+ b.prepare(loadTable, insertSchema);
509
+ }
510
+ });
511
+
512
+ PluginPageOutput output = new PluginPageOutput(reader, batch, columnSetters.build(),
513
+ task.getBatchSize());
514
+ batch = null;
515
+ return output;
516
+
517
+ } catch (SQLException | InterruptedException ex) {
518
+ throw new RuntimeException(ex);
519
+
520
+ } finally {
521
+ if (batch != null) {
522
+ try {
523
+ batch.close();
524
+ } catch (IOException | SQLException ex) {
525
+ throw new RuntimeException(ex);
526
+ }
527
+ }
528
+ }
529
+ }
530
+
531
+ public static class PluginPageOutput
532
+ implements TransactionalPageOutput
533
+ {
534
+ private final PageReader pageReader;
535
+ private final BatchInsert batch;
536
+ private final List<Column> columns;
537
+ private final List<ColumnSetter> columnSetters;
538
+ private final int batchSize;
539
+ private final int foraceBatchFlushSize;
540
+
541
+ public PluginPageOutput(PageReader pageReader,
542
+ BatchInsert batch, List<ColumnSetter> columnSetters,
543
+ int batchSize)
544
+ {
545
+ this.pageReader = pageReader;
546
+ this.batch = batch;
547
+ this.columns = pageReader.getSchema().getColumns();
548
+ this.columnSetters = columnSetters;
549
+ this.batchSize = batchSize;
550
+ this.foraceBatchFlushSize = batchSize * 2;
551
+ }
552
+
553
+ @Override
554
+ public void add(Page page)
555
+ {
556
+ try {
557
+ pageReader.setPage(page);
558
+ while (pageReader.nextRecord()) {
559
+ if (batch.getBatchWeight() > foraceBatchFlushSize) {
560
+ batch.flush();
561
+ }
562
+ for (int i=0; i < columnSetters.size(); i++) {
563
+ columns.get(i).visit(columnSetters.get(i));
564
+ }
565
+ batch.add();
566
+ }
567
+ if (batch.getBatchWeight() > batchSize) {
568
+ batch.flush();
569
+ }
570
+ } catch (IOException | SQLException ex) {
571
+ throw new RuntimeException(ex);
572
+ }
573
+ }
574
+
575
+ @Override
576
+ public void finish()
577
+ {
578
+ try {
579
+ batch.finish();
580
+ } catch (IOException | SQLException ex) {
581
+ throw new RuntimeException(ex);
582
+ }
583
+ }
584
+
585
+ @Override
586
+ public void close()
587
+ {
588
+ try {
589
+ batch.close();
590
+ } catch (IOException | SQLException ex) {
591
+ throw new RuntimeException(ex);
592
+ }
593
+ }
594
+
595
+ @Override
596
+ public void abort()
597
+ {
598
+ }
599
+
600
+ @Override
601
+ public CommitReport commit()
602
+ {
603
+ return Exec.newCommitReport();
604
+ }
605
+ }
606
+
607
/**
 * A unit of SQL work passed to {@code withRetry}. As the name states, implementations
 * are expected to be idempotent, because a failed run may be executed again.
 */
public interface IdempotentSqlRunnable
{
    void run() throws SQLException;
}
611
+
612
+ protected void withRetry(IdempotentSqlRunnable op)
613
+ throws SQLException, InterruptedException
614
+ {
615
+ withRetry(op, "Operation failed");
616
+ }
617
+
618
+ protected void withRetry(final IdempotentSqlRunnable op, final String errorMessage)
619
+ throws SQLException, InterruptedException
620
+ {
621
+ try {
622
+ retryExecutor()
623
+ .setRetryLimit(12)
624
+ .setInitialRetryWait(1000)
625
+ .setMaxRetryWait(30 * 60 * 1000)
626
+ .runInterruptible(new IdempotentOperation<Void>() {
627
+ public Void call() throws Exception
628
+ {
629
+ op.run();
630
+ return null;
631
+ }
632
+
633
+ public void onRetry(Throwable exception, int retryCount, int retryLimit, int retryWait)
634
+ {
635
+ if (exception instanceof SQLException) {
636
+ SQLException ex = (SQLException) exception;
637
+ String sqlState = ex.getSQLState();
638
+ int errorCode = ex.getErrorCode();
639
+ logger.warn("{} ({}:{}), retrying {}/{} after {} seconds. Message: {}",
640
+ errorMessage, errorCode, sqlState, retryCount, retryLimit, retryWait/1000,
641
+ buildExceptionMessage(exception));
642
+ } else {
643
+ logger.warn("{}, retrying {}/{} after {} seconds. Message: {}",
644
+ errorMessage, retryCount, retryLimit, retryWait/1000,
645
+ buildExceptionMessage(exception));
646
+ }
647
+ if (retryCount % 3 == 0) {
648
+ logger.info("Error details:", exception);
649
+ }
650
+ }
651
+
652
+ public void onGiveup(Throwable firstException, Throwable lastException)
653
+ {
654
+ if (firstException instanceof SQLException) {
655
+ SQLException ex = (SQLException) firstException;
656
+ String sqlState = ex.getSQLState();
657
+ int errorCode = ex.getErrorCode();
658
+ logger.error("{} ({}:{})", errorMessage, errorCode, sqlState);
659
+ }
660
+ }
661
+
662
+ public boolean isRetryableException(Throwable exception)
663
+ {
664
+ if (exception instanceof SQLException) {
665
+ SQLException ex = (SQLException) exception;
666
+ String sqlState = ex.getSQLState();
667
+ int errorCode = ex.getErrorCode();
668
+ return isRetryableException(ex);
669
+ }
670
+ return false; // TODO
671
+ }
672
+ });
673
+
674
+ } catch (ExecutionException ex) {
675
+ Throwable cause = ex.getCause();
676
+ Throwables.propagateIfInstanceOf(cause, SQLException.class);
677
+ throw Throwables.propagate(cause);
678
+ }
679
+ }
680
+
681
+ private String buildExceptionMessage(Throwable ex) {
682
+ StringBuilder sb = new StringBuilder();
683
+ sb.append(ex.getMessage());
684
+ if (ex.getCause() != null) {
685
+ buildExceptionMessageCont(sb, ex.getCause(), ex.getMessage());
686
+ }
687
+ return sb.toString();
688
+ }
689
+
690
+ private void buildExceptionMessageCont(StringBuilder sb, Throwable ex, String lastMessage) {
691
+ if (!lastMessage.equals(ex.getMessage())) {
692
+ // suppress same messages
693
+ sb.append(" < ");
694
+ sb.append(ex.getMessage());
695
+ }
696
+ if (ex.getCause() == null) {
697
+ return;
698
+ }
699
+ buildExceptionMessageCont(sb, ex.getCause(), ex.getMessage());
700
+ }
701
+ }