embulk-input-postgresql 0.7.2 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c855700b84c5cad076bb37c8ceb53f874be5d084
4
- data.tar.gz: 0e9425e94c4bfad10f95e350f652097b92a03251
3
+ metadata.gz: 93171986c203df79e84845be61756890f6108e3c
4
+ data.tar.gz: f76cd155e82c075761ed4b5d3196de5d872af719
5
5
  SHA512:
6
- metadata.gz: f829395a1ccd22d20e6a03d188a3297fb76dfdfa6e393eb333661d20021e6a7d0f1a6f56d9918a441b1dbda62884a10e3336b2c928215999071aaa0bfbca4b28
7
- data.tar.gz: edda38237731d6e180935e8b3471c6d15e752d8bb74f68e723d12641e9049b643f36de201ccde504bddf7bf7ec676cc501c40efb7aed8ec8713d53400c69e425
6
+ metadata.gz: dc8b710ebcdbb08a5505a762095621c779f448b256398c1c40fa8c7a0739b587965ec64486f460c9377699649d699ee9ddec5cb5654cee90d597facefc364c80
7
+ data.tar.gz: fdc4e9af5ff63a5f93f17db6ee93792c897ee616a11db40e2f1a0d5fdff735ae51238950dfad3ac6c30e0d2557c504cbb41df79243939e3ae55805c27adb4f90
data/README.md CHANGED
@@ -19,14 +19,18 @@ PostgreSQL input plugins for Embulk loads records from PostgreSQL.
19
19
  - **connect_timeout**: timeout for establishment of a database connection. (integer (seconds), default: 300)
20
20
  - **socket_timeout**: timeout for socket read operations. 0 means no timeout. (integer (seconds), default: 1800)
21
21
  - **ssl**: enables SSL. data will be encrypted but CA or certification will not be verified (boolean, default: false)
22
+ - **application_name**: application name shown on pg_stat_activity. (string, default: "embulk-input-postgresql")
22
23
  - **options**: extra JDBC properties (hash, default: {})
23
24
  - If you write SQL directly,
24
25
  - **query**: SQL to run (string)
25
26
  - If **query** is not set,
26
27
  - **table**: destination table name (string, required)
27
- - **select**: comma-separated list of columns to select (string, default: "*")
28
+ - **select**: expression of select (e.g. `id, created_at`) (string, default: "*")
28
29
  - **where**: WHERE condition to filter the rows (string, default: no-condition)
29
- - **order_by**: name of the column that rows are sorted by (string, default: not sorted)
30
+ - **order_by**: expression of ORDER BY to sort rows (e.g. `created_at DESC, id ASC`) (string, default: not sorted)
31
+ - **incremental**: if true, enables incremental loading. See next section for details (boolean, default: false)
32
+ - **incremental_columns**: column names for incremental loading (array of strings, default: use primary keys)
33
+ - **last_record**: values of the last record for incremental loading (array of objects, default: load all records)
30
34
  - **default_timezone**: If the sql type of a column is `date`/`time`/`datetime` and the embulk type is `string`, column values are formatted in this default_timezone. You can overwrite the timezone for each column using the column_options option. (string, default: `UTC`)
31
35
  - **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
32
36
  - **value_type**: embulk get values from database as this value_type. Typically, the value_type determines `getXXX` method of `java.sql.PreparedStatement`.
@@ -56,6 +60,42 @@ In addition, `json` type is supported for `hstore` column, and output will be as
56
60
  `value_type` is ignored.
57
61
 
58
62
 
63
+ ### Incremental loading
64
+
65
+ Incremental loading uses monotonically increasing unique columns (such as auto-increment (serial / bigserial) column) to load records inserted (or updated) after last execution.
66
+
67
+ First, if `incremental: true` is set, this plugin loads all records with an additional ORDER BY. For example, if the `incremental_columns: [updated_at, id]` option is set, the query will be as follows:
68
+
69
+ ```
70
+ SELECT * FROM (
71
+ ...original query is here...
72
+ )
73
+ ORDER BY updated_at, id
74
+ ```
75
+
76
+ When bulk data loading finishes successfully, it outputs the `last_record: ` parameter as config-diff so that the next execution uses it.
77
+
78
+ At the next execution, when `last_record: ` is also set, this plugin generates additional WHERE conditions to load records larger than the last record. For example, if `last_record: ["2017-01-01 00:32:12", 5291]` is set,
79
+
80
+ ```
81
+ SELECT * FROM (
82
+ ...original query is here...
83
+ )
84
+ WHERE updated_at > '2017-01-01 00:32:12' OR (updated_at = '2017-01-01 00:32:12' AND id > 5291)
85
+ ORDER BY updated_at, id
86
+ ```
87
+
88
+ Then, it updates `last_record: ` so that next execution uses the updated last_record.
89
+
90
+ **IMPORTANT**: If you set the `incremental_columns: ` option, make sure that there is an index on the columns to avoid a full table scan. For this example, the following index should be created:
91
+
92
+ ```
93
+ CREATE INDEX embulk_incremental_loading_index ON table (updated_at, id);
94
+ ```
95
+
96
+ Recommended usage is to leave `incremental_columns` unset and let this plugin automatically find an auto-increment (serial / bigserial) primary key. Currently, only strings and integers are supported as incremental_columns.
97
+
98
+
59
99
  ## Example
60
100
 
61
101
  ```yaml
@@ -68,6 +108,16 @@ in:
68
108
  table: my_table
69
109
  select: "col1, col2, col3"
70
110
  where: "col4 != 'a'"
111
+ order_by: "col1 DESC"
112
+ ```
113
+
114
+ This configuration will generate the following SQL:
115
+
116
+ ```
117
+ SELECT col1, col2, col3
118
+ FROM "my_table"
119
+ WHERE col4 != 'a'
120
+ ORDER BY col1 DESC
71
121
  ```
72
122
 
73
123
  If you need a complex SQL,
@@ -48,6 +48,10 @@ public class PostgreSQLInputPlugin
48
48
  @Config("ssl")
49
49
  @ConfigDefault("false")
50
50
  public boolean getSsl();
51
+
52
+ @Config("application_name")
53
+ @ConfigDefault("\"embulk-input-postgresql\"")
54
+ public String getApplicationName();
51
55
  }
52
56
 
53
57
  @Override
@@ -82,6 +86,8 @@ public class PostgreSQLInputPlugin
82
86
  }
83
87
  // setting ssl=false enables SSL. See org.postgresql.core.v3.openConnectionImpl.
84
88
 
89
+ props.setProperty("ApplicationName", t.getApplicationName());
90
+
85
91
  props.putAll(t.getOptions());
86
92
 
87
93
  Connection con = driver.connect(url, props);
@@ -1,5 +1,6 @@
1
1
  package org.embulk.input.postgresql;
2
2
 
3
+ import java.util.List;
3
4
  import java.sql.Connection;
4
5
  import java.sql.PreparedStatement;
5
6
  import java.sql.ResultSet;
@@ -7,6 +8,8 @@ import java.sql.SQLException;
7
8
  import org.slf4j.Logger;
8
9
  import org.embulk.spi.Exec;
9
10
  import org.embulk.input.jdbc.JdbcInputConnection;
11
+ import org.embulk.input.jdbc.JdbcLiteral;
12
+ import org.embulk.input.jdbc.getter.ColumnGetter;
10
13
 
11
14
  public class PostgreSQLInputConnection
12
15
  extends JdbcInputConnection
@@ -20,9 +23,24 @@ public class PostgreSQLInputConnection
20
23
  }
21
24
 
22
25
  @Override
23
- protected CursorSelect newBatchSelect(String select, int fetchRows, int queryTimeout) throws SQLException
26
+ protected BatchSelect newBatchSelect(PreparedQuery preparedQuery,
27
+ List<ColumnGetter> getters,
28
+ int fetchRows, int queryTimeout) throws SQLException
24
29
  {
25
- executeUpdate("DECLARE cur NO SCROLL CURSOR FOR "+select);
30
+ String query = "DECLARE cur NO SCROLL CURSOR FOR " + preparedQuery.getQuery();
31
+ List<JdbcLiteral> params = preparedQuery.getParameters();
32
+
33
+ logger.info("SQL: " + query);
34
+ PreparedStatement stmt = connection.prepareStatement(query);
35
+ try {
36
+ if (!params.isEmpty()) {
37
+ logger.info("Parameters: {}", params);
38
+ prepareParameters(stmt, getters, params);
39
+ }
40
+ stmt.executeUpdate();
41
+ } finally {
42
+ stmt.close();
43
+ }
26
44
 
27
45
  String fetchSql = "FETCH FORWARD "+fetchRows+" FROM cur";
28
46
  // Because socketTimeout is set in Connection, there is no need to set queryTimeout.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-postgresql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-24 00:00:00.000000000 Z
11
+ date: 2016-08-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Selects records from a table.
14
14
  email:
@@ -19,6 +19,9 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - README.md
21
21
  - build.gradle
22
+ - classpath/embulk-input-jdbc-0.7.3.jar
23
+ - classpath/embulk-input-postgresql-0.7.3.jar
24
+ - classpath/postgresql-9.4-1205-jdbc41.jar
22
25
  - lib/embulk/input/postgresql.rb
23
26
  - src/main/java/org/embulk/input/PostgreSQLInputPlugin.java
24
27
  - src/main/java/org/embulk/input/postgresql/PostgreSQLInputConnection.java
@@ -27,9 +30,6 @@ files:
27
30
  - src/test/java/org/embulk/input/postgresql/PostgreSQLInputPluginTest.java
28
31
  - src/test/resources/yml/input_hstore.yml
29
32
  - src/test/resources/yml/input_hstore2.yml
30
- - classpath/embulk-input-jdbc-0.7.2.jar
31
- - classpath/embulk-input-postgresql-0.7.2.jar
32
- - classpath/postgresql-9.4-1205-jdbc41.jar
33
33
  homepage: https://github.com/embulk/embulk-input-jdbc
34
34
  licenses:
35
35
  - Apache 2.0
@@ -40,17 +40,17 @@ require_paths:
40
40
  - lib
41
41
  required_ruby_version: !ruby/object:Gem::Requirement
42
42
  requirements:
43
- - - '>='
43
+ - - ">="
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
46
  required_rubygems_version: !ruby/object:Gem::Requirement
47
47
  requirements:
48
- - - '>='
48
+ - - ">="
49
49
  - !ruby/object:Gem::Version
50
50
  version: '0'
51
51
  requirements: []
52
52
  rubyforge_project:
53
- rubygems_version: 2.1.9
53
+ rubygems_version: 2.4.8
54
54
  signing_key:
55
55
  specification_version: 4
56
56
  summary: JDBC input plugin for Embulk
Binary file