embulk-input-postgresql 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c855700b84c5cad076bb37c8ceb53f874be5d084
4
- data.tar.gz: 0e9425e94c4bfad10f95e350f652097b92a03251
3
+ metadata.gz: 93171986c203df79e84845be61756890f6108e3c
4
+ data.tar.gz: f76cd155e82c075761ed4b5d3196de5d872af719
5
5
  SHA512:
6
- metadata.gz: f829395a1ccd22d20e6a03d188a3297fb76dfdfa6e393eb333661d20021e6a7d0f1a6f56d9918a441b1dbda62884a10e3336b2c928215999071aaa0bfbca4b28
7
- data.tar.gz: edda38237731d6e180935e8b3471c6d15e752d8bb74f68e723d12641e9049b643f36de201ccde504bddf7bf7ec676cc501c40efb7aed8ec8713d53400c69e425
6
+ metadata.gz: dc8b710ebcdbb08a5505a762095621c779f448b256398c1c40fa8c7a0739b587965ec64486f460c9377699649d699ee9ddec5cb5654cee90d597facefc364c80
7
+ data.tar.gz: fdc4e9af5ff63a5f93f17db6ee93792c897ee616a11db40e2f1a0d5fdff735ae51238950dfad3ac6c30e0d2557c504cbb41df79243939e3ae55805c27adb4f90
data/README.md CHANGED
@@ -19,14 +19,18 @@ PostgreSQL input plugins for Embulk loads records from PostgreSQL.
19
19
  - **connect_timeout**: timeout for establishment of a database connection. (integer (seconds), default: 300)
20
20
  - **socket_timeout**: timeout for socket read operations. 0 means no timeout. (integer (seconds), default: 1800)
21
21
  - **ssl**: enables SSL. data will be encrypted but CA or certification will not be verified (boolean, default: false)
22
+ - **application_name**: application name shown on pg_stat_activity. (string, default: "embulk-input-postgresql")
22
23
  - **options**: extra JDBC properties (hash, default: {})
23
24
  - If you write SQL directly,
24
25
  - **query**: SQL to run (string)
25
26
  - If **query** is not set,
26
27
  - **table**: source table name (string, required)
27
- - **select**: comma-separated list of columns to select (string, default: "*")
28
+ - **select**: expression of select (e.g. `id, created_at`) (string, default: "*")
28
29
  - **where**: WHERE condition to filter the rows (string, default: no-condition)
29
- - **order_by**: name of the column that rows are sorted by (string, default: not sorted)
30
+ - **order_by**: expression of ORDER BY to sort rows (e.g. `created_at DESC, id ASC`) (string, default: not sorted)
31
+ - **incremental**: if true, enables incremental loading. See next section for details (boolean, default: false)
32
+ - **incremental_columns**: column names for incremental loading (array of strings, default: use primary keys)
33
+ - **last_record**: values of the last record for incremental loading (array of objects, default: load all records)
30
34
  - **default_timezone**: If the sql type of a column is `date`/`time`/`datetime` and the embulk type is `string`, column values are formatted in this default_timezone. You can overwrite the timezone for each column using the column_options option. (string, default: `UTC`)
31
35
  - **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
32
36
  - **value_type**: embulk get values from database as this value_type. Typically, the value_type determines `getXXX` method of `java.sql.PreparedStatement`.
@@ -56,6 +60,42 @@ In addition, `json` type is supported for `hstore` column, and output will be as
56
60
  `value_type` is ignored.
57
61
 
58
62
 
63
+ ### Incremental loading
64
+
65
+ Incremental loading uses monotonically increasing unique columns (such as auto-increment (serial / bigserial) column) to load records inserted (or updated) after last execution.
66
+
67
+ First, if `incremental: true` is set, this plugin loads all records with an additional ORDER BY. For example, if the `incremental_columns: [updated_at, id]` option is set, the query will be as follows:
68
+
69
+ ```
70
+ SELECT * FROM (
71
+ ...original query is here...
72
+ )
73
+ ORDER BY updated_at, id
74
+ ```
75
+
76
+ When bulk data loading finishes successfully, it outputs the `last_record: ` parameter as config-diff so that the next execution uses it.
77
+
78
+ At the next execution, when `last_record: ` is also set, this plugin generates additional WHERE conditions to load records larger than the last record. For example, if `last_record: ["2017-01-01 00:32:12", 5291]` is set,
79
+
80
+ ```
81
+ SELECT * FROM (
82
+ ...original query is here...
83
+ )
84
+ WHERE updated_at > '2017-01-01 00:32:12' OR (updated_at = '2017-01-01 00:32:12' AND id > 5291)
85
+ ORDER BY updated_at, id
86
+ ```
87
+
88
+ Then, it updates `last_record: ` so that next execution uses the updated last_record.
89
+
90
+ **IMPORTANT**: If you set `incremental_columns: ` option, make sure that there is an index on the columns to avoid full table scan. For this example, following index should be created:
91
+
92
+ ```
93
+ CREATE INDEX embulk_incremental_loading_index ON table (updated_at, id);
94
+ ```
95
+
96
+ Recommended usage is to leave `incremental_columns` unset and let this plugin automatically find an auto-increment (serial / bigserial) primary key. Currently, only strings and integers are supported as incremental_columns.
97
+
98
+
59
99
  ## Example
60
100
 
61
101
  ```yaml
@@ -68,6 +108,16 @@ in:
68
108
  table: my_table
69
109
  select: "col1, col2, col3"
70
110
  where: "col4 != 'a'"
111
+ order_by: "col1 DESC"
112
+ ```
113
+
114
+ This configuration will generate the following SQL:
115
+
116
+ ```
117
+ SELECT col1, col2, col3
118
+ FROM "my_table"
119
+ WHERE col4 != 'a'
120
+ ORDER BY col1 DESC
71
121
  ```
72
122
 
73
123
  If you need a complex SQL,
@@ -48,6 +48,10 @@ public class PostgreSQLInputPlugin
48
48
  @Config("ssl")
49
49
  @ConfigDefault("false")
50
50
  public boolean getSsl();
51
+
52
+ @Config("application_name")
53
+ @ConfigDefault("\"embulk-input-postgresql\"")
54
+ public String getApplicationName();
51
55
  }
52
56
 
53
57
  @Override
@@ -82,6 +86,8 @@ public class PostgreSQLInputPlugin
82
86
  }
83
87
  // setting ssl=false enables SSL. See org.postgresql.core.v3.openConnectionImpl.
84
88
 
89
+ props.setProperty("ApplicationName", t.getApplicationName());
90
+
85
91
  props.putAll(t.getOptions());
86
92
 
87
93
  Connection con = driver.connect(url, props);
@@ -1,5 +1,6 @@
1
1
  package org.embulk.input.postgresql;
2
2
 
3
+ import java.util.List;
3
4
  import java.sql.Connection;
4
5
  import java.sql.PreparedStatement;
5
6
  import java.sql.ResultSet;
@@ -7,6 +8,8 @@ import java.sql.SQLException;
7
8
  import org.slf4j.Logger;
8
9
  import org.embulk.spi.Exec;
9
10
  import org.embulk.input.jdbc.JdbcInputConnection;
11
+ import org.embulk.input.jdbc.JdbcLiteral;
12
+ import org.embulk.input.jdbc.getter.ColumnGetter;
10
13
 
11
14
  public class PostgreSQLInputConnection
12
15
  extends JdbcInputConnection
@@ -20,9 +23,24 @@ public class PostgreSQLInputConnection
20
23
  }
21
24
 
22
25
  @Override
23
- protected CursorSelect newBatchSelect(String select, int fetchRows, int queryTimeout) throws SQLException
26
+ protected BatchSelect newBatchSelect(PreparedQuery preparedQuery,
27
+ List<ColumnGetter> getters,
28
+ int fetchRows, int queryTimeout) throws SQLException
24
29
  {
25
- executeUpdate("DECLARE cur NO SCROLL CURSOR FOR "+select);
30
+ String query = "DECLARE cur NO SCROLL CURSOR FOR " + preparedQuery.getQuery();
31
+ List<JdbcLiteral> params = preparedQuery.getParameters();
32
+
33
+ logger.info("SQL: " + query);
34
+ PreparedStatement stmt = connection.prepareStatement(query);
35
+ try {
36
+ if (!params.isEmpty()) {
37
+ logger.info("Parameters: {}", params);
38
+ prepareParameters(stmt, getters, params);
39
+ }
40
+ stmt.executeUpdate();
41
+ } finally {
42
+ stmt.close();
43
+ }
26
44
 
27
45
  String fetchSql = "FETCH FORWARD "+fetchRows+" FROM cur";
28
46
  // Because socketTimeout is set in Connection, we don't need to set queryTimeout.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-postgresql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-24 00:00:00.000000000 Z
11
+ date: 2016-08-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Selects records from a table.
14
14
  email:
@@ -19,6 +19,9 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - README.md
21
21
  - build.gradle
22
+ - classpath/embulk-input-jdbc-0.7.3.jar
23
+ - classpath/embulk-input-postgresql-0.7.3.jar
24
+ - classpath/postgresql-9.4-1205-jdbc41.jar
22
25
  - lib/embulk/input/postgresql.rb
23
26
  - src/main/java/org/embulk/input/PostgreSQLInputPlugin.java
24
27
  - src/main/java/org/embulk/input/postgresql/PostgreSQLInputConnection.java
@@ -27,9 +30,6 @@ files:
27
30
  - src/test/java/org/embulk/input/postgresql/PostgreSQLInputPluginTest.java
28
31
  - src/test/resources/yml/input_hstore.yml
29
32
  - src/test/resources/yml/input_hstore2.yml
30
- - classpath/embulk-input-jdbc-0.7.2.jar
31
- - classpath/embulk-input-postgresql-0.7.2.jar
32
- - classpath/postgresql-9.4-1205-jdbc41.jar
33
33
  homepage: https://github.com/embulk/embulk-input-jdbc
34
34
  licenses:
35
35
  - Apache 2.0
@@ -40,17 +40,17 @@ require_paths:
40
40
  - lib
41
41
  required_ruby_version: !ruby/object:Gem::Requirement
42
42
  requirements:
43
- - - '>='
43
+ - - ">="
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
46
  required_rubygems_version: !ruby/object:Gem::Requirement
47
47
  requirements:
48
- - - '>='
48
+ - - ">="
49
49
  - !ruby/object:Gem::Version
50
50
  version: '0'
51
51
  requirements: []
52
52
  rubyforge_project:
53
- rubygems_version: 2.1.9
53
+ rubygems_version: 2.4.8
54
54
  signing_key:
55
55
  specification_version: 4
56
56
  summary: JDBC input plugin for Embulk
Binary file