embulk-input-postgresql 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -2
- data/classpath/embulk-input-jdbc-0.7.3.jar +0 -0
- data/classpath/embulk-input-postgresql-0.7.3.jar +0 -0
- data/src/main/java/org/embulk/input/PostgreSQLInputPlugin.java +6 -0
- data/src/main/java/org/embulk/input/postgresql/PostgreSQLInputConnection.java +20 -2
- metadata +8 -8
- data/classpath/embulk-input-jdbc-0.7.2.jar +0 -0
- data/classpath/embulk-input-postgresql-0.7.2.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93171986c203df79e84845be61756890f6108e3c
|
4
|
+
data.tar.gz: f76cd155e82c075761ed4b5d3196de5d872af719
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc8b710ebcdbb08a5505a762095621c779f448b256398c1c40fa8c7a0739b587965ec64486f460c9377699649d699ee9ddec5cb5654cee90d597facefc364c80
|
7
|
+
data.tar.gz: fdc4e9af5ff63a5f93f17db6ee93792c897ee616a11db40e2f1a0d5fdff735ae51238950dfad3ac6c30e0d2557c504cbb41df79243939e3ae55805c27adb4f90
|
data/README.md
CHANGED
@@ -19,14 +19,18 @@ PostgreSQL input plugins for Embulk loads records from PostgreSQL.
|
|
19
19
|
- **connect_timeout**: timeout for establishment of a database connection. (integer (seconds), default: 300)
|
20
20
|
- **socket_timeout**: timeout for socket read operations. 0 means no timeout. (integer (seconds), default: 1800)
|
21
21
|
- **ssl**: enables SSL. data will be encrypted but CA or certification will not be verified (boolean, default: false)
|
22
|
+
- **application_name**: application name shown on pg_stat_activity. (string, default: "embulk-input-postgresql")
|
22
23
|
- **options**: extra JDBC properties (hash, default: {})
|
23
24
|
- If you write SQL directly,
|
24
25
|
- **query**: SQL to run (string)
|
25
26
|
- If **query** is not set,
|
26
27
|
- **table**: source table name (string, required)
|
27
|
-
- **select**:
|
28
|
+
- **select**: expression of select (e.g. `id, created_at`) (string, default: "*")
|
28
29
|
- **where**: WHERE condition to filter the rows (string, default: no-condition)
|
29
|
-
- **order_by**:
|
30
|
+
- **order_by**: expression of ORDER BY to sort rows (e.g. `created_at DESC, id ASC`) (string, default: not sorted)
|
31
|
+
- **incremental**: if true, enables incremental loading. See next section for details (boolean, default: false)
|
32
|
+
- **incremental_columns**: column names for incremental loading (array of strings, default: use primary keys)
|
33
|
+
- **last_record**: values of the last record for incremental loading (array of objects, default: load all records)
|
30
34
|
- **default_timezone**: If the sql type of a column is `date`/`time`/`datetime` and the embulk type is `string`, column values are formatted in this default_timezone. You can overwrite timezone for each column using column_options option. (string, default: `UTC`)
|
31
35
|
- **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
|
32
36
|
- **value_type**: embulk get values from database as this value_type. Typically, the value_type determines `getXXX` method of `java.sql.PreparedStatement`.
|
@@ -56,6 +60,42 @@ In addition, `json` type is supported for `hstore` column, and output will be as
|
|
56
60
|
`value_type` is ignored.
|
57
61
|
|
58
62
|
|
63
|
+
### Incremental loading
|
64
|
+
|
65
|
+
Incremental loading uses monotonically increasing unique columns (such as auto-increment (serial / bigserial) column) to load records inserted (or updated) after last execution.
|
66
|
+
|
67
|
+
First, if `incremental: true` is set, this plugin loads all records with additional ORDER BY. For example, if `incremental_columns: [updated_at, id]` option is set, the query will be as follows:
|
68
|
+
|
69
|
+
```
|
70
|
+
SELECT * FROM (
|
71
|
+
...original query is here...
|
72
|
+
)
|
73
|
+
ORDER BY updated_at, id
|
74
|
+
```
|
75
|
+
|
76
|
+
When bulk data loading finishes successfully, it outputs `last_record: ` parameter as config-diff so that next execution uses it.
|
77
|
+
|
78
|
+
At the next execution, when `last_record: ` is also set, this plugin generates additional WHERE conditions to load records larger than the last record. For example, if `last_record: ["2017-01-01 00:32:12", 5291]` is set, the query will be as follows:
|
79
|
+
|
80
|
+
```
|
81
|
+
SELECT * FROM (
|
82
|
+
...original query is here...
|
83
|
+
)
|
84
|
+
WHERE updated_at > '2017-01-01 00:32:12' OR (updated_at = '2017-01-01 00:32:12' AND id > 5291)
|
85
|
+
ORDER BY updated_at, id
|
86
|
+
```
|
87
|
+
|
88
|
+
Then, it updates `last_record: ` so that next execution uses the updated last_record.
|
89
|
+
|
90
|
+
**IMPORTANT**: If you set `incremental_columns: ` option, make sure that there is an index on the columns to avoid full table scan. For this example, the following index should be created:
|
91
|
+
|
92
|
+
```
|
93
|
+
CREATE INDEX embulk_incremental_loading_index ON table (updated_at, id);
|
94
|
+
```
|
95
|
+
|
96
|
+
Recommended usage is to leave `incremental_columns` unset and let this plugin automatically find an auto-increment (serial / bigserial) primary key. Currently, only strings and integers are supported as incremental_columns.
|
97
|
+
|
98
|
+
|
59
99
|
## Example
|
60
100
|
|
61
101
|
```yaml
|
@@ -68,6 +108,16 @@ in:
|
|
68
108
|
table: my_table
|
69
109
|
select: "col1, col2, col3"
|
70
110
|
where: "col4 != 'a'"
|
111
|
+
order_by: "col1 DESC"
|
112
|
+
```
|
113
|
+
|
114
|
+
This configuration will generate the following SQL:
|
115
|
+
|
116
|
+
```
|
117
|
+
SELECT col1, col2, col3
|
118
|
+
FROM "my_table"
|
119
|
+
WHERE col4 != 'a'
|
120
|
+
ORDER BY col1 DESC
|
71
121
|
```
|
72
122
|
|
73
123
|
If you need a complex SQL,
|
Binary file
|
Binary file
|
@@ -48,6 +48,10 @@ public class PostgreSQLInputPlugin
|
|
48
48
|
@Config("ssl")
|
49
49
|
@ConfigDefault("false")
|
50
50
|
public boolean getSsl();
|
51
|
+
|
52
|
+
@Config("application_name")
|
53
|
+
@ConfigDefault("\"embulk-input-postgresql\"")
|
54
|
+
public String getApplicationName();
|
51
55
|
}
|
52
56
|
|
53
57
|
@Override
|
@@ -82,6 +86,8 @@ public class PostgreSQLInputPlugin
|
|
82
86
|
}
|
83
87
|
// setting ssl=false enables SSL. See org.postgresql.core.v3.openConnectionImpl.
|
84
88
|
|
89
|
+
props.setProperty("ApplicationName", t.getApplicationName());
|
90
|
+
|
85
91
|
props.putAll(t.getOptions());
|
86
92
|
|
87
93
|
Connection con = driver.connect(url, props);
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.input.postgresql;
|
2
2
|
|
3
|
+
import java.util.List;
|
3
4
|
import java.sql.Connection;
|
4
5
|
import java.sql.PreparedStatement;
|
5
6
|
import java.sql.ResultSet;
|
@@ -7,6 +8,8 @@ import java.sql.SQLException;
|
|
7
8
|
import org.slf4j.Logger;
|
8
9
|
import org.embulk.spi.Exec;
|
9
10
|
import org.embulk.input.jdbc.JdbcInputConnection;
|
11
|
+
import org.embulk.input.jdbc.JdbcLiteral;
|
12
|
+
import org.embulk.input.jdbc.getter.ColumnGetter;
|
10
13
|
|
11
14
|
public class PostgreSQLInputConnection
|
12
15
|
extends JdbcInputConnection
|
@@ -20,9 +23,24 @@ public class PostgreSQLInputConnection
|
|
20
23
|
}
|
21
24
|
|
22
25
|
@Override
|
23
|
-
protected
|
26
|
+
protected BatchSelect newBatchSelect(PreparedQuery preparedQuery,
|
27
|
+
List<ColumnGetter> getters,
|
28
|
+
int fetchRows, int queryTimeout) throws SQLException
|
24
29
|
{
|
25
|
-
|
30
|
+
String query = "DECLARE cur NO SCROLL CURSOR FOR " + preparedQuery.getQuery();
|
31
|
+
List<JdbcLiteral> params = preparedQuery.getParameters();
|
32
|
+
|
33
|
+
logger.info("SQL: " + query);
|
34
|
+
PreparedStatement stmt = connection.prepareStatement(query);
|
35
|
+
try {
|
36
|
+
if (!params.isEmpty()) {
|
37
|
+
logger.info("Parameters: {}", params);
|
38
|
+
prepareParameters(stmt, getters, params);
|
39
|
+
}
|
40
|
+
stmt.executeUpdate();
|
41
|
+
} finally {
|
42
|
+
stmt.close();
|
43
|
+
}
|
26
44
|
|
27
45
|
String fetchSql = "FETCH FORWARD "+fetchRows+" FROM cur";
|
28
46
|
// Because socketTimeout is set in Connection, don't need to set queryTimeout.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-postgresql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Selects records from a table.
|
14
14
|
email:
|
@@ -19,6 +19,9 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- README.md
|
21
21
|
- build.gradle
|
22
|
+
- classpath/embulk-input-jdbc-0.7.3.jar
|
23
|
+
- classpath/embulk-input-postgresql-0.7.3.jar
|
24
|
+
- classpath/postgresql-9.4-1205-jdbc41.jar
|
22
25
|
- lib/embulk/input/postgresql.rb
|
23
26
|
- src/main/java/org/embulk/input/PostgreSQLInputPlugin.java
|
24
27
|
- src/main/java/org/embulk/input/postgresql/PostgreSQLInputConnection.java
|
@@ -27,9 +30,6 @@ files:
|
|
27
30
|
- src/test/java/org/embulk/input/postgresql/PostgreSQLInputPluginTest.java
|
28
31
|
- src/test/resources/yml/input_hstore.yml
|
29
32
|
- src/test/resources/yml/input_hstore2.yml
|
30
|
-
- classpath/embulk-input-jdbc-0.7.2.jar
|
31
|
-
- classpath/embulk-input-postgresql-0.7.2.jar
|
32
|
-
- classpath/postgresql-9.4-1205-jdbc41.jar
|
33
33
|
homepage: https://github.com/embulk/embulk-input-jdbc
|
34
34
|
licenses:
|
35
35
|
- Apache 2.0
|
@@ -40,17 +40,17 @@ require_paths:
|
|
40
40
|
- lib
|
41
41
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
42
|
requirements:
|
43
|
-
- -
|
43
|
+
- - ">="
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
46
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
47
|
requirements:
|
48
|
-
- -
|
48
|
+
- - ">="
|
49
49
|
- !ruby/object:Gem::Version
|
50
50
|
version: '0'
|
51
51
|
requirements: []
|
52
52
|
rubyforge_project:
|
53
|
-
rubygems_version: 2.
|
53
|
+
rubygems_version: 2.4.8
|
54
54
|
signing_key:
|
55
55
|
specification_version: 4
|
56
56
|
summary: JDBC input plugin for Embulk
|
Binary file
|
Binary file
|