embulk-input-postgresql 0.7.2 → 0.7.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +52 -2
- data/classpath/embulk-input-jdbc-0.7.3.jar +0 -0
- data/classpath/embulk-input-postgresql-0.7.3.jar +0 -0
- data/src/main/java/org/embulk/input/PostgreSQLInputPlugin.java +6 -0
- data/src/main/java/org/embulk/input/postgresql/PostgreSQLInputConnection.java +20 -2
- metadata +8 -8
- data/classpath/embulk-input-jdbc-0.7.2.jar +0 -0
- data/classpath/embulk-input-postgresql-0.7.2.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93171986c203df79e84845be61756890f6108e3c
|
4
|
+
data.tar.gz: f76cd155e82c075761ed4b5d3196de5d872af719
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc8b710ebcdbb08a5505a762095621c779f448b256398c1c40fa8c7a0739b587965ec64486f460c9377699649d699ee9ddec5cb5654cee90d597facefc364c80
|
7
|
+
data.tar.gz: fdc4e9af5ff63a5f93f17db6ee93792c897ee616a11db40e2f1a0d5fdff735ae51238950dfad3ac6c30e0d2557c504cbb41df79243939e3ae55805c27adb4f90
|
data/README.md
CHANGED
@@ -19,14 +19,18 @@ PostgreSQL input plugins for Embulk loads records from PostgreSQL.
|
|
19
19
|
- **connect_timeout**: timeout for establishment of a database connection. (integer (seconds), default: 300)
|
20
20
|
- **socket_timeout**: timeout for socket read operations. 0 means no timeout. (integer (seconds), default: 1800)
|
21
21
|
- **ssl**: enables SSL. data will be encrypted but CA or certification will not be verified (boolean, default: false)
|
22
|
+
- **application_name**: application name shown on pg_stat_activity. (string, default: "embulk-input-postgresql")
|
22
23
|
- **options**: extra JDBC properties (hash, default: {})
|
23
24
|
- If you write SQL directly,
|
24
25
|
- **query**: SQL to run (string)
|
25
26
|
- If **query** is not set,
|
26
27
|
- **table**: source table name (string, required)
|
27
|
-
- **select**:
|
28
|
+
- **select**: expression of select (e.g. `id, created_at`) (string, default: "*")
|
28
29
|
- **where**: WHERE condition to filter the rows (string, default: no-condition)
|
29
|
-
- **order_by**:
|
30
|
+
- **order_by**: expression of ORDER BY to sort rows (e.g. `created_at DESC, id ASC`) (string, default: not sorted)
|
31
|
+
- **incremental**: if true, enables incremental loading. See next section for details (boolean, default: false)
|
32
|
+
- **incremental_columns**: column names for incremental loading (array of strings, default: use primary keys)
|
33
|
+
- **last_record**: values of the last record for incremental loading (array of objects, default: load all records)
|
30
34
|
- **default_timezone**: If the sql type of a column is `date`/`time`/`datetime` and the embulk type is `string`, column values are formatted in this default_timezone. You can override the timezone for each column using the column_options option. (string, default: `UTC`)
|
31
35
|
- **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
|
32
36
|
- **value_type**: embulk get values from database as this value_type. Typically, the value_type determines `getXXX` method of `java.sql.PreparedStatement`.
|
@@ -56,6 +60,42 @@ In addition, `json` type is supported for `hstore` column, and output will be as
|
|
56
60
|
`value_type` is ignored.
|
57
61
|
|
58
62
|
|
63
|
+
### Incremental loading
|
64
|
+
|
65
|
+
Incremental loading uses monotonically increasing unique columns (such as an auto-increment (serial / bigserial) column) to load records inserted (or updated) after the last execution.
|
66
|
+
|
67
|
+
First, if `incremental: true` is set, this plugin loads all records with an additional ORDER BY. For example, if the `incremental_columns: [updated_at, id]` option is set, the query will be as follows:
|
68
|
+
|
69
|
+
```
|
70
|
+
SELECT * FROM (
|
71
|
+
...original query is here...
|
72
|
+
)
|
73
|
+
ORDER BY updated_at, id
|
74
|
+
```
|
75
|
+
|
76
|
+
When bulk data loading finishes successfully, it outputs the `last_record: ` parameter as config-diff so that the next execution uses it.
|
77
|
+
|
78
|
+
At the next execution, when `last_record: ` is also set, this plugin generates additional WHERE conditions to load records larger than the last record. For example, if `last_record: ["2017-01-01 00:32:12", 5291]` is set,
|
79
|
+
|
80
|
+
```
|
81
|
+
SELECT * FROM (
|
82
|
+
...original query is here...
|
83
|
+
)
|
84
|
+
WHERE updated_at > '2017-01-01 00:32:12' OR (updated_at = '2017-01-01 00:32:12' AND id > 5291)
|
85
|
+
ORDER BY updated_at, id
|
86
|
+
```
|
87
|
+
|
88
|
+
Then, it updates `last_record: ` so that next execution uses the updated last_record.
|
89
|
+
|
90
|
+
**IMPORTANT**: If you set the `incremental_columns: ` option, make sure that there is an index on the columns to avoid a full table scan. For this example, the following index should be created:
|
91
|
+
|
92
|
+
```
|
93
|
+
CREATE INDEX embulk_incremental_loading_index ON table (updated_at, id);
|
94
|
+
```
|
95
|
+
|
96
|
+
Recommended usage is to leave `incremental_columns` unset and let this plugin automatically find an auto-increment (serial / bigserial) primary key. Currently, only strings and integers are supported as incremental_columns.
|
97
|
+
|
98
|
+
|
59
99
|
## Example
|
60
100
|
|
61
101
|
```yaml
|
@@ -68,6 +108,16 @@ in:
|
|
68
108
|
table: my_table
|
69
109
|
select: "col1, col2, col3"
|
70
110
|
where: "col4 != 'a'"
|
111
|
+
order_by: "col1 DESC"
|
112
|
+
```
|
113
|
+
|
114
|
+
This configuration will generate following SQL:
|
115
|
+
|
116
|
+
```
|
117
|
+
SELECT col1, col2, col3
|
118
|
+
FROM "my_table"
|
119
|
+
WHERE col4 != 'a'
|
120
|
+
ORDER BY col1 DESC
|
71
121
|
```
|
72
122
|
|
73
123
|
If you need a complex SQL,
|
Binary file
|
Binary file
|
@@ -48,6 +48,10 @@ public class PostgreSQLInputPlugin
|
|
48
48
|
@Config("ssl")
|
49
49
|
@ConfigDefault("false")
|
50
50
|
public boolean getSsl();
|
51
|
+
|
52
|
+
@Config("application_name")
|
53
|
+
@ConfigDefault("\"embulk-input-postgresql\"")
|
54
|
+
public String getApplicationName();
|
51
55
|
}
|
52
56
|
|
53
57
|
@Override
|
@@ -82,6 +86,8 @@ public class PostgreSQLInputPlugin
|
|
82
86
|
}
|
83
87
|
// setting ssl=false enables SSL. See org.postgresql.core.v3.openConnectionImpl.
|
84
88
|
|
89
|
+
props.setProperty("ApplicationName", t.getApplicationName());
|
90
|
+
|
85
91
|
props.putAll(t.getOptions());
|
86
92
|
|
87
93
|
Connection con = driver.connect(url, props);
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.input.postgresql;
|
2
2
|
|
3
|
+
import java.util.List;
|
3
4
|
import java.sql.Connection;
|
4
5
|
import java.sql.PreparedStatement;
|
5
6
|
import java.sql.ResultSet;
|
@@ -7,6 +8,8 @@ import java.sql.SQLException;
|
|
7
8
|
import org.slf4j.Logger;
|
8
9
|
import org.embulk.spi.Exec;
|
9
10
|
import org.embulk.input.jdbc.JdbcInputConnection;
|
11
|
+
import org.embulk.input.jdbc.JdbcLiteral;
|
12
|
+
import org.embulk.input.jdbc.getter.ColumnGetter;
|
10
13
|
|
11
14
|
public class PostgreSQLInputConnection
|
12
15
|
extends JdbcInputConnection
|
@@ -20,9 +23,24 @@ public class PostgreSQLInputConnection
|
|
20
23
|
}
|
21
24
|
|
22
25
|
@Override
|
23
|
-
protected
|
26
|
+
protected BatchSelect newBatchSelect(PreparedQuery preparedQuery,
|
27
|
+
List<ColumnGetter> getters,
|
28
|
+
int fetchRows, int queryTimeout) throws SQLException
|
24
29
|
{
|
25
|
-
|
30
|
+
String query = "DECLARE cur NO SCROLL CURSOR FOR " + preparedQuery.getQuery();
|
31
|
+
List<JdbcLiteral> params = preparedQuery.getParameters();
|
32
|
+
|
33
|
+
logger.info("SQL: " + query);
|
34
|
+
PreparedStatement stmt = connection.prepareStatement(query);
|
35
|
+
try {
|
36
|
+
if (!params.isEmpty()) {
|
37
|
+
logger.info("Parameters: {}", params);
|
38
|
+
prepareParameters(stmt, getters, params);
|
39
|
+
}
|
40
|
+
stmt.executeUpdate();
|
41
|
+
} finally {
|
42
|
+
stmt.close();
|
43
|
+
}
|
26
44
|
|
27
45
|
String fetchSql = "FETCH FORWARD "+fetchRows+" FROM cur";
|
28
46
|
// Because socketTimeout is set in Connection, don't need to set queryTimeout.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-postgresql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Selects records from a table.
|
14
14
|
email:
|
@@ -19,6 +19,9 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- README.md
|
21
21
|
- build.gradle
|
22
|
+
- classpath/embulk-input-jdbc-0.7.3.jar
|
23
|
+
- classpath/embulk-input-postgresql-0.7.3.jar
|
24
|
+
- classpath/postgresql-9.4-1205-jdbc41.jar
|
22
25
|
- lib/embulk/input/postgresql.rb
|
23
26
|
- src/main/java/org/embulk/input/PostgreSQLInputPlugin.java
|
24
27
|
- src/main/java/org/embulk/input/postgresql/PostgreSQLInputConnection.java
|
@@ -27,9 +30,6 @@ files:
|
|
27
30
|
- src/test/java/org/embulk/input/postgresql/PostgreSQLInputPluginTest.java
|
28
31
|
- src/test/resources/yml/input_hstore.yml
|
29
32
|
- src/test/resources/yml/input_hstore2.yml
|
30
|
-
- classpath/embulk-input-jdbc-0.7.2.jar
|
31
|
-
- classpath/embulk-input-postgresql-0.7.2.jar
|
32
|
-
- classpath/postgresql-9.4-1205-jdbc41.jar
|
33
33
|
homepage: https://github.com/embulk/embulk-input-jdbc
|
34
34
|
licenses:
|
35
35
|
- Apache 2.0
|
@@ -40,17 +40,17 @@ require_paths:
|
|
40
40
|
- lib
|
41
41
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
42
|
requirements:
|
43
|
-
- -
|
43
|
+
- - ">="
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
46
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
47
|
requirements:
|
48
|
-
- -
|
48
|
+
- - ">="
|
49
49
|
- !ruby/object:Gem::Version
|
50
50
|
version: '0'
|
51
51
|
requirements: []
|
52
52
|
rubyforge_project:
|
53
|
-
rubygems_version: 2.
|
53
|
+
rubygems_version: 2.4.8
|
54
54
|
signing_key:
|
55
55
|
specification_version: 4
|
56
56
|
summary: JDBC input plugin for Embulk
|
Binary file
|
Binary file
|