embulk-input-postgresql 0.8.5 → 0.8.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 06c0715712d158b155fdec049b1d902ad2c73286
4
- data.tar.gz: 7d9c2e7114cc14883303756a2810111c151237c1
3
+ metadata.gz: f267963fed40c4b706f5c345b06fb29d2d4c04a7
4
+ data.tar.gz: 257c86658208b973aae246019f43a2b48ce4010a
5
5
  SHA512:
6
- metadata.gz: 2d1012d143bf14b25ef74805e7784da11a6dd1b39e6da8fe4c17108aba4c204799d1326716fdfd3a161f6f022db073d71692c174a8f3fbcfa3bd3b8fedacdade
7
- data.tar.gz: 02c1db91904b99b8305c9bd9e057cfd6b498daadc652fac00b2edfd343908e2fbc8684bf33b953627f7659c486b52343f8c0d883cb8cc0e721c74051ed299a61
6
+ metadata.gz: 15d1b0963f75aa994d747e28f2b33b1b3b6e1fdeb5060063237bd09b5506a22fd104e5472c7a8c07ed8118739e80b4d8d777ce15ed1688d628b03645733f5576
7
+ data.tar.gz: 7466b193d8105c41f5acc2d07b8f84bf10b956b64be7fafdabee1100a36b9b2a5932fde36f9a68edbcfc408a7d92a317a6050dcafc1dd25ac78d6ccce28cf9ac
data/README.md CHANGED
@@ -35,7 +35,7 @@ PostgreSQL input plugin for Embulk loads records from PostgreSQL.
35
35
  - **default_timezone**: If the sql type of a column is `date`/`time`/`datetime` and the embulk type is `string`, column values are formatted in this default_timezone. You can override the timezone for each column using the column_options option. (string, default: `UTC`)
36
36
  - **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
37
37
  - **value_type**: Embulk gets values from the database as this value_type. Typically, the value_type determines which `getXXX` method of `java.sql.ResultSet` is used.
38
- (string, default: depends on the sql type of the column. Available values options are: `long`, `double`, `float`, `decimal`, `boolean`, `string`, `json`, `date`, `time`, `timestamp`)
38
+ (string, default: depends on the sql type of the column. Available values are: `long`, `double`, `float`, `decimal`, `boolean`, `string`, `json`, `date`, `time`, `timestamp`, `array`)
39
39
  See below for `hstore` column.
40
40
  - **type**: Column values are converted to this embulk type.
41
41
  Available values are: `boolean`, `long`, `double`, `string`, `json`, `timestamp`.
@@ -60,6 +60,24 @@ In addition, `json` type is supported for `hstore` column, and output will be as
60
60
 
61
61
  `value_type` is ignored.
62
62
 
63
+ ### Arrays column support
64
+
65
+ PostgreSQL allows columns of a table to be defined as variable-length multidimensional arrays, and this plugin supports converting their values into `string` or `json`.
66
+
67
+ By default, `type` of `column_options` for `array` column is `string`, and output will be similar to what `psql` produces:
68
+ ```
69
+ {1000,2000,3000,4000}, {{red,green},{blue,cyan}}
70
+ {5000,6000,7000,8000}, {{yellow,magenta},{purple,"light,dark"}}
71
+ ```
72
+
73
+ Output of `json` type will be as follows:
74
+ ```
75
+ [1000,2000,3000,4000],[["red","green"],["blue","cyan"]]
76
+ [5000,6000,7000,8000],[["yellow","magenta"],["purple","light,dark"]]
77
+ ```
78
+ However, the support for `json` type has the following limitations:
79
+ - Postgres server version must be 8.3.0 or above
80
+ - The value type of array element must be number, bool, or text, e.g. bool[], integer[], text[][], bigint[][][]...
63
81
 
64
82
  ### Incremental loading
65
83
 
@@ -82,7 +100,7 @@ At the next execution, when `last_record: ` is also set, this plugin generates a
82
100
  SELECT * FROM (
83
101
  ...original query is here...
84
102
  )
85
- WHERE created_at > '2017-01-01 00:32:12' OR (created_at = '2017-01-01 00:32:12' AND id > 5291)
103
+ WHERE updated_at > '2017-01-01 00:32:12' OR (updated_at = '2017-01-01 00:32:12' AND id > 5291)
86
104
  ORDER BY updated_at, id
87
105
  ```
88
106
 
@@ -0,0 +1,104 @@
1
+ package org.embulk.input.postgresql.getter;
2
+
3
+ import com.fasterxml.jackson.databind.node.ArrayNode;
4
+ import com.fasterxml.jackson.databind.node.NullNode;
5
+ import org.embulk.input.jdbc.getter.AbstractColumnGetter;
6
+ import org.embulk.spi.Column;
7
+ import org.embulk.spi.PageBuilder;
8
+ import org.embulk.spi.json.JsonParseException;
9
+ import org.embulk.spi.json.JsonParser;
10
+ import org.embulk.spi.type.Type;
11
+ import org.msgpack.value.Value;
12
+
13
+ import java.sql.Array;
14
+ import java.sql.ResultSet;
15
+ import java.sql.SQLException;
16
+
17
+ import java.sql.Types;
18
+
19
+ public class ArrayColumnGetter
20
+ extends AbstractColumnGetter
21
+ {
22
+ protected Array value;
23
+
24
+ protected final JsonParser jsonParser = new JsonParser();
25
+
26
+ public ArrayColumnGetter(PageBuilder to, Type toType)
27
+ {
28
+ super(to, toType);
29
+ }
30
+
31
+ @Override
32
+ protected void fetch(ResultSet from, int fromIndex) throws SQLException
33
+ {
34
+ value = from.getArray(fromIndex);
35
+ }
36
+
37
+ private ArrayNode buildJsonArray(Object[] elements)
38
+ throws SQLException
39
+ {
40
+ ArrayNode arrayNode = jsonNodeFactory.arrayNode();
41
+ for (Object v : elements) {
42
+ if (v == null) {
43
+ arrayNode.add(NullNode.getInstance());
44
+ continue;
45
+ }
46
+ if (v.getClass().isArray()) {
47
+ arrayNode.add(buildJsonArray((Object[]) v));
48
+ }
49
+ else {
50
+ switch (value.getBaseType()) {
51
+ case Types.TINYINT:
52
+ case Types.SMALLINT:
53
+ case Types.INTEGER:
54
+ case Types.BIGINT:
55
+ arrayNode.add(jsonNodeFactory.numberNode(((Number) v).longValue()));
56
+ break;
57
+ case Types.FLOAT:
58
+ case Types.REAL:
59
+ case Types.DOUBLE:
60
+ arrayNode.add(jsonNodeFactory.numberNode(((Number) v).doubleValue()));
61
+ break;
62
+ case Types.BOOLEAN:
63
+ case Types.BIT: // JDBC BIT is boolean, unlike SQL-92
64
+ arrayNode.add(jsonNodeFactory.booleanNode((Boolean) v));
65
+ break;
66
+ case Types.CHAR:
67
+ case Types.VARCHAR:
68
+ case Types.LONGVARCHAR:
69
+ case Types.CLOB:
70
+ case Types.NCHAR:
71
+ case Types.NVARCHAR:
72
+ case Types.LONGNVARCHAR:
73
+ arrayNode.add(jsonNodeFactory.textNode((String) v));
74
+ break;
75
+ }
76
+ }
77
+ }
78
+ return arrayNode;
79
+ }
80
+
81
+ @Override
82
+ protected Type getDefaultToType()
83
+ {
84
+ return org.embulk.spi.type.Types.STRING;
85
+ }
86
+
87
+ @Override
88
+ public void jsonColumn(Column column)
89
+ {
90
+ try {
91
+ Value v = jsonParser.parse(buildJsonArray((Object[]) value.getArray()).toString());
92
+ to.setJson(column, v);
93
+ }
94
+ catch (JsonParseException | SQLException | ClassCastException e) {
95
+ super.jsonColumn(column);
96
+ }
97
+ }
98
+
99
+ @Override
100
+ public void stringColumn(Column column)
101
+ {
102
+ to.setString(column, value.toString());
103
+ }
104
+ }
@@ -27,6 +27,10 @@ public class PostgreSQLColumnGetterFactory extends ColumnGetterFactory
27
27
  return new HstoreToJsonColumnGetter(to, Types.JSON);
28
28
  }
29
29
 
30
+ if (column.getSqlType() == java.sql.Types.ARRAY) {
31
+ return new ArrayColumnGetter(to, getToType(option));
32
+ }
33
+
30
34
  ColumnGetter getter = super.newColumnGetter(con, task, column, option);
31
35
 
32
36
  // incremental loading wrapper
@@ -48,7 +52,8 @@ public class PostgreSQLColumnGetterFactory extends ColumnGetterFactory
48
52
  case "jsonb":
49
53
  return "json";
50
54
  case "hstore":
51
- // hstore is converted to string by default
55
+ case "array":
56
+ // array and hstore are converted to string by default
52
57
  return "string";
53
58
  default:
54
59
  return super.sqlTypeToValueType(column, sqlType);
@@ -0,0 +1,74 @@
1
+ package org.embulk.input.postgresql;
2
+
3
+ import org.embulk.config.ConfigSource;
4
+ import org.embulk.input.PostgreSQLInputPlugin;
5
+ import org.embulk.spi.InputPlugin;
6
+ import org.embulk.test.EmbulkTests;
7
+ import org.embulk.test.TestingEmbulk;
8
+ import org.embulk.test.TestingEmbulk.RunResult;
9
+ import org.junit.Before;
10
+ import org.junit.Rule;
11
+ import org.junit.Test;
12
+
13
+ import java.nio.file.Path;
14
+
15
+ import static org.embulk.input.postgresql.PostgreSQLTests.execute;
16
+ import static org.embulk.test.EmbulkTests.readSortedFile;
17
+ import static org.hamcrest.Matchers.is;
18
+ import static org.junit.Assert.assertThat;
19
+
20
+ public class ArrayTest
21
+ {
22
+ private static final String BASIC_RESOURCE_PATH = "org/embulk/input/postgresql/test/expect/array/";
23
+
24
+ private static ConfigSource loadYamlResource(TestingEmbulk embulk, String fileName)
25
+ {
26
+ return embulk.loadYamlResource(BASIC_RESOURCE_PATH + fileName);
27
+ }
28
+
29
+ private static String readResource(String fileName)
30
+ {
31
+ return EmbulkTests.readResource(BASIC_RESOURCE_PATH + fileName);
32
+ }
33
+
34
+ @Rule
35
+ public TestingEmbulk embulk = TestingEmbulk.builder()
36
+ .registerPlugin(InputPlugin.class, "postgresql", PostgreSQLInputPlugin.class)
37
+ .build();
38
+
39
+ private ConfigSource baseConfig;
40
+
41
+ @Before
42
+ public void setup()
43
+ {
44
+ baseConfig = PostgreSQLTests.baseConfig();
45
+ }
46
+
47
+ @Test
48
+ public void loadAsStringByDefault() throws Exception
49
+ {
50
+ execute(readResource("setup.sql"));
51
+
52
+ Path out1 = embulk.createTempFile("csv");
53
+ embulk.runInput(
54
+ baseConfig.merge(loadYamlResource(embulk, "as_string.yml")),
55
+ out1);
56
+ assertThat(
57
+ readSortedFile(out1),
58
+ is(readResource("expected_string.csv")));
59
+ }
60
+
61
+ @Test
62
+ public void loadAsJson() throws Exception
63
+ {
64
+ execute(readResource("setup.sql"));
65
+
66
+ Path out1 = embulk.createTempFile("csv");
67
+ embulk.runInput(
68
+ baseConfig.merge(loadYamlResource(embulk, "as_json.yml")),
69
+ out1);
70
+ assertThat(
71
+ readSortedFile(out1),
72
+ is(readResource("expected_json.csv")));
73
+ }
74
+ }
@@ -4,6 +4,8 @@ import org.embulk.test.EmbulkTests;
4
4
  import com.google.common.base.Throwables;
5
5
  import com.google.common.io.ByteStreams;
6
6
  import java.io.IOException;
7
+ import jnr.ffi.Platform;
8
+ import jnr.ffi.Platform.OS;
7
9
  import org.embulk.config.ConfigSource;
8
10
  import static java.util.Locale.ENGLISH;
9
11
 
@@ -17,7 +19,7 @@ public class PostgreSQLTests
17
19
  public static void execute(String sql)
18
20
  {
19
21
  ConfigSource config = baseConfig();
20
- ProcessBuilder pb = new ProcessBuilder("psql", "-w", "--set", "ON_ERROR_STOP=1", "-c", sql);
22
+ ProcessBuilder pb = new ProcessBuilder("psql", "-w", "--set", "ON_ERROR_STOP=1", "-c", convert(sql));
21
23
  pb.environment().put("PGUSER", config.get(String.class, "user"));
22
24
  pb.environment().put("PGPASSWORD", config.get(String.class, "password"));
23
25
  pb.environment().put("PGDATABASE", config.get(String.class, "database"));
@@ -36,4 +38,13 @@ public class PostgreSQLTests
36
38
  "Command finished with non-zero exit code. Exit code is %d.", code));
37
39
  }
38
40
  }
41
+
42
+ private static String convert(String sql)
43
+ {
44
+ if (Platform.getNativePlatform().getOS().equals(OS.WINDOWS)) {
45
+ // '"' should be escaped as '\"' on Windows
46
+ return sql.replace("\"", "\\\"");
47
+ }
48
+ return sql;
49
+ }
39
50
  }
@@ -0,0 +1,5 @@
1
+ table: input_array
2
+ column_options:
3
+ c1: {type: json}
4
+ c2: {type: json}
5
+ c3: {type: json}
@@ -0,0 +1,2 @@
1
+ "[1000,2000,3000,4000]","[[""red"",""green""],[""blue"",""cyan""]]",[[[true]]]
2
+ "[5000,6000,7000,8000]","[[""yellow"",""magenta""],[""purple"",""light,dark""]]","[[[true,true],[false,false]],[[true,false],[false,true]]]"
@@ -0,0 +1,2 @@
1
+ "{1000,2000,3000,4000}","{{red,green},{blue,cyan}}",{{{t}}}
2
+ "{5000,6000,7000,8000}","{{yellow,magenta},{purple,""light,dark""}}","{{{t,t},{f,f}},{{t,f},{f,t}}}"
@@ -0,0 +1,11 @@
1
+ drop table if exists input_array;
2
+
3
+ create table input_array (
4
+ c1 integer[],
5
+ c2 text[][],
6
+ c3 bool[][][]
7
+ );
8
+
9
+ insert into input_array (c1, c2, c3) values ('{1000, 2000, 3000, 4000}', '{{"red", "green"}, {"blue", "cyan"}}', '{{{true}}}');
10
+
11
+ insert into input_array (c1, c2, c3) values ('{5000, 6000, 7000, 8000}', '{{"yellow", "magenta"}, {"purple", "light,dark"}}', '{{{t,t},{f,f}},{{t,f},{f,t}}}');
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-postgresql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.5
4
+ version: 0.8.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-31 00:00:00.000000000 Z
11
+ date: 2017-11-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Selects records from a table.
14
14
  email:
@@ -19,17 +19,24 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - README.md
21
21
  - build.gradle
22
- - classpath/embulk-input-jdbc-0.8.5.jar
23
- - classpath/embulk-input-postgresql-0.8.5.jar
22
+ - classpath/embulk-input-jdbc-0.8.6.jar
23
+ - classpath/embulk-input-postgresql-0.8.6.jar
24
24
  - default_jdbc_driver/postgresql-9.4-1205-jdbc41.jar
25
25
  - lib/embulk/input/postgresql.rb
26
26
  - src/main/java/org/embulk/input/PostgreSQLInputPlugin.java
27
27
  - src/main/java/org/embulk/input/postgresql/PostgreSQLInputConnection.java
28
+ - src/main/java/org/embulk/input/postgresql/getter/ArrayColumnGetter.java
28
29
  - src/main/java/org/embulk/input/postgresql/getter/HstoreToJsonColumnGetter.java
29
30
  - src/main/java/org/embulk/input/postgresql/getter/PostgreSQLColumnGetterFactory.java
31
+ - src/test/java/org/embulk/input/postgresql/ArrayTest.java
30
32
  - src/test/java/org/embulk/input/postgresql/HstoreTest.java
31
33
  - src/test/java/org/embulk/input/postgresql/IncrementalTest.java
32
34
  - src/test/java/org/embulk/input/postgresql/PostgreSQLTests.java
35
+ - src/test/resources/org/embulk/input/postgresql/test/expect/array/as_json.yml
36
+ - src/test/resources/org/embulk/input/postgresql/test/expect/array/as_string.yml
37
+ - src/test/resources/org/embulk/input/postgresql/test/expect/array/expected_json.csv
38
+ - src/test/resources/org/embulk/input/postgresql/test/expect/array/expected_string.csv
39
+ - src/test/resources/org/embulk/input/postgresql/test/expect/array/setup.sql
33
40
  - src/test/resources/org/embulk/input/postgresql/test/expect/hstore/as_json.yml
34
41
  - src/test/resources/org/embulk/input/postgresql/test/expect/hstore/as_string.yml
35
42
  - src/test/resources/org/embulk/input/postgresql/test/expect/hstore/expected_json.csv
Binary file