embulk-output-s3_per_record 0.3.1 → 0.4.0
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +46 -6
- data/build.gradle +2 -1
- data/example/.bundle/config +2 -0
- data/example/Gemfile +3 -0
- data/example/Gemfile.lock +13 -0
- data/example/example_array.json +1 -0
- data/example/example_array.yml.liquid +16 -0
- data/example/example_long.json +1 -0
- data/example/example_long.yml.liquid +16 -0
- data/example/example_map.json +2 -0
- data/example/example_map.yml.liquid +16 -0
- data/example/example_map_json.yml.liquid +16 -0
- data/example/example_multicolumn.json +3 -0
- data/example/example_multicolumn.yml.liquid +23 -0
- data/example/example_multicolumn_json.yml.liquid +25 -0
- data/example/example_string.json +2 -0
- data/example/example_string.yml.liquid +16 -0
- data/example/example_string_json.yml.liquid +16 -0
- data/example/example_timestamp.json +1 -0
- data/example/example_timestamp.yml.liquid +18 -0
- data/example/example_timestamp_json.yml.liquid +16 -0
- data/gradlew.bat +90 -90
- data/src/main/java/org/embulk/output/s3_per_record/S3PerRecordOutputPlugin.java +149 -39
- data/src/main/java/org/embulk/output/s3_per_record/visitor/JsonMultiColumnVisitor.java +73 -0
- data/src/main/java/org/embulk/output/s3_per_record/visitor/JsonSingleColumnVisitor.java +63 -0
- data/src/main/java/org/embulk/output/s3_per_record/visitor/MessagePackMultiColumnVisitor.java +77 -0
- data/src/main/java/org/embulk/output/s3_per_record/visitor/MessagePackSingleColumnVisitor.java +88 -0
- data/src/main/java/org/embulk/output/s3_per_record/visitor/S3PerRecordOutputColumnVisitor.java +7 -0
- metadata +47 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cf8a267d6dac20c30dd45de82a04f3f47e670fdb
+  data.tar.gz: 9ebdc01acab6fb9da2e9b6311c01132f137ff7c6
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6b642a794a73d81ec10f3c9a530648124af38594e627c5e874cd9ab5f5ee02aa2e804f63a57717ba058f4a348f614059f17b998df64c967739ccd3275b80f8e4
+  data.tar.gz: 56ddc01a455e8c5a12cfaea19378e6b924e92eeb2f7d09266101f5f173e54598f09e93237df02cca44ce0fba0effe0e67c469892bfb326cf7b92188bb0435925
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -3,6 +3,18 @@
 This plugin uploads a column's value to S3 as one S3 object per row.
 S3 object key can be composed of another column.
 
+## Breaking Changes from 0.3.x
+This plugin now serializes data columns by itself.
+At present, supported formats are `msgpack` and `json`.
+Because of this, the config parameters have changed:
+
+- Rename `data_column` to `data_columns`, and change its type from `string` to `array`.
+- Add `mode`.
+- Add `serializer`.
+- Add `column_options`.
+
+Please update your configs.
+
 ## Overview
 
 * **Plugin type**: output
@@ -13,33 +25,61 @@ S3 object key can be composed of another column.
 
 - **bucket**: S3 bucket name.
 - **key**: S3 object key. `${column}` is replaced by the column's value.
-- **data_column**: Column for the object's body.
+- **data_columns**: Columns for the object's body.
+- **serializer**: Serializer format. Supported formats are `msgpack` and `json`. Default is `msgpack`.
+- **mode**: Output mode. Supported modes are `multi_column` and `single_column`. Default is `multi_column`.
 - **aws_access_key_id**: (optional) AWS access key id. If not given, [DefaultAWSCredentialsProviderChain](http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) is used to get credentials.
 - **aws_secret_access_key**: (optional) AWS secret access key. Required if `aws_access_key_id` is given.
-- **base64**: (default false) If true, decode the value as Base64 before uploading.
 - **retry_limit**: (default 2) On connection errors, this plugin automatically retries up to this many times.
+- **column_options**: Timestamp formatting options for columns.
 
 ## Example
 
+### multi_column mode
+
 ```yaml
 out:
   type: s3_per_record
   bucket: your-bucket-name
   key: "sample/${id}.txt"
-  data_column: payload
+  mode: multi_column
+  serializer: msgpack
+  data_columns: [id, payload]
 ```
 
 ```
-id | payload
+id | payload (json type)
 ------------
 1 | hello
 5 | world
 12 | embulk
 ```
 
-This generates `s3://your-bucket-name/sample/1.txt` with its content `hello`,
-`s3://your-bucket-name/sample/5.txt` with its content `world`, and so on.
+This generates `s3://your-bucket-name/sample/1.txt` with its content `{"id": 1, "payload": "hello"}` serialized in msgpack format,
+`s3://your-bucket-name/sample/5.txt` with its content `{"id": 5, "payload": "world"}`, and so on.
+
+### single_column mode
+
+```yaml
+out:
+  type: s3_per_record
+  bucket: your-bucket-name
+  key: "sample/${id}.txt"
+  mode: single_column
+  serializer: msgpack
+  data_columns: [payload]
+```
+
+```
+id | payload (json type)
+------------
+1 | hello
+5 | world
+12 | embulk
+```
+
+This generates `s3://your-bucket-name/sample/1.txt` with its content `"hello"` serialized in msgpack format,
+`s3://your-bucket-name/sample/5.txt` with its content `"world"`, and so on.
 
 ## Build
 
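The breaking changes listed in the README diff above amount to a mechanical rewrite of the `out:` section. A minimal before/after sketch, assuming a 0.3.x config that uploaded a single string column (bucket, key, and column names here are illustrative):

```yaml
# 0.3.x: the raw column value is uploaded as the object body
out:
  type: s3_per_record
  bucket: your-bucket-name
  key: "sample/${id}.txt"
  data_column: payload
  base64: false

# 0.4.0: the plugin serializes the column itself
out:
  type: s3_per_record
  bucket: your-bucket-name
  key: "sample/${id}.txt"
  mode: single_column          # one column per object
  serializer: json             # or msgpack
  data_columns: [payload]      # now an array, renamed from data_column
```

Note that even in `single_column` mode the object bytes change: a string column is now written as a serialized JSON or msgpack string (e.g. `"hello"`) rather than the raw value, and the old `base64` option is gone.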
data/build.gradle
CHANGED
@@ -13,7 +13,7 @@ configurations {
     provided
 }
 
-version = "0.3.1"
+version = "0.4.0"
 
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
@@ -22,6 +22,7 @@ dependencies {
     compile "org.embulk:embulk-core:0.8.8"
     provided "org.embulk:embulk-core:0.8.8"
     compile 'com.amazonaws:aws-java-sdk-s3:1.10.71'
+    compile 'org.msgpack:jackson-dataformat-msgpack:0.8.3'
     testCompile "junit:junit:4.+"
 }
 
data/example/Gemfile
ADDED
data/example/example_array.json
ADDED
@@ -0,0 +1 @@
+{"key": "test_key1", "data": [1,2,3]}
data/example/example_array.yml.liquid
ADDED
@@ -0,0 +1,16 @@
+in:
+  type: file
+  path_prefix: "example_array.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data", type: "json"}
+
+out:
+  type: s3_per_record
+  data_columns: [data]
+  mode: single_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: msgpack
data/example/example_long.json
ADDED
@@ -0,0 +1 @@
+{"key": "test_key1", "data": 1}
data/example/example_long.yml.liquid
ADDED
@@ -0,0 +1,16 @@
+in:
+  type: file
+  path_prefix: "example_long.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data", type: "long"}
+
+out:
+  type: s3_per_record
+  data_columns: [data]
+  mode: single_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: msgpack
data/example/example_map.yml.liquid
ADDED
@@ -0,0 +1,16 @@
+in:
+  type: file
+  path_prefix: "example_map.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data", type: "json"}
+
+out:
+  type: s3_per_record
+  data_columns: [data]
+  mode: single_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: msgpack
data/example/example_map_json.yml.liquid
ADDED
@@ -0,0 +1,16 @@
+in:
+  type: file
+  path_prefix: "example_map.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data", type: "json"}
+
+out:
+  type: s3_per_record
+  data_columns: [data]
+  mode: single_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: json
data/example/example_multicolumn.json
ADDED
@@ -0,0 +1,3 @@
+{"key": "test_key_multi1", "data1": {"a": 1, "b": 2, "c": 3}, "data2": [1, 2], "data3": 42, "data4": "2016-10-01 10:21:00"}
+{"key": "test_key_multi2", "data1": {"a": 4, "b": 5, "c": 6}, "data2": [2, 4], "data3": 42, "data4": "2016-10-01 11:21:00"}
+{"key": "test_key_multi3", "data1": {"a": 7, "b": 8, "c": 9}, "data2": [4, 8], "data3": 42, "data4": "2016-10-01 12:21:00"}
data/example/example_multicolumn.yml.liquid
ADDED
@@ -0,0 +1,23 @@
+in:
+  type: file
+  path_prefix: "example_multicolumn.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data1", type: "json"}
+      - {name: "data2", type: "json"}
+      - {name: "data3", type: "long"}
+      - {name: "data4", type: "timestamp", format: "%Y-%m-%d %H:%M:%S", timezone: "Asia/Tokyo"}
+
+out:
+  type: s3_per_record
+  data_columns:
+    - data1
+    - data2
+    - data3
+    - data4
+  mode: multi_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: msgpack
data/example/example_multicolumn_json.yml.liquid
ADDED
@@ -0,0 +1,25 @@
+in:
+  type: file
+  path_prefix: "example_multicolumn.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data1", type: "json"}
+      - {name: "data2", type: "json"}
+      - {name: "data3", type: "long"}
+      - {name: "data4", type: "timestamp", format: "%Y-%m-%d %H:%M:%S", timezone: "Asia/Tokyo"}
+
+out:
+  type: s3_per_record
+  data_columns:
+    - data1
+    - data2
+    - data3
+    - data4
+  mode: multi_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: json
+  column_options:
+    data4: {format: "%Y/%m/%d %H:%M:%S %z", timezone: "Asia/Tokyo"}
data/example/example_string.yml.liquid
ADDED
@@ -0,0 +1,16 @@
+in:
+  type: file
+  path_prefix: "example_string.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data", type: "string"}
+
+out:
+  type: s3_per_record
+  data_columns: [data]
+  mode: single_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: msgpack
data/example/example_string_json.yml.liquid
ADDED
@@ -0,0 +1,16 @@
+in:
+  type: file
+  path_prefix: "example_string.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data", type: "string"}
+
+out:
+  type: s3_per_record
+  data_columns: [data]
+  mode: single_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: json
data/example/example_timestamp.json
ADDED
@@ -0,0 +1 @@
+{"key": "test_key1", "data": "2016-10-01 10:21:00"}
data/example/example_timestamp.yml.liquid
ADDED
@@ -0,0 +1,18 @@
+in:
+  type: file
+  path_prefix: "example_timestamp.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data", type: "timestamp", format: "%Y-%m-%d %H:%M:%S", timezone: "Asia/Tokyo"}
+
+out:
+  type: s3_per_record
+  data_columns: [data]
+  mode: single_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: msgpack
+  column_options:
+    data: {format: "%Y/%m/%d %H:%M:%S %z"}
data/example/example_timestamp_json.yml.liquid
ADDED
@@ -0,0 +1,16 @@
+in:
+  type: file
+  path_prefix: "example_timestamp.json"
+  parser:
+    type: jsonl
+    columns:
+      - {name: "key", type: "string"}
+      - {name: "data", type: "timestamp", format: "%Y-%m-%d %H:%M:%S", timezone: "Asia/Tokyo"}
+
+out:
+  type: s3_per_record
+  data_columns: [data]
+  mode: single_column
+  bucket: {{ env.S3_BUCKET }}
+  key: "${key}"
+  serializer: json
data/gradlew.bat
CHANGED
@@ -1,90 +1,90 @@
(All 90 lines are marked as replaced; the removed and added text are identical in this diff, so the script content is listed once.)
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windowz variants

if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*
goto execute

:4NT_args
@rem Get arguments from the 4NT Shell from JP Software
set CMD_LINE_ARGS=%$

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
data/src/main/java/org/embulk/output/s3_per_record/S3PerRecordOutputPlugin.java
CHANGED
@@ -1,23 +1,28 @@
 package org.embulk.output.s3_per_record;
 
-import
-import
-import
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicLong;
-
-import javax.validation.constraints.NotNull;
-
+import com.amazonaws.auth.AWSCredentials;
+import com.amazonaws.auth.BasicAWSCredentials;
+import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
 import com.amazonaws.services.s3.model.AmazonS3Exception;
+import com.amazonaws.services.s3.model.ObjectMetadata;
+import com.amazonaws.services.s3.transfer.TransferManager;
+import com.amazonaws.services.s3.transfer.Upload;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonValue;
+import com.google.common.base.Optional;
 import org.embulk.config.Config;
 import org.embulk.config.ConfigDefault;
 import org.embulk.config.ConfigDiff;
+import org.embulk.config.ConfigException;
 import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
 import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
+import org.embulk.output.s3_per_record.visitor.JsonMultiColumnVisitor;
+import org.embulk.output.s3_per_record.visitor.JsonSingleColumnVisitor;
+import org.embulk.output.s3_per_record.visitor.MessagePackMultiColumnVisitor;
+import org.embulk.output.s3_per_record.visitor.MessagePackSingleColumnVisitor;
+import org.embulk.output.s3_per_record.visitor.S3PerRecordOutputColumnVisitor;
 import org.embulk.spi.Column;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
@@ -25,17 +30,20 @@ import org.embulk.spi.Page;
 import org.embulk.spi.PageReader;
 import org.embulk.spi.Schema;
 import org.embulk.spi.TransactionalPageOutput;
-
-import
-import com.amazonaws.auth.BasicAWSCredentials;
-import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
-import com.amazonaws.services.s3.model.ObjectMetadata;
-import com.amazonaws.services.s3.transfer.TransferManager;
-import com.amazonaws.services.s3.transfer.Upload;
-import com.amazonaws.util.Base64;
-import com.google.common.base.Optional;
+import org.embulk.spi.time.TimestampFormatter;
+import org.embulk.spi.util.Timestamps;
 import org.slf4j.Logger;
 
+import javax.validation.constraints.NotNull;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
 
 public class S3PerRecordOutputPlugin
         implements OutputPlugin
@@ -50,7 +58,7 @@ public class S3PerRecordOutputPlugin
     private static long startTime = System.currentTimeMillis();
 
     public interface PluginTask
-            extends Task
+            extends Task, TimestampFormatter.Task
     {
         // S3 bucket name.
        @Config("bucket")
@@ -61,8 +69,9 @@ public class S3PerRecordOutputPlugin
        String getKey();
 
        // Column name.
-        @Config("data_column")
-        String getDataColumn();
+        @Config("data_columns")
+        @ConfigDefault("[]")
+        List<String> getDataColumns();
 
        // AWS access key id.
        @Config("aws_access_key_id")
@@ -74,17 +83,28 @@ public class S3PerRecordOutputPlugin
        @ConfigDefault("null")
        Optional<String> getAwsSecretAccessKey();
 
-
-        @Config("base64")
-        @ConfigDefault("false")
-        boolean getBase64();
+        @Config("serializer")
+        @ConfigDefault("msgpack")
+        Serializer getSerializer();
+
+        @Config("mode")
+        @ConfigDefault("multi_column")
+        Mode getMode();
 
        // Set retry limit. Default is 2.
        @Config("retry_limit")
        @ConfigDefault("2")
        Integer getRetryLimit();
+
+        @Config("column_options")
+        @ConfigDefault("{}")
+        Map<String, TimestampColumnOption> getColumnOptions();
     }
 
+    public interface TimestampColumnOption
+            extends Task, TimestampFormatter.TimestampColumnOption
+    { }
+
     @Override
     public ConfigDiff transaction(ConfigSource config,
             Schema schema, int taskCount,
@@ -124,18 +144,22 @@ public class S3PerRecordOutputPlugin
        private PageReader pageReader;
        private final String bucket;
        private final List<KeyPart> keyPattern;
-        private final String dataColumn;
+        private final List<String> dataColumns;
        private final Schema schema;
-        private final boolean decodeBase64;
        private final int retryLimit;
+        private final Serializer serializer;
+        private final Mode mode;
+        private final TimestampFormatter[] timestampFormatters;
 
        public S3PerRecordPageOutput(PluginTask task, Schema schema) {
            this.schema = schema;
            bucket = task.getBucket();
            keyPattern = makeKeyPattern(task.getKey());
-            dataColumn = task.getDataColumn();
-            decodeBase64 = task.getBase64();
+            dataColumns = task.getDataColumns();
            retryLimit = task.getRetryLimit();
+            serializer = task.getSerializer();
+            mode = task.getMode();
+            timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
 
            AWSCredentials credentials;
            if (task.getAwsAccessKeyId().isPresent() && task.getAwsSecretAccessKey().isPresent()) {
@@ -176,15 +200,9 @@ public class S3PerRecordOutputPlugin
            pageReader.setPage(page);
 
            while (pageReader.nextRecord()) {
-                String key = buildKey(pageReader);
-
-
-                byte[] payloadBytes;
-                if (decodeBase64) {
-                    payloadBytes = Base64.decode(payload);
-                } else {
-                    payloadBytes = payload.getBytes(StandardCharsets.UTF_8);
-                }
+                final String key = buildKey(pageReader);
+                final byte[] payloadBytes = serializer.serialize(mode, pageReader, schema, dataColumns, timestampFormatters);
+
                ObjectMetadata metadata = new ObjectMetadata();
                metadata.setContentLength(payloadBytes.length);
 
@@ -286,4 +304,96 @@ public class S3PerRecordOutputPlugin
            return reader.getString(column);
        }
     }
+
+    public enum Serializer {
+        MSGPACK {
+            @Override
+            public byte[] serialize(Mode mode, PageReader reader, Schema schema, List<String> dataColumns, TimestampFormatter[] timestampFormatters) {
+                S3PerRecordOutputColumnVisitor visitor;
+                switch(mode) {
+                    case SINGLE_COLUMN:
+                        visitor = new MessagePackSingleColumnVisitor(reader, timestampFormatters);
+                        schema.lookupColumn(dataColumns.get(0)).visit(visitor);
+                        break;
+                    case MULTI_COLUMN:
+                        visitor = new MessagePackMultiColumnVisitor(reader, timestampFormatters);
+                        for (String columnName : dataColumns) {
+                            schema.lookupColumn(columnName).visit(visitor);
+                        }
+                        break;
+                    default:
+                        throw new RuntimeException("never reach here");
+                }
+                return visitor.getByteArray();
+            }
+        },
+        JSON {
+            @Override
+            public byte[] serialize(Mode mode, PageReader reader, Schema schema, List<String> dataColumns, TimestampFormatter[] timestampFormatters) {
+                S3PerRecordOutputColumnVisitor visitor;
+                switch(mode) {
+                    case SINGLE_COLUMN:
+                        visitor = new JsonSingleColumnVisitor(reader, timestampFormatters);
+                        schema.lookupColumn(dataColumns.get(0)).visit(visitor);
+                        break;
+                    case MULTI_COLUMN:
+                        visitor = new JsonMultiColumnVisitor(reader, timestampFormatters);
+                        for (String columnName : dataColumns) {
+                            schema.lookupColumn(columnName).visit(visitor);
+                        }
+                        break;
+                    default:
+                        throw new RuntimeException("never reach here");
+                }
+                return visitor.getByteArray();
+            }
+        };
+
+        public abstract byte[] serialize(Mode mode, PageReader reader, Schema schema, List<String> dataColumns, TimestampFormatter[] timestampFormatters);
+
+        @JsonValue
+        @Override
+        public String toString()
+        {
+            return name().toLowerCase(Locale.ENGLISH);
+        }
+
+        @JsonCreator
+        public static Serializer fromString(String name)
+        {
+            switch(name) {
+                case "msgpack":
+                    return MSGPACK;
+                case "json":
+                    return JSON;
+                default:
+                    throw new ConfigException(String.format("Unknown format '%s'. Supported formats are msgpack only", name));
+            }
+        }
+    }
+
+    public enum Mode {
+        SINGLE_COLUMN,
+        MULTI_COLUMN;
+
+        @JsonValue
+        @Override
+        public String toString()
+        {
+            return name().toLowerCase(Locale.ENGLISH);
+        }
+
+        @JsonCreator
+        public static Mode fromString(String name)
+        {
+            switch(name) {
+                case "single_column":
+                    return SINGLE_COLUMN;
+                case "multi_column":
+                    return MULTI_COLUMN;
+                default:
+                    throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are single_column, multi_column", name));
+            }
+        }
+    }
 }
data/src/main/java/org/embulk/output/s3_per_record/visitor/JsonMultiColumnVisitor.java
ADDED
@@ -0,0 +1,73 @@
+package org.embulk.output.s3_per_record.visitor;
+
+import org.embulk.spi.Column;
+import org.embulk.spi.PageReader;
+import org.embulk.spi.time.TimestampFormatter;
+import org.msgpack.core.MessageBufferPacker;
+import org.msgpack.core.MessagePack;
+import org.msgpack.value.Value;
+import org.msgpack.value.ValueFactory;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+public class JsonMultiColumnVisitor implements S3PerRecordOutputColumnVisitor {
+    final PageReader reader;
+    final TimestampFormatter[] timestampFormatters;
+    public final ValueFactory.MapBuilder builder;
+
+    public JsonMultiColumnVisitor(PageReader reader, TimestampFormatter[] timestampFormatters) {
+        this.reader = reader;
+        this.timestampFormatters = timestampFormatters;
+        this.builder = new ValueFactory.MapBuilder();
+    }
+
+    public byte[] getByteArray() {
+        Value value = builder.build();
+        String json = value.toJson();
+        return json.getBytes(StandardCharsets.UTF_8);
+    }
+
+    @Override
+    public void booleanColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = ValueFactory.newBoolean(reader.getBoolean(column));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void longColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = ValueFactory.newInteger(reader.getLong(column));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void doubleColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = ValueFactory.newFloat(reader.getDouble(column));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void stringColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = ValueFactory.newString(reader.getString(column));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void timestampColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        TimestampFormatter formatter = timestampFormatters[column.getIndex()];
+        Value value = ValueFactory.newString(formatter.format(reader.getTimestamp(column)));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void jsonColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = reader.getJson(column);
+        builder.put(columnName, value);
+    }
+}
data/src/main/java/org/embulk/output/s3_per_record/visitor/JsonSingleColumnVisitor.java
ADDED
@@ -0,0 +1,63 @@
+package org.embulk.output.s3_per_record.visitor;
+
+import org.embulk.spi.Column;
+import org.embulk.spi.PageReader;
+import org.embulk.spi.time.Timestamp;
+import org.embulk.spi.time.TimestampFormatter;
+import org.msgpack.value.Value;
+import org.msgpack.value.ValueFactory;
+
+import java.nio.charset.StandardCharsets;
+
+public class JsonSingleColumnVisitor implements S3PerRecordOutputColumnVisitor {
+    final PageReader reader;
+    final TimestampFormatter[] timestampFormatters;
+    final StringBuilder sb;
+
+    public JsonSingleColumnVisitor(PageReader reader, TimestampFormatter[] timestampFormatters) {
+        this.reader = reader;
+        this.timestampFormatters = timestampFormatters;
+        this.sb = new StringBuilder();
+    }
+
+    public byte[] getByteArray() {
+        return sb.toString().getBytes(StandardCharsets.UTF_8);
+    }
+
+    @Override
+    public void booleanColumn(Column column) {
+        boolean value = reader.getBoolean(column);
+        sb.append(value);
+    }
+
+    @Override
+    public void longColumn(Column column) {
+        long value = reader.getLong(column);
+        sb.append(value);
+    }
+
+    @Override
+    public void doubleColumn(Column column) {
+        double value = reader.getDouble(column);
+        sb.append(value);
+    }
+
+    @Override
+    public void stringColumn(Column column) {
+        String value = reader.getString(column);
+        sb.append(ValueFactory.newString(value).toJson());
+    }
+
+    @Override
+    public void timestampColumn(Column column) {
+        Timestamp value = reader.getTimestamp(column);
+        TimestampFormatter formatter = timestampFormatters[column.getIndex()];
+        sb.append(ValueFactory.newString(formatter.format(value)).toJson());
+    }
+
+    @Override
+    public void jsonColumn(Column column) {
+        Value value = reader.getJson(column);
+        sb.append(value.toJson());
+    }
+}
data/src/main/java/org/embulk/output/s3_per_record/visitor/MessagePackMultiColumnVisitor.java
ADDED
@@ -0,0 +1,77 @@
+package org.embulk.output.s3_per_record.visitor;
+
+import org.embulk.spi.Column;
+import org.embulk.spi.PageReader;
+import org.embulk.spi.time.TimestampFormatter;
+import org.msgpack.core.MessageBufferPacker;
+import org.msgpack.core.MessagePack;
+import org.msgpack.value.Value;
+import org.msgpack.value.ValueFactory;
+
+import java.io.IOException;
+
+public class MessagePackMultiColumnVisitor implements S3PerRecordOutputColumnVisitor {
+    final PageReader reader;
+    final TimestampFormatter[] timestampFormatters;
+    public final ValueFactory.MapBuilder builder;
+
+    public MessagePackMultiColumnVisitor(PageReader reader, TimestampFormatter[] timestampFormatters) {
+        this.reader = reader;
+        this.timestampFormatters = timestampFormatters;
+        this.builder = new ValueFactory.MapBuilder();
+    }
+
+    public byte[] getByteArray() {
+        Value value = builder.build();
+        MessageBufferPacker packer = MessagePack.newDefaultBufferPacker();
+        try {
+            value.writeTo(packer);
+            return packer.toByteArray();
+        } catch (IOException e) {
+            throw new RuntimeException("cannot write to msgpack");
+        }
+    }
+
+    @Override
+    public void booleanColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = ValueFactory.newBoolean(reader.getBoolean(column));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void longColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = ValueFactory.newInteger(reader.getLong(column));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void doubleColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = ValueFactory.newFloat(reader.getDouble(column));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void stringColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = ValueFactory.newString(reader.getString(column));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void timestampColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        TimestampFormatter formatter = timestampFormatters[column.getIndex()];
+        Value value = ValueFactory.newString(formatter.format(reader.getTimestamp(column)));
+        builder.put(columnName, value);
+    }
+
+    @Override
+    public void jsonColumn(Column column) {
+        Value columnName = ValueFactory.newString(column.getName());
+        Value value = reader.getJson(column);
+        builder.put(columnName, value);
+    }
+}
data/src/main/java/org/embulk/output/s3_per_record/visitor/MessagePackSingleColumnVisitor.java
ADDED
@@ -0,0 +1,88 @@
+package org.embulk.output.s3_per_record.visitor;
+
+import org.embulk.spi.Column;
+import org.embulk.spi.PageReader;
+import org.embulk.spi.time.Timestamp;
+import org.embulk.spi.time.TimestampFormatter;
+import org.msgpack.core.MessageBufferPacker;
+import org.msgpack.core.MessagePack;
+import org.msgpack.value.Value;
+
+import java.io.IOException;
+
+public class MessagePackSingleColumnVisitor implements S3PerRecordOutputColumnVisitor {
+    final PageReader reader;
+    final TimestampFormatter[] timestampFormatters;
+    final MessageBufferPacker packer;
+
+    public MessagePackSingleColumnVisitor(PageReader reader, TimestampFormatter[] timestampFormatters) {
+        this.reader = reader;
+        this.timestampFormatters = timestampFormatters;
+        this.packer = MessagePack.newDefaultBufferPacker();
+    }
+
+    public byte[] getByteArray() {
+        return packer.toByteArray();
+    }
+
+    @Override
+    public void booleanColumn(Column column) {
+        boolean value = reader.getBoolean(column);
+        try {
+            packer.packBoolean(value);
+        } catch (IOException e) {
+            throw new RuntimeException("cannot write to msgpack");
+        }
+    }
+
+    @Override
+    public void longColumn(Column column) {
+        long value = reader.getLong(column);
+        try {
+            packer.packLong(value);
+        } catch (IOException e) {
+            throw new RuntimeException("cannot write to msgpack");
+        }
+    }
+
+    @Override
+    public void doubleColumn(Column column) {
+        double value = reader.getDouble(column);
+        try {
+            packer.packDouble(value);
+        } catch (IOException e) {
+            throw new RuntimeException("cannot write to msgpack");
+        }
+    }
+
+    @Override
+    public void stringColumn(Column column) {
+        String value = reader.getString(column);
+        try {
+            packer.packString(value);
+        } catch (IOException e) {
+            throw new RuntimeException("cannot write to msgpack");
+        }
+    }
+
+    @Override
+    public void timestampColumn(Column column) {
+        Timestamp value = reader.getTimestamp(column);
+        TimestampFormatter formatter = timestampFormatters[column.getIndex()];
+        try {
+            packer.packString(formatter.format(value));
+        } catch (IOException e) {
+            throw new RuntimeException("cannot write to msgpack");
+        }
+    }
+
+    @Override
+    public void jsonColumn(Column column) {
+        Value value = reader.getJson(column);
+        try {
+            value.writeTo(packer);
+        } catch (IOException e) {
+            throw new RuntimeException("cannot write to msgpack");
+        }
+    }
+}
metadata
CHANGED
@@ -1,43 +1,43 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-s3_per_record
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.4.0
 platform: ruby
 authors:
 - tomykaira
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-11-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-
+  name: bundler
+  version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
         version: '1.0'
-
-  prerelease: false
-  type: :development
-  version_requirements: !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
         version: '1.0'
+  prerelease: false
+  type: :development
 - !ruby/object:Gem::Dependency
-
+  name: rake
+  version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - '>='
      - !ruby/object:Gem::Version
        version: '10.0'
-
-  prerelease: false
-  type: :development
-  version_requirements: !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - '>='
      - !ruby/object:Gem::Version
        version: '10.0'
+  prerelease: false
+  type: :development
 description: Upload a column's value to S3 as one S3 object per row.
 email:
 - tomykaira@gmail.com
@@ -51,22 +51,50 @@ files:
 - build.gradle
 - config/checkstyle/checkstyle.xml
 - config/checkstyle/default.xml
+- example/.bundle/config
+- example/Gemfile
+- example/Gemfile.lock
+- example/example_array.json
+- example/example_array.yml.liquid
+- example/example_long.json
+- example/example_long.yml.liquid
+- example/example_map.json
+- example/example_map.yml.liquid
+- example/example_map_json.yml.liquid
+- example/example_multicolumn.json
+- example/example_multicolumn.yml.liquid
+- example/example_multicolumn_json.yml.liquid
+- example/example_string.json
+- example/example_string.yml.liquid
+- example/example_string_json.yml.liquid
+- example/example_timestamp.json
+- example/example_timestamp.yml.liquid
+- example/example_timestamp_json.yml.liquid
 - gradle/wrapper/gradle-wrapper.jar
 - gradle/wrapper/gradle-wrapper.properties
 - gradlew
 - gradlew.bat
 - lib/embulk/output/s3_per_record.rb
 - src/main/java/org/embulk/output/s3_per_record/S3PerRecordOutputPlugin.java
+- src/main/java/org/embulk/output/s3_per_record/visitor/JsonMultiColumnVisitor.java
+- src/main/java/org/embulk/output/s3_per_record/visitor/JsonSingleColumnVisitor.java
+- src/main/java/org/embulk/output/s3_per_record/visitor/MessagePackMultiColumnVisitor.java
+- src/main/java/org/embulk/output/s3_per_record/visitor/MessagePackSingleColumnVisitor.java
+- src/main/java/org/embulk/output/s3_per_record/visitor/S3PerRecordOutputColumnVisitor.java
 - src/test/java/org/embulk/output/s3_per_record/TestS3PerRecordOutputPlugin.java
-- classpath/jackson-dataformat-cbor-2.5.3.jar
-- classpath/httpclient-4.3.6.jar
-- classpath/commons-logging-1.1.3.jar
-- classpath/commons-codec-1.6.jar
-- classpath/aws-java-sdk-s3-1.10.71.jar
 - classpath/aws-java-sdk-core-1.10.71.jar
-- classpath/embulk-output-s3_per_record-0.3.1.jar
-- classpath/httpcore-4.3.3.jar
 - classpath/aws-java-sdk-kms-1.10.71.jar
+- classpath/aws-java-sdk-s3-1.10.71.jar
+- classpath/commons-codec-1.6.jar
+- classpath/commons-logging-1.1.3.jar
+- classpath/embulk-output-s3_per_record-0.4.0.jar
+- classpath/httpclient-4.3.6.jar
+- classpath/httpcore-4.3.3.jar
+- classpath/jackson-annotations-2.7.0.jar
+- classpath/jackson-core-2.7.1.jar
+- classpath/jackson-databind-2.7.1.jar
+- classpath/jackson-dataformat-cbor-2.5.3.jar
+- classpath/jackson-dataformat-msgpack-0.8.3.jar
 homepage: https://github.com/tomykaira/embulk-output-s3_per_record
 licenses:
 - MIT