embulk-output-elasticsearch5 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Binary file
@@ -0,0 +1,6 @@
1
+ #Sat Dec 31 16:12:53 CST 2016
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-all.zip
@@ -0,0 +1,6 @@
1
+ #Wed Jan 13 12:41:02 JST 2016
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
data/gradlew ADDED
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) { # Print a warning ("$*": all arguments joined into one string) without aborting.
19
+ echo "$*" >&2 # diagnostics belong on stderr so they never mix into captured stdout
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # Attempt to set APP_HOME to the physical directory that contains this script.
46
+ # Resolve links: $0 may be a link
47
+ PRG="$0"
48
+ # Need this for relative symlinks.
49
+ while [ -h "$PRG" ] ; do # keep going while PRG is itself a symbolic link
50
+ ls=`ls -ld "$PRG"`
51
+ link=`expr "$ls" : '.*-> \(.*\)$'` # link target: the text after "-> " in the ls output
52
+ if expr "$link" : '/.*' > /dev/null; then # absolute target: use it as-is
53
+ PRG="$link"
54
+ else # relative target: interpret it relative to the link's own directory
55
+ PRG=`dirname "$PRG"`"/$link"
56
+ fi
57
+ done
58
+ SAVED="`pwd`" # remember the caller's working directory
59
+ cd "`dirname \"$PRG\"`/" >/dev/null
60
+ APP_HOME="`pwd -P`" # -P prints the physical path, with symlinks resolved
61
+ cd "$SAVED" >/dev/null # go back to the caller's working directory
62
+
63
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
+
65
+ # Determine the Java command to use to start the JVM (the PATH lookup uses the POSIX 'command -v' builtin instead of the external 'which').
66
+ if [ -n "$JAVA_HOME" ] ; then
67
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
68
+ # IBM's JDK on AIX uses strange locations for the executables
69
+ JAVACMD="$JAVA_HOME/jre/sh/java"
70
+ else
71
+ JAVACMD="$JAVA_HOME/bin/java"
72
+ fi
73
+ if [ ! -x "$JAVACMD" ] ; then
74
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
75
+
76
+ Please set the JAVA_HOME variable in your environment to match the
77
+ location of your Java installation."
78
+ fi
79
+ else
80
+ JAVACMD="java" # no JAVA_HOME: rely on whatever "java" resolves to on PATH
81
+ command -v java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
82
+
83
+ Please set the JAVA_HOME variable in your environment to match the
84
+ location of your Java installation."
85
+ fi
86
+
87
+ # Increase the maximum file descriptors if we can (skipped on Cygwin and Darwin).
88
+ if [ "$cygwin" = "false" ] && [ "$darwin" = "false" ] ; then
89
+ MAX_FD_LIMIT=`ulimit -H -n` # hard limit for open file descriptors
90
+ if [ $? -eq 0 ] ; then
91
+ if [ "$MAX_FD" = "maximum" ] || [ "$MAX_FD" = "max" ] ; then
92
+ MAX_FD="$MAX_FD_LIMIT"
93
+ fi
94
+ ulimit -n "$MAX_FD" # quoted so an odd MAX_FD value cannot be word-split or globbed
95
+ if [ $? -ne 0 ] ; then
96
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
97
+ fi
98
+ else
99
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
100
+ fi
101
+ fi
102
+
103
+ # For Darwin, add options to specify how the application appears in the dock
104
+ if $darwin; then
105
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
106
+ fi
107
+
108
+ # For Cygwin, switch paths to Windows format before running java
109
+ if $cygwin ; then
110
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
111
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112
+ JAVACMD=`cygpath --unix "$JAVACMD"`
113
+
114
+ # We build the pattern for arguments to be converted via cygpath
115
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
116
+ SEP=""
117
+ for dir in $ROOTDIRSRAW ; do
118
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
119
+ SEP="|"
120
+ done
121
+ OURCYGPATTERN="(^($ROOTDIRS))"
122
+ # Add a user-defined pattern to the cygpath arguments
123
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
124
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
125
+ fi
126
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
127
+ i=0
128
+ for arg in "$@" ; do
129
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
130
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
131
+
132
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
133
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
134
+ else
135
+ eval `echo args$i`="\"$arg\""
136
+ fi
137
+ i=$((i+1))
138
+ done
139
+ case $i in
140
+ (0) set -- ;;
141
+ (1) set -- "$args0" ;;
142
+ (2) set -- "$args0" "$args1" ;;
143
+ (3) set -- "$args0" "$args1" "$args2" ;;
144
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
145
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
146
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
147
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
148
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
149
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
150
+ esac
151
+ fi
152
+
153
+ # Split up the DEFAULT_JVM_OPTS, JAVA_OPTS and GRADLE_OPTS values into the JVM_OPTS array, following the shell quoting and substitution rules
154
+ function splitJvmOpts() {
155
+ JVM_OPTS=("$@") # each argument becomes one element of the global JVM_OPTS array
156
+ }
157
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS # eval re-parses the strings so quotes embedded in them group words; NOTE(review): it also runs any command substitution they contain
158
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" # append: index equals the current array length
159
+
160
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" # exec replaces this shell process with the JVM
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_output(
2
+ :elasticsearch, "org.embulk.output.elasticsearch.ElasticsearchOutputPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
data/settings.gradle ADDED
@@ -0,0 +1 @@
1
+ rootProject.name = 'embulk-output-elasticsearch5'
@@ -0,0 +1,630 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
4
+ import com.fasterxml.jackson.annotation.JsonCreator;
5
+ import com.fasterxml.jackson.annotation.JsonValue;
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.base.Throwables;
8
+ import com.google.inject.Inject;
9
+ import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
10
+ import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
11
+ import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
12
+ import org.elasticsearch.action.bulk.BulkItemResponse;
13
+ import org.elasticsearch.action.bulk.BulkProcessor;
14
+ import org.elasticsearch.action.bulk.BulkRequest;
15
+ import org.elasticsearch.action.bulk.BulkResponse;
16
+ import org.elasticsearch.action.index.IndexRequest;
17
+ import org.elasticsearch.client.Client;
18
+ import org.elasticsearch.client.Requests;
19
+ import org.elasticsearch.client.transport.NoNodeAvailableException;
20
+ import org.elasticsearch.client.transport.TransportClient;
21
+ import org.elasticsearch.cluster.metadata.AliasMetaData;
22
+ import org.elasticsearch.cluster.metadata.AliasOrIndex;
23
+ import org.elasticsearch.common.collect.ImmutableOpenMap;
24
+ import org.elasticsearch.common.settings.Settings;
25
+ import org.elasticsearch.common.transport.InetSocketTransportAddress;
26
+ import org.elasticsearch.common.unit.ByteSizeValue;
27
+ import org.elasticsearch.common.xcontent.XContentBuilder;
28
+ import org.elasticsearch.common.xcontent.XContentFactory;
29
+ import org.elasticsearch.index.IndexNotFoundException;
30
+ import org.elasticsearch.indices.InvalidAliasNameException;
31
+ import org.elasticsearch.transport.client.PreBuiltTransportClient;
32
+ import org.embulk.config.Config;
33
+ import org.embulk.config.ConfigDefault;
34
+ import org.embulk.config.ConfigDiff;
35
+ import org.embulk.config.ConfigException;
36
+ import org.embulk.config.ConfigSource;
37
+ import org.embulk.config.Task;
38
+ import org.embulk.config.TaskReport;
39
+ import org.embulk.config.TaskSource;
40
+ import org.embulk.config.UserDataException;
41
+ import org.embulk.spi.Column;
42
+ import org.embulk.spi.ColumnVisitor;
43
+ import org.embulk.spi.Exec;
44
+ import org.embulk.spi.OutputPlugin;
45
+ import org.embulk.spi.Page;
46
+ import org.embulk.spi.PageReader;
47
+ import org.embulk.spi.Schema;
48
+ import org.embulk.spi.TransactionalPageOutput;
49
+ import org.embulk.spi.time.Timestamp;
50
+ import org.embulk.spi.type.Types;
51
+ import org.slf4j.Logger;
52
+
53
+ import java.io.IOException;
54
+ import java.net.InetAddress;
55
+ import java.net.UnknownHostException;
56
+ import java.text.SimpleDateFormat;
57
+ import java.util.ArrayList;
58
+ import java.util.Date;
59
+ import java.util.List;
60
+ import java.util.Locale;
61
+ import java.util.concurrent.TimeUnit;
62
+
63
+ public class ElasticsearchOutputPlugin
64
+ implements OutputPlugin
65
+ {
66
+ public interface NodeAddressTask
67
+ extends Task
68
+ {
69
+ @Config("host")
70
+ public String getHost();
71
+
72
+ @Config("port")
73
+ @ConfigDefault("9300")
74
+ public int getPort();
75
+ }
76
+
77
+ public interface PluginTask
78
+ extends Task
79
+ {
80
+ @Config("mode")
81
+ @ConfigDefault("\"insert\"")
82
+ public Mode getMode();
83
+
84
+ @Config("nodes")
85
+ public List<NodeAddressTask> getNodes();
86
+
87
+ @Config("cluster_name")
88
+ @ConfigDefault("\"elasticsearch\"")
89
+ public String getClusterName();
90
+
91
+ @Config("index")
92
+ public String getIndex();
93
+ public void setIndex(String indexName);
94
+
95
+ @Config("pipeline") @ConfigDefault("null") // optional ingest pipeline name; without a default Embulk treats the key as required even though the type is Optional
96
+ public Optional<String> getPipeline();
97
+
98
+ @Config("alias")
99
+ @ConfigDefault("null")
100
+ public Optional<String> getAlias();
101
+ public void setAlias(Optional<String> aliasName);
102
+
103
+ @Config("index_type")
104
+ public String getType();
105
+
106
+ @Config("id")
107
+ @ConfigDefault("null")
108
+ public Optional<String> getId();
109
+
110
+ @Config("bulk_actions")
111
+ @ConfigDefault("1000")
112
+ public int getBulkActions();
113
+
114
+ @Config("bulk_size")
115
+ @ConfigDefault("5242880")
116
+ public long getBulkSize();
117
+
118
+ @Config("concurrent_requests")
119
+ @ConfigDefault("5")
120
+ public int getConcurrentRequests();
121
+ }
122
+
123
+ private final Logger log;
124
+
125
+ @Inject
126
+ public ElasticsearchOutputPlugin()
127
+ {
128
+ log = Exec.getLogger(getClass());
129
+ }
130
+
131
+ @Override
132
+ public ConfigDiff transaction(ConfigSource config, Schema schema,
133
+ int processorCount, Control control)
134
+ {
135
+ final PluginTask task = config.loadConfig(PluginTask.class);
136
+
137
+ // confirm that a client can be initialized
138
+ try (Client client = createClient(task)) {
139
+ log.info(String.format("Executing plugin with '%s' mode.", task.getMode()));
140
+ if (task.getMode().equals(Mode.REPLACE)) {
141
+ task.setAlias(Optional.of(task.getIndex()));
142
+ task.setIndex(generateNewIndexName(task.getIndex()));
143
+ if (isExistsIndex(task.getAlias().orNull(), client) && !isAlias(task.getAlias().orNull(), client)) {
144
+ throw new ConfigException(String.format("Invalid alias name [%s], an index exists with the same name as the alias", task.getAlias().orNull()));
145
+ }
146
+ }
147
+ log.info(String.format("Inserting data into index[%s]", task.getIndex()));
148
+ control.run(task.dump());
149
+
150
+ if (task.getMode().equals(Mode.REPLACE)) {
151
+ try {
152
+ reAssignAlias(task.getAlias().orNull(), task.getIndex(), client);
153
+ }
154
+ catch (IndexNotFoundException | InvalidAliasNameException e) {
155
+ throw new ConfigException(e);
156
+ }
157
+ catch (NoNodeAvailableException e) {
158
+ throw new ConnectionException(e);
159
+ }
160
+ }
161
+ } catch (Exception e) {
162
+ throw Throwables.propagate(e);
163
+ }
164
+
165
+ ConfigDiff nextConfig = Exec.newConfigDiff();
166
+ return nextConfig;
167
+ }
168
+
169
+ @Override
170
+ public ConfigDiff resume(TaskSource taskSource,
171
+ Schema schema, int processorCount,
172
+ OutputPlugin.Control control)
173
+ {
174
+ // TODO
175
+ return Exec.newConfigDiff();
176
+ }
177
+
178
+ @Override
179
+ public void cleanup(TaskSource taskSource,
180
+ Schema schema, int processorCount,
181
+ List<TaskReport> successTaskReports)
182
+ {}
183
+
184
+ private Client createClient(final PluginTask task)
185
+ {
186
+ // @see http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/client.html
187
+ Settings settings = Settings.builder()
188
+ .put("cluster.name", task.getClusterName())
189
+ .build();
190
+ TransportClient client = new PreBuiltTransportClient(settings);
191
+ List<NodeAddressTask> nodes = task.getNodes();
192
+ for (NodeAddressTask node : nodes) {
193
+ try {
194
+ client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(node.getHost()), node.getPort()));
195
+ }
196
+ catch (UnknownHostException | NoNodeAvailableException e) {
197
+ throw new ConnectionException(e);
198
+ }
199
+ }
200
+ return client;
201
+ }
202
+
203
+ private BulkProcessor newBulkProcessor(final PluginTask task, final Client client)
204
+ {
205
+ return BulkProcessor.builder(client, new BulkProcessor.Listener() {
206
+ @Override
207
+ public void beforeBulk(long executionId, BulkRequest request)
208
+ {
209
+ log.info("Execute {} bulk actions", request.numberOfActions());
210
+ }
211
+
212
+ @Override
213
+ public void afterBulk(long executionId, BulkRequest request, BulkResponse response) // called after a bulk request that received a response
214
+ {
215
+ if (response.hasFailures()) {
216
+ long items = 0; // number of failed items; counted at every log level so the warning below is accurate
217
+ for (BulkItemResponse item : response.getItems()) {
218
+ if (item.isFailed()) {
219
+ items += 1;
220
+ if (log.isDebugEnabled()) { // per-item details are emitted only at debug level
221
+ log.debug(" Error for {}/{}/{} for {} operation: {}",
222
+ item.getIndex(), item.getType(), item.getId(),
223
+ item.getOpType(), item.getFailureMessage());
224
+ }
225
+ }
226
+ }
227
+ log.warn("{} bulk actions failed: {}", items, response.buildFailureMessage());
228
+ }
229
+ else {
230
+ log.info("{} bulk actions succeeded", request.numberOfActions());
231
+ }
232
+ }
233
+
234
+ @Override
235
+ public void afterBulk(long executionId, BulkRequest request, Throwable failure)
236
+ {
237
+ if (failure.getClass() == NoNodeAvailableException.class) {
238
+ log.error("Got the error during bulk processing", failure);
239
+ throw new ConnectionException(failure);
240
+ }
241
+ else {
242
+ log.warn("Got the error during bulk processing", failure);
243
+ }
244
+ }
245
+ }).setBulkActions(task.getBulkActions())
246
+ .setBulkSize(new ByteSizeValue(task.getBulkSize()))
247
+ .setConcurrentRequests(task.getConcurrentRequests())
248
+ .build();
249
+ }
250
+
251
+ @Override
252
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema,
253
+ int processorIndex)
254
+ {
255
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
256
+ Client client = createClient(task);
257
+ BulkProcessor bulkProcessor = newBulkProcessor(task, client);
258
+ ElasticsearchPageOutput pageOutput = new ElasticsearchPageOutput(task, client, bulkProcessor);
259
+ pageOutput.open(schema);
260
+ return pageOutput;
261
+ }
262
+
263
+ public static class ElasticsearchPageOutput implements TransactionalPageOutput
264
+ {
265
+ private Logger log;
266
+
267
+ private Client client;
268
+ private BulkProcessor bulkProcessor;
269
+
270
+ private PageReader pageReader;
271
+ private Column idColumn;
272
+
273
+ private final String index;
274
+ private final String type;
275
+ private final String id;
276
+ private final String pipeline;
277
+
278
+ public ElasticsearchPageOutput(PluginTask task, Client client, BulkProcessor bulkProcessor)
279
+ {
280
+ this.log = Exec.getLogger(getClass());
281
+
282
+ this.client = client;
283
+ this.bulkProcessor = bulkProcessor;
284
+
285
+ this.index = task.getIndex();
286
+ this.type = task.getType();
287
+ this.id = task.getId().orNull();
288
+ this.pipeline = task.getPipeline().orNull();
289
+ }
290
+
291
+ void open(final Schema schema)
292
+ {
293
+ pageReader = new PageReader(schema);
294
+ idColumn = (id == null) ? null : schema.lookupColumn(id);
295
+ }
296
+
297
+ @Override
298
+ public void add(Page page)
299
+ {
300
+ pageReader.setPage(page);
301
+
302
+ while (pageReader.nextRecord()) {
303
+ try {
304
+ final XContentBuilder contextBuilder = XContentFactory.jsonBuilder().startObject(); // TODO reusable??
305
+ pageReader.getSchema().visitColumns(new ColumnVisitor() {
306
+ @Override
307
+ public void booleanColumn(Column column)
308
+ {
309
+ try {
310
+ if (pageReader.isNull(column)) {
311
+ contextBuilder.nullField(column.getName());
312
+ }
313
+ else {
314
+ contextBuilder.field(column.getName(), pageReader.getBoolean(column));
315
+ }
316
+ }
317
+ catch (IOException e) {
318
+ try {
319
+ contextBuilder.nullField(column.getName());
320
+ }
321
+ catch (IOException ex) {
322
+ throw Throwables.propagate(ex);
323
+ }
324
+ }
325
+ }
326
+
327
+ @Override
328
+ public void longColumn(Column column)
329
+ {
330
+ try {
331
+ if (pageReader.isNull(column)) {
332
+ contextBuilder.nullField(column.getName());
333
+ }
334
+ else {
335
+ contextBuilder.field(column.getName(), pageReader.getLong(column));
336
+ }
337
+ }
338
+ catch (IOException e) {
339
+ try {
340
+ contextBuilder.nullField(column.getName());
341
+ }
342
+ catch (IOException ex) {
343
+ throw Throwables.propagate(ex);
344
+ }
345
+ }
346
+ }
347
+
348
+ @Override
349
+ public void doubleColumn(Column column)
350
+ {
351
+ try {
352
+ if (pageReader.isNull(column)) {
353
+ contextBuilder.nullField(column.getName());
354
+ }
355
+ else {
356
+ contextBuilder.field(column.getName(), pageReader.getDouble(column));
357
+ }
358
+ }
359
+ catch (IOException e) {
360
+ try {
361
+ contextBuilder.nullField(column.getName());
362
+ }
363
+ catch (IOException ex) {
364
+ throw Throwables.propagate(ex);
365
+ }
366
+ }
367
+ }
368
+
369
+ @Override
370
+ public void stringColumn(Column column)
371
+ {
372
+ try {
373
+ if (pageReader.isNull(column)) {
374
+ contextBuilder.nullField(column.getName());
375
+ }
376
+ else {
377
+ contextBuilder.field(column.getName(), pageReader.getString(column));
378
+ }
379
+ }
380
+ catch (IOException e) {
381
+ try {
382
+ contextBuilder.nullField(column.getName());
383
+ }
384
+ catch (IOException ex) {
385
+ throw Throwables.propagate(ex);
386
+ }
387
+ }
388
+ }
389
+
390
+ @Override
391
+ public void jsonColumn(Column column)
392
+ {
393
+ try {
394
+ if (pageReader.isNull(column)) {
395
+ contextBuilder.nullField(column.getName());
396
+ }
397
+ else {
398
+ contextBuilder.field(column.getName(), pageReader.getJson(column).toJson());
399
+ }
400
+ }
401
+ catch (IOException e) {
402
+ try {
403
+ contextBuilder.nullField(column.getName());
404
+ }
405
+ catch (IOException ex) {
406
+ throw Throwables.propagate(ex);
407
+ }
408
+ }
409
+ }
410
+
411
+ @Override
412
+ public void timestampColumn(Column column)
413
+ {
414
+ try {
415
+ if (pageReader.isNull(column)) {
416
+ contextBuilder.nullField(column.getName());
417
+ }
418
+ else {
419
+ contextBuilder.field(column.getName(), new Date(pageReader.getTimestamp(column).toEpochMilli()));
420
+ }
421
+ }
422
+ catch (IOException e) {
423
+ try {
424
+ contextBuilder.nullField(column.getName());
425
+ }
426
+ catch (IOException ex) {
427
+ throw Throwables.propagate(ex);
428
+ }
429
+ }
430
+ }
431
+ });
432
+
433
+ contextBuilder.endObject();
434
+ bulkProcessor.add(newIndexRequest(getIdValue(idColumn)).source(contextBuilder));
435
+ }
436
+ catch (ConnectionException | IOException e) {
437
+ Throwables.propagate(e); // TODO error handling
438
+ }
439
+ }
440
+ }
441
+
442
+ /**
443
+ * @param inputColumn
444
+ * @return
445
+ */
446
+ private String getIdValue(Column inputColumn)
447
+ {
448
+ if (inputColumn == null) {
449
+ return null;
450
+ }
451
+ if (pageReader.isNull(inputColumn)) {
452
+ return null;
453
+ }
454
+ String idValue = null;
455
+ if (Types.STRING.equals(inputColumn.getType())) {
456
+ idValue = pageReader.getString(inputColumn);
457
+ }
458
+ else if (Types.BOOLEAN.equals(inputColumn.getType())) {
459
+ idValue = pageReader.getBoolean(inputColumn) + "";
460
+ }
461
+ else if (Types.DOUBLE.equals(inputColumn.getType())) {
462
+ idValue = pageReader.getDouble(inputColumn) + "";
463
+ }
464
+ else if (Types.LONG.equals(inputColumn.getType())) {
465
+ idValue = pageReader.getLong(inputColumn) + "";
466
+ }
467
+ else if (Types.JSON.equals(inputColumn.getType())) {
468
+ idValue = pageReader.getJson(inputColumn).toJson();
469
+ }
470
+ else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
471
+ idValue = pageReader.getTimestamp(inputColumn).toString();
472
+ }
473
+ else {
474
+ idValue = null;
475
+ }
476
+ return idValue;
477
+ }
478
+
479
+ private IndexRequest newIndexRequest(String idValue)
480
+ {
481
+ return Requests.indexRequest(index).type(type).id(idValue).setPipeline(pipeline);
482
+ }
483
+
484
+ @Override
485
+ public void finish()
486
+ {
487
+ try {
488
+ bulkProcessor.flush();
489
+ }
490
+ finally {
491
+ close();
492
+ }
493
+ }
494
+
495
+ @Override
496
+ public void close()
497
+ {
498
+ if (bulkProcessor != null) {
499
+ try {
500
+ while (!bulkProcessor.awaitClose(3, TimeUnit.SECONDS)) {
501
+ log.debug("wait for closing the bulk processing..");
502
+ }
503
+ }
504
+ catch (InterruptedException e) {
505
+ Thread.currentThread().interrupt();
506
+ }
507
+ bulkProcessor = null;
508
+ }
509
+
510
+ if (client != null) {
511
+ client.close(); // ElasticsearchException
512
+ client = null;
513
+ }
514
+ }
515
+
516
+ @Override
517
+ public void abort()
518
+ {
519
+ // TODO do nothing
520
+ }
521
+
522
+ @Override
523
+ public TaskReport commit()
524
+ {
525
+ TaskReport report = Exec.newTaskReport();
526
+ // TODO
527
+ return report;
528
+ }
529
+ }
530
+
531
+ public enum Mode
532
+ {
533
+ INSERT,
534
+ REPLACE;
535
+
536
+ @JsonValue
537
+ @Override
538
+ public String toString()
539
+ {
540
+ return name().toLowerCase(Locale.ENGLISH);
541
+ }
542
+
543
+ @JsonCreator
544
+ public static Mode fromString(String value) // maps the configured "mode" string to a Mode; throws ConfigException for any other value
545
+ {
546
+ switch (value) {
547
+ case "insert":
548
+ return INSERT;
549
+ case "replace":
550
+ return REPLACE;
551
+ default: // the message lists only the modes this enum actually defines
552
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are insert, replace", value));
553
+ }
554
+ }
555
+ }
556
+
557
+ private void reAssignAlias(String aliasName, String newIndexName, Client client)
558
+ throws IndexNotFoundException, InvalidAliasNameException
559
+ {
560
+ if (!isExistsAlias(aliasName, client)) {
561
+ client.admin().indices().prepareAliases()
562
+ .addAlias(newIndexName, aliasName)
563
+ .execute().actionGet();
564
+ log.info(String.format("Assigned alias[%s] to index[%s]", aliasName, newIndexName));
565
+ }
566
+ else {
567
+ List<String> oldIndices = getIndexByAlias(aliasName, client);
568
+ client.admin().indices().prepareAliases()
569
+ .removeAlias(oldIndices.toArray(new String[oldIndices.size()]), aliasName)
570
+ .addAlias(newIndexName, aliasName)
571
+ .execute().actionGet();
572
+ log.info(String.format("Reassigned alias[%s] from index%s to index[%s]", aliasName, oldIndices, newIndexName));
573
+ for (String index : oldIndices) {
574
+ deleteIndex(index, client);
575
+ }
576
+ }
577
+ }
578
+
579
+ private void deleteIndex(String indexName, Client client)
580
+ {
581
+ client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
582
+ log.info(String.format("Deleted Index [%s]", indexName));
583
+ }
584
+
585
+ private List<String> getIndexByAlias(String aliasName, Client client)
586
+ {
587
+ ImmutableOpenMap<String, List<AliasMetaData>> map = client.admin().indices().getAliases(new GetAliasesRequest(aliasName))
588
+ .actionGet().getAliases();
589
+ List<String> indices = new ArrayList<>();
590
+ for (ObjectObjectCursor<String, List<AliasMetaData>> c : map) {
591
+ indices.add(c.key);
592
+ }
593
+
594
+ return indices;
595
+ }
596
+
597
+ private boolean isExistsAlias(String aliasName, Client client)
598
+ {
599
+ return client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().hasAlias(aliasName);
600
+ }
601
+
602
+ private boolean isExistsIndex(String indexName, Client client)
603
+ {
604
+ return client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().hasIndex(indexName);
605
+ }
606
+
607
+ private boolean isAlias(String aliasName, Client client)
608
+ {
609
+ AliasOrIndex aliasOrIndex = client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().getAliasAndIndexLookup().get(aliasName);
610
+ return aliasOrIndex != null && aliasOrIndex.isAlias();
611
+ }
612
+
613
+ public String generateNewIndexName(String indexName)
614
+ {
615
+ Timestamp time = Exec.getTransactionTime();
616
+ return indexName + new SimpleDateFormat("_yyyyMMdd-HHmmss").format(time.toEpochMilli());
617
+ }
618
+
619
+ public static class ConnectionException extends RuntimeException implements UserDataException // static: the exception needs no reference to the enclosing plugin instance
620
+ {
621
+ protected ConnectionException()
622
+ {
623
+ }
624
+
625
+ public ConnectionException(Throwable cause) // wraps the underlying failure (e.g. UnknownHostException, NoNodeAvailableException)
626
+ {
627
+ super(cause);
628
+ }
629
+ }
630
+ }