embulk-output-elasticsearch 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/gradlew CHANGED
@@ -42,11 +42,6 @@ case "`uname`" in
42
42
  ;;
43
43
  esac
44
44
 
45
- # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
- if $cygwin ; then
47
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
- fi
49
-
50
45
  # Attempt to set APP_HOME
51
46
  # Resolve links: $0 may be a link
52
47
  PRG="$0"
@@ -61,9 +56,9 @@ while [ -h "$PRG" ] ; do
61
56
  fi
62
57
  done
63
58
  SAVED="`pwd`"
64
- cd "`dirname \"$PRG\"`/" >&-
59
+ cd "`dirname \"$PRG\"`/" >/dev/null
65
60
  APP_HOME="`pwd -P`"
66
- cd "$SAVED" >&-
61
+ cd "$SAVED" >/dev/null
67
62
 
68
63
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
64
 
@@ -114,6 +109,7 @@ fi
114
109
  if $cygwin ; then
115
110
  APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
111
  CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
113
 
118
114
  # We build the pattern for arguments to be converted via cygpath
119
115
  ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
@@ -0,0 +1,376 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.fasterxml.jackson.core.JsonProcessingException;
4
+ import com.fasterxml.jackson.databind.JsonNode;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
6
+ import com.google.common.annotations.VisibleForTesting;
7
+ import org.eclipse.jetty.client.HttpResponseException;
8
+ import org.eclipse.jetty.client.util.StringContentProvider;
9
+ import org.eclipse.jetty.http.HttpMethod;
10
+ import org.embulk.config.ConfigException;
11
+ import org.embulk.config.UserDataException;
12
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.AuthMethod;
13
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.NodeAddressTask;
14
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask;
15
+ import org.embulk.spi.DataException;
16
+ import org.embulk.spi.Exec;
17
+ import org.embulk.spi.time.Timestamp;
18
+ import org.embulk.util.retryhelper.jetty92.Jetty92RetryHelper;
19
+ import org.embulk.util.retryhelper.jetty92.Jetty92SingleRequester;
20
+ import org.embulk.util.retryhelper.jetty92.StringJetty92ResponseEntityReader;
21
+ import org.slf4j.Logger;
22
+
23
+ import javax.xml.bind.DatatypeConverter;
24
+
25
+ import java.io.IOException;
26
+ import java.text.SimpleDateFormat;
27
+ import java.util.ArrayList;
28
+ import java.util.Arrays;
29
+ import java.util.HashMap;
30
+ import java.util.Iterator;
31
+ import java.util.List;
32
+ import java.util.Locale;
33
+ import java.util.Map;
34
+ import java.util.Random;
35
+
36
+ public class ElasticsearchHttpClient
37
+ {
38
+ private final Logger log;
39
+
40
+ // ALLOW_UNQUOTED_CONTROL_CHARS - Not expected but whether parser will allow JSON Strings to contain unquoted control characters
41
+ // FAIL_ON_UNKNOWN_PROPERTIES - Feature that determines whether encountering of unknown properties
42
+ private final ObjectMapper jsonMapper = new ObjectMapper()
43
+ .configure(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, false)
44
+ .configure(com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
45
+
46
+ // Elasticsearch maximum index byte size
47
+ // public static final int MAX_INDEX_NAME_BYTES = 255;
48
+ // @see https://github.com/elastic/elasticsearch/blob/master/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java#L108
49
+ private final long maxIndexNameBytes = 255;
50
+ private final List<Character> inalidIndexCharaters = Arrays.asList('\\', '/', '*', '?', '"', '<', '>', '|', '#', ' ', ',');
51
+
52
+ public ElasticsearchHttpClient()
53
+ {
54
+ this.log = Exec.getLogger(getClass());
55
+ }
56
+
57
+ public void push(JsonNode records, PluginTask task, Jetty92RetryHelper retryHelper)
58
+ {
59
+ int bulkActions = task.getBulkActions();
60
+ long bulkSize = task.getBulkSize();
61
+ // curl -xPOST localhost:9200/{index}/{type}/_bulk -d '
62
+ // {"index" : {}}\n
63
+ // {"k" : "v"}\n
64
+ // {"index" : {}}\n
65
+ // {"k" : "v2"}\n
66
+ // '
67
+ try {
68
+ String path = String.format("/%s/%s/_bulk", task.getIndex(), task.getType());
69
+ int recordSize = records.size();
70
+ String idColumn = task.getId().orNull();
71
+ if (recordSize > 0) {
72
+ StringBuilder sb = new StringBuilder();
73
+ for (JsonNode record : records) {
74
+ sb.append(createIndexRequest(idColumn, record));
75
+
76
+ String requestString = jsonMapper.writeValueAsString(record);
77
+ sb.append("\n")
78
+ .append(requestString)
79
+ .append("\n");
80
+ }
81
+ sendRequest(path, HttpMethod.POST, task, retryHelper, sb.toString());
82
+ }
83
+ }
84
+ catch (JsonProcessingException ex) {
85
+ throw new DataException(ex);
86
+ }
87
+ }
88
+
89
+ public List<String> getIndexByAlias(String aliasName, PluginTask task, Jetty92RetryHelper retryHelper)
90
+ {
91
+ // curl -XGET localhost:9200/_alias/{alias}
92
+ // No alias: 404
93
+ // Alias found: {"embulk_20161018-183738":{"aliases":{"embulk":{}}}}
94
+ List<String> indices = new ArrayList<>();
95
+ String path = String.format("/_alias/%s", aliasName);
96
+ JsonNode response = sendRequest(path, HttpMethod.GET, task, retryHelper);
97
+
98
+ Iterator it = response.fieldNames();
99
+ while (it.hasNext()) {
100
+ indices.add(it.next().toString());
101
+ }
102
+
103
+ return indices;
104
+ }
105
+
106
+ public boolean isIndexExisting(String indexName, PluginTask task, Jetty92RetryHelper retryHelper)
107
+ {
108
+ // curl -XGET localhost:9200/{index}
109
+ // No index: 404
110
+ // Index found: 200
111
+ try {
112
+ sendRequest(indexName, HttpMethod.GET, task, retryHelper);
113
+ return true;
114
+ }
115
+ catch (ResourceNotFoundException ex) {
116
+ return false;
117
+ }
118
+ }
119
+
120
+ public String generateNewIndexName(String indexName)
121
+ {
122
+ Timestamp time = Exec.getTransactionTime();
123
+ return indexName + new SimpleDateFormat("_yyyyMMdd-HHmmss").format(time.toEpochMilli());
124
+ }
125
+
126
+ public boolean isAliasExisting(String aliasName, PluginTask task, Jetty92RetryHelper retryHelper)
127
+ {
128
+ // curl -XGET localhost:9200/_aliases // List all aliases
129
+ // No aliases: {}
130
+ // Aliases found: {"embulk_20161018-183738":{"aliases":{"embulk":{}}}}
131
+ JsonNode response = sendRequest("/_aliases", HttpMethod.GET, task, retryHelper);
132
+ if (response.size() == 0) {
133
+ return false;
134
+ }
135
+ for (JsonNode index : response) {
136
+ if (index.has("aliases") && index.get("aliases").has(aliasName)) {
137
+ return true;
138
+ }
139
+ }
140
+ return false;
141
+ }
142
+
143
+ // Should be called just once while Embulk transaction.
144
+ // Be sure to call after all exporting tasks completed
145
+ // This method will delete existing index
146
+ public void reassignAlias(String aliasName, String newIndexName, PluginTask task, Jetty92RetryHelper retryHelper)
147
+ {
148
+ if (!isAliasExisting(aliasName, task, retryHelper)) {
149
+ assignAlias(newIndexName, aliasName, task, retryHelper);
150
+ }
151
+ else {
152
+ List<String> oldIndices = getIndexByAlias(aliasName, task, retryHelper);
153
+ assignAlias(newIndexName, aliasName, task, retryHelper);
154
+ for (String index : oldIndices) {
155
+ deleteIndex(index, task, retryHelper);
156
+ }
157
+ }
158
+ }
159
+
160
+ public String getEsVersion(PluginTask task, Jetty92RetryHelper retryHelper)
161
+ {
162
+ // curl -XGET 'http://localhost:9200’
163
+ JsonNode response = sendRequest("", HttpMethod.GET, task, retryHelper);
164
+ return response.get("version").get("number").asText();
165
+ }
166
+
167
+ public void validateIndexOrAliasName(String index, String type)
168
+ {
169
+ for (int i = 0; i < index.length(); i++) {
170
+ if (inalidIndexCharaters.contains(index.charAt(i))) {
171
+ throw new ConfigException(String.format("%s '%s' must not contain the invalid characters " + inalidIndexCharaters.toString(), type, index));
172
+ }
173
+ }
174
+
175
+ if (!index.toLowerCase(Locale.ROOT).equals(index)) {
176
+ throw new ConfigException(String.format("%s '%s' must be lowercase", type, index));
177
+ }
178
+
179
+ if (index.startsWith("_") || index.startsWith("-") || index.startsWith("+")) {
180
+ throw new ConfigException(String.format("%s '%s' must not start with '_', '-', or '+'", type, index));
181
+ }
182
+
183
+ if (index.length() > maxIndexNameBytes) {
184
+ throw new ConfigException(String.format("%s name is too long, (%s > %s)", type, index.length(), maxIndexNameBytes));
185
+ }
186
+
187
+ if (index.equals(".") || index.equals("..")) {
188
+ throw new ConfigException("index must not be '.' or '..'");
189
+ }
190
+ }
191
+
192
+ private String createIndexRequest(String idColumn, JsonNode record) throws JsonProcessingException
193
+ {
194
+ // index name and type are set at path("/{index}/{type}"). So no need to set
195
+ if (idColumn != null && record.hasNonNull(idColumn)) {
196
+ // {"index" : {"_id" : "v"}}
197
+ Map<String, Map> indexRequest = new HashMap<>();
198
+
199
+ Map<String, JsonNode> idRequest = new HashMap<>();
200
+ idRequest.put("_id", record.get(idColumn));
201
+
202
+ indexRequest.put("index", idRequest);
203
+ return jsonMapper.writeValueAsString(indexRequest);
204
+ }
205
+ else {
206
+ // {"index" : {}}
207
+ return "{\"index\" : {}}";
208
+ }
209
+ }
210
+
211
+ private void assignAlias(String indexName, String aliasName, PluginTask task, Jetty92RetryHelper retryHelper)
212
+ {
213
+ try {
214
+ if (isIndexExisting(indexName, task, retryHelper)) {
215
+ if (isAliasExisting(aliasName, task, retryHelper)) {
216
+ // curl -XPUT http://localhost:9200/_alias -d\
217
+ // "actions" : [
218
+ // {"remove" : {"alias" : "{alias}", "index" : "{index_old}"}},
219
+ // {"add" : {"alias": "{alias}", "index": "{index_new}"}}
220
+ // ]
221
+ // Success: {"acknowledged":true}
222
+ List<String> oldIndices = getIndexByAlias(aliasName, task, retryHelper);
223
+
224
+ Map<String, String> newAlias = new HashMap<>();
225
+ newAlias.put("alias", aliasName);
226
+ newAlias.put("index", indexName);
227
+ Map<String, Map> add = new HashMap<>();
228
+ add.put("add", newAlias);
229
+
230
+ Map<String, String> oldAlias = new HashMap<>();
231
+ // TODO multiple alias?
232
+ for (String oldIndex : oldIndices) {
233
+ oldAlias.put("alias", aliasName);
234
+ oldAlias.put("index", oldIndex);
235
+ }
236
+ Map<String, Map> remove = new HashMap<>();
237
+ remove.put("remove", oldAlias);
238
+
239
+ List<Map<String, Map>> actions = new ArrayList<>();
240
+ actions.add(remove);
241
+ actions.add(add);
242
+ Map<String, List> rootTree = new HashMap<>();
243
+ rootTree.put("actions", actions);
244
+
245
+ String content = jsonMapper.writeValueAsString(rootTree);
246
+ sendRequest("/_aliases", HttpMethod.POST, task, retryHelper, content);
247
+ log.info("Reassigned alias [{}] to index[{}]", aliasName, indexName);
248
+ }
249
+ else {
250
+ // curl -XPUT http://localhost:9200/{index}/_alias/{alias}
251
+ // Success: {"acknowledged":true}
252
+ String path = String.format("/%s/_alias/%s", indexName, aliasName);
253
+ sendRequest(path, HttpMethod.PUT, task, retryHelper);
254
+ log.info("Assigned alias [{}] to Index [{}]", aliasName, indexName);
255
+ }
256
+ }
257
+ }
258
+ catch (JsonProcessingException ex) {
259
+ throw new ConfigException(String.format("Failed to assign alias[%s] to index[%s]", aliasName, indexName));
260
+ }
261
+ }
262
+
263
+ private void deleteIndex(String indexName, PluginTask task, Jetty92RetryHelper retryHelper)
264
+ {
265
+ // curl -XDELETE localhost:9200/{index}
266
+ // Success: {"acknowledged":true}
267
+ if (isIndexExisting(indexName, task, retryHelper)) {
268
+ sendRequest(indexName, HttpMethod.DELETE, task, retryHelper);
269
+ log.info("Deleted Index [{}]", indexName);
270
+ }
271
+ }
272
+
273
+ private JsonNode sendRequest(String path, final HttpMethod method, PluginTask task, Jetty92RetryHelper retryHelper)
274
+ {
275
+ return sendRequest(path, method, task, retryHelper, "");
276
+ }
277
+
278
+ private JsonNode sendRequest(String path, final HttpMethod method, PluginTask task, Jetty92RetryHelper retryHelper, final String content)
279
+ {
280
+ final String uri = createRequestUri(task, path);
281
+ final String authorizationHeader = getAuthorizationHeader(task);
282
+
283
+ try {
284
+ String responseBody = retryHelper.requestWithRetry(
285
+ new StringJetty92ResponseEntityReader(task.getTimeoutMills()),
286
+ new Jetty92SingleRequester() {
287
+ @Override
288
+ public void requestOnce(org.eclipse.jetty.client.HttpClient client, org.eclipse.jetty.client.api.Response.Listener responseListener)
289
+ {
290
+ org.eclipse.jetty.client.api.Request request = client
291
+ .newRequest(uri)
292
+ .accept("application/json")
293
+ .method(method);
294
+ if (method == HttpMethod.POST) {
295
+ request.content(new StringContentProvider(content), "application/json");
296
+ }
297
+
298
+ if (!authorizationHeader.isEmpty()) {
299
+ request.header("Authorization", authorizationHeader);
300
+ }
301
+ request.send(responseListener);
302
+ }
303
+
304
+ @Override
305
+ public boolean isResponseStatusToRetry(org.eclipse.jetty.client.api.Response response)
306
+ {
307
+ int status = response.getStatus();
308
+ if (status == 429) {
309
+ return true; // Retry if 429.
310
+ }
311
+ return status / 100 != 4; // Retry unless 4xx except for 429.
312
+ }
313
+ });
314
+ return parseJson(responseBody);
315
+ }
316
+ catch (HttpResponseException ex) {
317
+ if (ex.getMessage().startsWith("Response not 2xx: 404 Not Found")) {
318
+ throw new ResourceNotFoundException(ex);
319
+ }
320
+ throw ex;
321
+ }
322
+ }
323
+
324
+ private String createRequestUri(PluginTask task, String path)
325
+ {
326
+ if (!path.startsWith("/")) {
327
+ path = "/" + path;
328
+ }
329
+ String protocol = task.getUseSsl() ? "https" : "http";
330
+ String nodeAddress = getRandomNodeAddress(task);
331
+ return String.format("%s://%s%s", protocol, nodeAddress, path);
332
+ }
333
+
334
+ // Return node address (RoundRobin)
335
+ private String getRandomNodeAddress(PluginTask task)
336
+ {
337
+ List<NodeAddressTask> nodes = task.getNodes();
338
+ Random random = new Random();
339
+ int index = random.nextInt(nodes.size());
340
+ NodeAddressTask node = nodes.get(index);
341
+ return node.getHost() + ":" + node.getPort();
342
+ }
343
+
344
+ private JsonNode parseJson(final String json) throws DataException
345
+ {
346
+ try {
347
+ return this.jsonMapper.readTree(json);
348
+ }
349
+ catch (IOException ex) {
350
+ throw new DataException(ex);
351
+ }
352
+ }
353
+
354
+ @VisibleForTesting
355
+ protected String getAuthorizationHeader(PluginTask task)
356
+ {
357
+ String header = "";
358
+ if (task.getAuthMethod() == AuthMethod.BASIC) {
359
+ String authString = task.getUser().get() + ":" + task.getPassword().get();
360
+ header = "Basic " + DatatypeConverter.printBase64Binary(authString.getBytes());
361
+ }
362
+ return header;
363
+ }
364
+
365
+ public class ResourceNotFoundException extends RuntimeException implements UserDataException
366
+ {
367
+ protected ResourceNotFoundException()
368
+ {
369
+ }
370
+
371
+ public ResourceNotFoundException(Throwable cause)
372
+ {
373
+ super(cause);
374
+ }
375
+ }
376
+ }
@@ -1,586 +1,12 @@
1
1
  package org.embulk.output.elasticsearch;
2
2
 
3
- import java.io.IOException;
4
- import java.text.SimpleDateFormat;
5
- import java.util.ArrayList;
6
- import java.util.Date;
7
- import java.util.List;
8
- import java.util.Locale;
9
- import java.util.concurrent.TimeUnit;
10
- import java.net.InetAddress;
11
- import java.net.UnknownHostException;
12
-
13
- import com.fasterxml.jackson.annotation.JsonCreator;
14
- import com.fasterxml.jackson.annotation.JsonValue;
15
-
16
- import org.elasticsearch.action.bulk.BulkItemResponse;
17
- import org.elasticsearch.action.bulk.BulkProcessor;
18
- import org.elasticsearch.action.bulk.BulkRequest;
19
- import org.elasticsearch.action.bulk.BulkResponse;
20
- import org.elasticsearch.action.index.IndexRequest;
21
- import org.elasticsearch.client.Client;
22
- import org.elasticsearch.client.Requests;
23
- import org.elasticsearch.client.transport.NoNodeAvailableException;
24
- import org.elasticsearch.client.transport.TransportClient;
25
- import org.elasticsearch.cluster.metadata.AliasMetaData;
26
- import org.elasticsearch.cluster.metadata.AliasOrIndex;
27
- import org.elasticsearch.common.collect.ImmutableOpenMap;
28
- import org.elasticsearch.common.settings.Settings;
29
- import org.elasticsearch.common.transport.InetSocketTransportAddress;
30
- import org.elasticsearch.common.unit.ByteSizeValue;
31
- import org.elasticsearch.common.xcontent.XContentBuilder;
32
- import org.elasticsearch.common.xcontent.XContentFactory;
33
- import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
34
- import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
35
- import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
36
- import org.elasticsearch.index.IndexNotFoundException;
37
- import org.elasticsearch.indices.InvalidAliasNameException;
38
-
39
- import org.embulk.config.Config;
40
- import org.embulk.config.ConfigDefault;
41
- import org.embulk.config.ConfigDiff;
42
- import org.embulk.config.ConfigException;
43
- import org.embulk.config.ConfigSource;
44
- import org.embulk.config.Task;
45
- import org.embulk.config.TaskReport;
46
- import org.embulk.config.TaskSource;
47
- import org.embulk.config.UserDataException;
48
- import org.embulk.spi.Column;
49
- import org.embulk.spi.ColumnVisitor;
50
- import org.embulk.spi.Exec;
51
- import org.embulk.spi.OutputPlugin;
52
- import org.embulk.spi.Page;
53
- import org.embulk.spi.PageReader;
54
- import org.embulk.spi.Schema;
55
- import org.embulk.spi.TransactionalPageOutput;
56
- import org.embulk.spi.time.Timestamp;
57
- import org.embulk.spi.type.Types;
58
- import org.slf4j.Logger;
59
-
60
- import com.google.common.base.Optional;
61
- import com.google.common.base.Throwables;
62
- import com.google.inject.Inject;
63
- import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
3
+ import org.embulk.base.restclient.RestClientOutputPluginBase;
64
4
 
65
5
  public class ElasticsearchOutputPlugin
66
- implements OutputPlugin
6
+ extends RestClientOutputPluginBase<ElasticsearchOutputPluginDelegate.PluginTask>
67
7
  {
68
- public interface NodeAddressTask
69
- extends Task
70
- {
71
- @Config("host")
72
- public String getHost();
73
-
74
- @Config("port")
75
- @ConfigDefault("9300")
76
- public int getPort();
77
- }
78
-
79
- public interface PluginTask
80
- extends Task
81
- {
82
- @Config("mode")
83
- @ConfigDefault("\"insert\"")
84
- public Mode getMode();
85
-
86
- @Config("nodes")
87
- public List<NodeAddressTask> getNodes();
88
-
89
- @Config("cluster_name")
90
- @ConfigDefault("\"elasticsearch\"")
91
- public String getClusterName();
92
-
93
- @Config("index")
94
- public String getIndex();
95
- public void setIndex(String indexName);
96
-
97
- @Config("alias")
98
- @ConfigDefault("null")
99
- public Optional<String> getAlias();
100
- public void setAlias(Optional<String> aliasName);
101
-
102
- @Config("index_type")
103
- public String getType();
104
-
105
- @Config("id")
106
- @ConfigDefault("null")
107
- public Optional<String> getId();
108
-
109
- @Config("bulk_actions")
110
- @ConfigDefault("1000")
111
- public int getBulkActions();
112
-
113
- @Config("bulk_size")
114
- @ConfigDefault("5242880")
115
- public long getBulkSize();
116
-
117
- @Config("concurrent_requests")
118
- @ConfigDefault("5")
119
- public int getConcurrentRequests();
120
- }
121
-
122
- private final Logger log;
123
-
124
- @Inject
125
8
  public ElasticsearchOutputPlugin()
126
9
  {
127
- log = Exec.getLogger(getClass());
10
+ super(ElasticsearchOutputPluginDelegate.PluginTask.class, new ElasticsearchOutputPluginDelegate());
128
11
  }
129
-
130
- @Override
131
- public ConfigDiff transaction(ConfigSource config, Schema schema,
132
- int processorCount, Control control)
133
- {
134
- final PluginTask task = config.loadConfig(PluginTask.class);
135
-
136
- // confirm that a client can be initialized
137
- try (Client client = createClient(task)) {
138
- log.info(String.format("Executing plugin with '%s' mode.", task.getMode()));
139
- if (task.getMode().equals(Mode.REPLACE)) {
140
- task.setAlias(Optional.of(task.getIndex()));
141
- task.setIndex(generateNewIndexName(task.getIndex()));
142
- if (isExistsIndex(task.getAlias().orNull(), client) && !isAlias(task.getAlias().orNull(), client)) {
143
- throw new ConfigException(String.format("Invalid alias name [%s], an index exists with the same name as the alias", task.getAlias().orNull()));
144
- }
145
- }
146
- log.info(String.format("Inserting data into index[%s]", task.getIndex()));
147
- control.run(task.dump());
148
-
149
- if (task.getMode().equals(Mode.REPLACE)) {
150
- try {
151
- reAssignAlias(task.getAlias().orNull(), task.getIndex(), client);
152
- } catch (IndexNotFoundException | InvalidAliasNameException e) {
153
- throw new ConfigException(e);
154
- } catch (NoNodeAvailableException e) {
155
- throw new ConnectionException(e);
156
- }
157
- }
158
- } catch (Exception e) {
159
- throw Throwables.propagate(e);
160
- }
161
-
162
- ConfigDiff nextConfig = Exec.newConfigDiff();
163
- return nextConfig;
164
- }
165
-
166
- @Override
167
- public ConfigDiff resume(TaskSource taskSource,
168
- Schema schema, int processorCount,
169
- OutputPlugin.Control control)
170
- {
171
- // TODO
172
- return Exec.newConfigDiff();
173
- }
174
-
175
- @Override
176
- public void cleanup(TaskSource taskSource,
177
- Schema schema, int processorCount,
178
- List<TaskReport> successTaskReports)
179
- {}
180
-
181
- private Client createClient(final PluginTask task)
182
- {
183
- // @see http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/client.html
184
- Settings settings = Settings.settingsBuilder()
185
- .put("cluster.name", task.getClusterName())
186
- .build();
187
- TransportClient client = TransportClient.builder().settings(settings).build();
188
- List<NodeAddressTask> nodes = task.getNodes();
189
- for (NodeAddressTask node : nodes) {
190
- try {
191
- client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(node.getHost()), node.getPort()));
192
- } catch (UnknownHostException | NoNodeAvailableException e) {
193
- throw new ConnectionException(e);
194
- }
195
- }
196
- return client;
197
- }
198
-
199
- private BulkProcessor newBulkProcessor(final PluginTask task, final Client client)
200
- {
201
- return BulkProcessor.builder(client, new BulkProcessor.Listener() {
202
- @Override
203
- public void beforeBulk(long executionId, BulkRequest request)
204
- {
205
- log.info("Execute {} bulk actions", request.numberOfActions());
206
- }
207
-
208
- @Override
209
- public void afterBulk(long executionId, BulkRequest request, BulkResponse response)
210
- {
211
- if (response.hasFailures()) {
212
- long items = 0;
213
- if (log.isDebugEnabled()) {
214
- for (BulkItemResponse item : response.getItems()) {
215
- if (item.isFailed()) {
216
- items += 1;
217
- log.debug(" Error for {}/{}/{} for {} operation: {}",
218
- item.getIndex(), item.getType(), item.getId(),
219
- item.getOpType(), item.getFailureMessage());
220
- }
221
- }
222
- }
223
- log.warn("{} bulk actions failed: {}", items, response.buildFailureMessage());
224
- } else {
225
- log.info("{} bulk actions succeeded", request.numberOfActions());
226
- }
227
- }
228
-
229
- @Override
230
- public void afterBulk(long executionId, BulkRequest request, Throwable failure)
231
- {
232
- if (failure.getClass() == NoNodeAvailableException.class) {
233
- log.error("Got the error during bulk processing", failure);
234
- throw new ConnectionException(failure);
235
- } else {
236
- log.warn("Got the error during bulk processing", failure);
237
- }
238
- }
239
- }).setBulkActions(task.getBulkActions())
240
- .setBulkSize(new ByteSizeValue(task.getBulkSize()))
241
- .setConcurrentRequests(task.getConcurrentRequests())
242
- .build();
243
- }
244
-
245
- @Override
246
- public TransactionalPageOutput open(TaskSource taskSource, Schema schema,
247
- int processorIndex)
248
- {
249
- final PluginTask task = taskSource.loadTask(PluginTask.class);
250
- Client client = createClient(task);
251
- BulkProcessor bulkProcessor = newBulkProcessor(task, client);
252
- ElasticsearchPageOutput pageOutput = new ElasticsearchPageOutput(task, client, bulkProcessor);
253
- pageOutput.open(schema);
254
- return pageOutput;
255
- }
256
-
257
- public static class ElasticsearchPageOutput implements TransactionalPageOutput
258
- {
259
- private Logger log;
260
-
261
- private Client client;
262
- private BulkProcessor bulkProcessor;
263
-
264
- private PageReader pageReader;
265
- private Column idColumn;
266
-
267
- private final String index;
268
- private final String type;
269
- private final String id;
270
-
271
- public ElasticsearchPageOutput(PluginTask task, Client client, BulkProcessor bulkProcessor)
272
- {
273
- this.log = Exec.getLogger(getClass());
274
-
275
- this.client = client;
276
- this.bulkProcessor = bulkProcessor;
277
-
278
- this.index = task.getIndex();
279
- this.type = task.getType();
280
- this.id = task.getId().orNull();
281
- }
282
-
283
- void open(final Schema schema)
284
- {
285
- pageReader = new PageReader(schema);
286
- idColumn = (id == null) ? null : schema.lookupColumn(id);
287
- }
288
-
289
- @Override
290
- public void add(Page page)
291
- {
292
- pageReader.setPage(page);
293
-
294
- while (pageReader.nextRecord()) {
295
- try {
296
- final XContentBuilder contextBuilder = XContentFactory.jsonBuilder().startObject(); // TODO reusable??
297
- pageReader.getSchema().visitColumns(new ColumnVisitor() {
298
- @Override
299
- public void booleanColumn(Column column) {
300
- try {
301
- if (pageReader.isNull(column)) {
302
- contextBuilder.nullField(column.getName());
303
- } else {
304
- contextBuilder.field(column.getName(), pageReader.getBoolean(column));
305
- }
306
- } catch (IOException e) {
307
- try {
308
- contextBuilder.nullField(column.getName());
309
- } catch (IOException ex) {
310
- throw Throwables.propagate(ex);
311
- }
312
- }
313
- }
314
-
315
- @Override
316
- public void longColumn(Column column) {
317
- try {
318
- if (pageReader.isNull(column)) {
319
- contextBuilder.nullField(column.getName());
320
- } else {
321
- contextBuilder.field(column.getName(), pageReader.getLong(column));
322
- }
323
- } catch (IOException e) {
324
- try {
325
- contextBuilder.nullField(column.getName());
326
- } catch (IOException ex) {
327
- throw Throwables.propagate(ex);
328
- }
329
- }
330
- }
331
-
332
- @Override
333
- public void doubleColumn(Column column) {
334
- try {
335
- if (pageReader.isNull(column)) {
336
- contextBuilder.nullField(column.getName());
337
- } else {
338
- contextBuilder.field(column.getName(), pageReader.getDouble(column));
339
- }
340
- } catch (IOException e) {
341
- try {
342
- contextBuilder.nullField(column.getName());
343
- } catch (IOException ex) {
344
- throw Throwables.propagate(ex);
345
- }
346
- }
347
- }
348
-
349
- @Override
350
- public void stringColumn(Column column) {
351
- try {
352
- if (pageReader.isNull(column)) {
353
- contextBuilder.nullField(column.getName());
354
- } else {
355
- contextBuilder.field(column.getName(), pageReader.getString(column));
356
- }
357
- } catch (IOException e) {
358
- try {
359
- contextBuilder.nullField(column.getName());
360
- } catch (IOException ex) {
361
- throw Throwables.propagate(ex);
362
- }
363
- }
364
- }
365
-
366
- @Override
367
- public void jsonColumn(Column column) {
368
- try {
369
- if (pageReader.isNull(column)) {
370
- contextBuilder.nullField(column.getName());
371
- } else {
372
- contextBuilder.field(column.getName(), pageReader.getJson(column).toJson());
373
- }
374
- } catch (IOException e) {
375
- try {
376
- contextBuilder.nullField(column.getName());
377
- } catch (IOException ex) {
378
- throw Throwables.propagate(ex);
379
- }
380
- }
381
- }
382
-
383
- @Override
384
- public void timestampColumn(Column column) {
385
- try {
386
- if (pageReader.isNull(column)) {
387
- contextBuilder.nullField(column.getName());
388
- } else {
389
- contextBuilder.field(column.getName(), new Date(pageReader.getTimestamp(column).toEpochMilli()));
390
- }
391
- } catch (IOException e) {
392
- try {
393
- contextBuilder.nullField(column.getName());
394
- } catch (IOException ex) {
395
- throw Throwables.propagate(ex);
396
- }
397
- }
398
- }
399
- });
400
-
401
- contextBuilder.endObject();
402
- bulkProcessor.add(newIndexRequest(getIdValue(idColumn)).source(contextBuilder));
403
-
404
- } catch (ConnectionException | IOException e) {
405
- Throwables.propagate(e); // TODO error handling
406
- }
407
- }
408
- }
409
-
410
- /**
411
- * @param inputColumn
412
- * @return
413
- */
414
- private String getIdValue(Column inputColumn) {
415
- if (inputColumn == null) return null;
416
- if (pageReader.isNull(inputColumn)) return null;
417
- String idValue = null;
418
- if (Types.STRING.equals(inputColumn.getType())) {
419
- idValue = pageReader.getString(inputColumn);
420
- } else if (Types.BOOLEAN.equals(inputColumn.getType())) {
421
- idValue = pageReader.getBoolean(inputColumn) + "";
422
- } else if (Types.DOUBLE.equals(inputColumn.getType())) {
423
- idValue = pageReader.getDouble(inputColumn) + "";
424
- } else if (Types.LONG.equals(inputColumn.getType())) {
425
- idValue = pageReader.getLong(inputColumn) + "";
426
- } else if (Types.JSON.equals(inputColumn.getType())) {
427
- idValue = pageReader.getJson(inputColumn).toJson();
428
- } else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
429
- idValue = pageReader.getTimestamp(inputColumn).toString();
430
- } else {
431
- idValue = null;
432
- }
433
- return idValue;
434
- }
435
-
436
- private IndexRequest newIndexRequest(String idValue)
437
- {
438
- return Requests.indexRequest(index).type(type).id(idValue);
439
- }
440
-
441
- @Override
442
- public void finish()
443
- {
444
- try {
445
- bulkProcessor.flush();
446
- } finally {
447
- close();
448
- }
449
- }
450
-
451
- @Override
452
- public void close()
453
- {
454
- if (bulkProcessor != null) {
455
- try {
456
- while (!bulkProcessor.awaitClose(3, TimeUnit.SECONDS)) {
457
- log.debug("wait for closing the bulk processing..");
458
- }
459
- } catch (InterruptedException e) {
460
- Thread.currentThread().interrupt();
461
- }
462
- bulkProcessor = null;
463
- }
464
-
465
- if (client != null) {
466
- client.close(); // ElasticsearchException
467
- client = null;
468
- }
469
- }
470
-
471
- @Override
472
- public void abort()
473
- {
474
- // TODO do nothing
475
- }
476
-
477
- @Override
478
- public TaskReport commit()
479
- {
480
- TaskReport report = Exec.newTaskReport();
481
- // TODO
482
- return report;
483
- }
484
-
485
- }
486
-
487
- public enum Mode
488
- {
489
- INSERT,
490
- REPLACE;
491
-
492
- @JsonValue
493
- @Override
494
- public String toString()
495
- {
496
- return name().toLowerCase(Locale.ENGLISH);
497
- }
498
-
499
- @JsonCreator
500
- public static Mode fromString(String value)
501
- {
502
- switch (value) {
503
- case "insert":
504
- return INSERT;
505
- case "replace":
506
- return REPLACE;
507
- default:
508
- throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are insert, truncate_insert, replace", value));
509
- }
510
- }
511
- }
512
-
513
- private void reAssignAlias(String aliasName, String newIndexName, Client client)
514
- throws IndexNotFoundException, InvalidAliasNameException
515
- {
516
- if (!isExistsAlias(aliasName, client)) {
517
- client.admin().indices().prepareAliases()
518
- .addAlias(newIndexName, aliasName)
519
- .execute().actionGet();
520
- log.info(String.format("Assigned alias[%s] to index[%s]", aliasName, newIndexName));
521
- } else {
522
- List<String> oldIndices = getIndexByAlias(aliasName, client);
523
- client.admin().indices().prepareAliases()
524
- .removeAlias(oldIndices.toArray(new String[oldIndices.size()]), aliasName)
525
- .addAlias(newIndexName, aliasName)
526
- .execute().actionGet();
527
- log.info(String.format("Reassigned alias[%s] from index%s to index[%s]", aliasName, oldIndices, newIndexName));
528
- for (String index : oldIndices) {
529
- deleteIndex(index, client);
530
- }
531
- }
532
- }
533
-
534
- private void deleteIndex(String indexName, Client client)
535
- {
536
- client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
537
- log.info(String.format("Deleted Index [%s]", indexName));
538
- }
539
-
540
- private List<String> getIndexByAlias(String aliasName, Client client)
541
- {
542
- ImmutableOpenMap<String, List<AliasMetaData>> map = client.admin().indices().getAliases(new GetAliasesRequest(aliasName))
543
- .actionGet().getAliases();
544
- List<String> indices = new ArrayList<>();
545
- for (ObjectObjectCursor<String, List<AliasMetaData>> c : map) {
546
- indices.add(c.key);
547
- }
548
-
549
- return indices;
550
- }
551
-
552
- private boolean isExistsAlias(String aliasName, Client client)
553
- {
554
- return client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().hasAlias(aliasName);
555
- }
556
-
557
- private boolean isExistsIndex(String indexName, Client client)
558
- {
559
- return client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().hasIndex(indexName);
560
- }
561
-
562
- private boolean isAlias(String aliasName, Client client)
563
- {
564
- AliasOrIndex aliasOrIndex = client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().getAliasAndIndexLookup().get(aliasName);
565
- return aliasOrIndex != null && aliasOrIndex.isAlias();
566
- }
567
-
568
- public String generateNewIndexName(String indexName)
569
- {
570
- Timestamp time = Exec.getTransactionTime();
571
- return indexName + new SimpleDateFormat("_yyyyMMdd-HHmmss").format(time.toEpochMilli());
572
- }
573
-
574
- public class ConnectionException extends RuntimeException implements UserDataException
575
- {
576
- protected ConnectionException()
577
- {
578
- }
579
-
580
- public ConnectionException(Throwable cause)
581
- {
582
- super(cause);
583
- }
584
- }
585
-
586
12
  }