embulk-output-elasticsearch 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/gradlew CHANGED
@@ -42,11 +42,6 @@ case "`uname`" in
42
42
  ;;
43
43
  esac
44
44
 
45
- # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
- if $cygwin ; then
47
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
- fi
49
-
50
45
  # Attempt to set APP_HOME
51
46
  # Resolve links: $0 may be a link
52
47
  PRG="$0"
@@ -61,9 +56,9 @@ while [ -h "$PRG" ] ; do
61
56
  fi
62
57
  done
63
58
  SAVED="`pwd`"
64
- cd "`dirname \"$PRG\"`/" >&-
59
+ cd "`dirname \"$PRG\"`/" >/dev/null
65
60
  APP_HOME="`pwd -P`"
66
- cd "$SAVED" >&-
61
+ cd "$SAVED" >/dev/null
67
62
 
68
63
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
64
 
@@ -114,6 +109,7 @@ fi
114
109
  if $cygwin ; then
115
110
  APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
111
  CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
113
 
118
114
  # We build the pattern for arguments to be converted via cygpath
119
115
  ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
@@ -0,0 +1,376 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.fasterxml.jackson.core.JsonProcessingException;
4
+ import com.fasterxml.jackson.databind.JsonNode;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
6
+ import com.google.common.annotations.VisibleForTesting;
7
+ import org.eclipse.jetty.client.HttpResponseException;
8
+ import org.eclipse.jetty.client.util.StringContentProvider;
9
+ import org.eclipse.jetty.http.HttpMethod;
10
+ import org.embulk.config.ConfigException;
11
+ import org.embulk.config.UserDataException;
12
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.AuthMethod;
13
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.NodeAddressTask;
14
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask;
15
+ import org.embulk.spi.DataException;
16
+ import org.embulk.spi.Exec;
17
+ import org.embulk.spi.time.Timestamp;
18
+ import org.embulk.util.retryhelper.jetty92.Jetty92RetryHelper;
19
+ import org.embulk.util.retryhelper.jetty92.Jetty92SingleRequester;
20
+ import org.embulk.util.retryhelper.jetty92.StringJetty92ResponseEntityReader;
21
+ import org.slf4j.Logger;
22
+
23
+ import javax.xml.bind.DatatypeConverter;
24
+
25
+ import java.io.IOException;
26
+ import java.text.SimpleDateFormat;
27
+ import java.util.ArrayList;
28
+ import java.util.Arrays;
29
+ import java.util.HashMap;
30
+ import java.util.Iterator;
31
+ import java.util.List;
32
+ import java.util.Locale;
33
+ import java.util.Map;
34
+ import java.util.Random;
35
+
36
+ public class ElasticsearchHttpClient
37
+ {
38
+ private final Logger log;
39
+
40
+ // ALLOW_UNQUOTED_CONTROL_CHARS - whether the parser allows JSON strings to contain unquoted control characters (disabled here)
41
+ // FAIL_ON_UNKNOWN_PROPERTIES - whether encountering unknown properties causes a failure (disabled here)
42
+ private final ObjectMapper jsonMapper = new ObjectMapper()
43
+ .configure(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, false)
44
+ .configure(com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
45
+
46
+ // Elasticsearch maximum index byte size
47
+ // public static final int MAX_INDEX_NAME_BYTES = 255;
48
+ // @see https://github.com/elastic/elasticsearch/blob/master/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java#L108
49
+ private final long maxIndexNameBytes = 255;
50
+ private final List<Character> inalidIndexCharaters = Arrays.asList('\\', '/', '*', '?', '"', '<', '>', '|', '#', ' ', ',');
51
+
52
+ public ElasticsearchHttpClient()
53
+ {
54
+ this.log = Exec.getLogger(getClass());
55
+ }
56
+
57
+ public void push(JsonNode records, PluginTask task, Jetty92RetryHelper retryHelper)
58
+ {
59
+ int bulkActions = task.getBulkActions();
60
+ long bulkSize = task.getBulkSize();
61
+ // curl -XPOST localhost:9200/{index}/{type}/_bulk -d '
62
+ // {"index" : {}}\n
63
+ // {"k" : "v"}\n
64
+ // {"index" : {}}\n
65
+ // {"k" : "v2"}\n
66
+ // '
67
+ try {
68
+ String path = String.format("/%s/%s/_bulk", task.getIndex(), task.getType());
69
+ int recordSize = records.size();
70
+ String idColumn = task.getId().orNull();
71
+ if (recordSize > 0) {
72
+ StringBuilder sb = new StringBuilder();
73
+ for (JsonNode record : records) {
74
+ sb.append(createIndexRequest(idColumn, record));
75
+
76
+ String requestString = jsonMapper.writeValueAsString(record);
77
+ sb.append("\n")
78
+ .append(requestString)
79
+ .append("\n");
80
+ }
81
+ sendRequest(path, HttpMethod.POST, task, retryHelper, sb.toString());
82
+ }
83
+ }
84
+ catch (JsonProcessingException ex) {
85
+ throw new DataException(ex);
86
+ }
87
+ }
88
+
89
+ public List<String> getIndexByAlias(String aliasName, PluginTask task, Jetty92RetryHelper retryHelper)
90
+ {
91
+ // curl -XGET localhost:9200/_alias/{alias}
92
+ // No alias: 404
93
+ // Alias found: {"embulk_20161018-183738":{"aliases":{"embulk":{}}}}
94
+ List<String> indices = new ArrayList<>();
95
+ String path = String.format("/_alias/%s", aliasName);
96
+ JsonNode response = sendRequest(path, HttpMethod.GET, task, retryHelper);
97
+
98
+ Iterator it = response.fieldNames();
99
+ while (it.hasNext()) {
100
+ indices.add(it.next().toString());
101
+ }
102
+
103
+ return indices;
104
+ }
105
+
106
+ public boolean isIndexExisting(String indexName, PluginTask task, Jetty92RetryHelper retryHelper)
107
+ {
108
+ // curl -XGET localhost:9200/{index}
109
+ // No index: 404
110
+ // Index found: 200
111
+ try {
112
+ sendRequest(indexName, HttpMethod.GET, task, retryHelper);
113
+ return true;
114
+ }
115
+ catch (ResourceNotFoundException ex) {
116
+ return false;
117
+ }
118
+ }
119
+
120
+ public String generateNewIndexName(String indexName)
121
+ {
122
+ Timestamp time = Exec.getTransactionTime();
123
+ return indexName + new SimpleDateFormat("_yyyyMMdd-HHmmss").format(time.toEpochMilli());
124
+ }
125
+
126
+ public boolean isAliasExisting(String aliasName, PluginTask task, Jetty92RetryHelper retryHelper)
127
+ {
128
+ // curl -XGET localhost:9200/_aliases // List all aliases
129
+ // No aliases: {}
130
+ // Aliases found: {"embulk_20161018-183738":{"aliases":{"embulk":{}}}}
131
+ JsonNode response = sendRequest("/_aliases", HttpMethod.GET, task, retryHelper);
132
+ if (response.size() == 0) {
133
+ return false;
134
+ }
135
+ for (JsonNode index : response) {
136
+ if (index.has("aliases") && index.get("aliases").has(aliasName)) {
137
+ return true;
138
+ }
139
+ }
140
+ return false;
141
+ }
142
+
143
+ // Should be called just once per Embulk transaction.
144
+ // Be sure to call it after all exporting tasks have completed.
145
+ // This method deletes the indices that the alias previously pointed to.
146
+ public void reassignAlias(String aliasName, String newIndexName, PluginTask task, Jetty92RetryHelper retryHelper)
147
+ {
148
+ if (!isAliasExisting(aliasName, task, retryHelper)) {
149
+ assignAlias(newIndexName, aliasName, task, retryHelper);
150
+ }
151
+ else {
152
+ List<String> oldIndices = getIndexByAlias(aliasName, task, retryHelper);
153
+ assignAlias(newIndexName, aliasName, task, retryHelper);
154
+ for (String index : oldIndices) {
155
+ deleteIndex(index, task, retryHelper);
156
+ }
157
+ }
158
+ }
159
+
160
+ public String getEsVersion(PluginTask task, Jetty92RetryHelper retryHelper)
161
+ {
162
+ // curl -XGET 'http://localhost:9200'
163
+ JsonNode response = sendRequest("", HttpMethod.GET, task, retryHelper);
164
+ return response.get("version").get("number").asText();
165
+ }
166
+
167
+ public void validateIndexOrAliasName(String index, String type)
168
+ {
169
+ for (int i = 0; i < index.length(); i++) {
170
+ if (inalidIndexCharaters.contains(index.charAt(i))) {
171
+ throw new ConfigException(String.format("%s '%s' must not contain the invalid characters " + inalidIndexCharaters.toString(), type, index));
172
+ }
173
+ }
174
+
175
+ if (!index.toLowerCase(Locale.ROOT).equals(index)) {
176
+ throw new ConfigException(String.format("%s '%s' must be lowercase", type, index));
177
+ }
178
+
179
+ if (index.startsWith("_") || index.startsWith("-") || index.startsWith("+")) {
180
+ throw new ConfigException(String.format("%s '%s' must not start with '_', '-', or '+'", type, index));
181
+ }
182
+
183
+ if (index.length() > maxIndexNameBytes) {
184
+ throw new ConfigException(String.format("%s name is too long, (%s > %s)", type, index.length(), maxIndexNameBytes));
185
+ }
186
+
187
+ if (index.equals(".") || index.equals("..")) {
188
+ throw new ConfigException("index must not be '.' or '..'");
189
+ }
190
+ }
191
+
192
+ private String createIndexRequest(String idColumn, JsonNode record) throws JsonProcessingException
193
+ {
194
+ // The index name and type are set in the request path ("/{index}/{type}"), so there is no need to set them here.
195
+ if (idColumn != null && record.hasNonNull(idColumn)) {
196
+ // {"index" : {"_id" : "v"}}
197
+ Map<String, Map> indexRequest = new HashMap<>();
198
+
199
+ Map<String, JsonNode> idRequest = new HashMap<>();
200
+ idRequest.put("_id", record.get(idColumn));
201
+
202
+ indexRequest.put("index", idRequest);
203
+ return jsonMapper.writeValueAsString(indexRequest);
204
+ }
205
+ else {
206
+ // {"index" : {}}
207
+ return "{\"index\" : {}}";
208
+ }
209
+ }
210
+
211
+ private void assignAlias(String indexName, String aliasName, PluginTask task, Jetty92RetryHelper retryHelper)
212
+ {
213
+ try {
214
+ if (isIndexExisting(indexName, task, retryHelper)) {
215
+ if (isAliasExisting(aliasName, task, retryHelper)) {
216
+ // curl -XPOST http://localhost:9200/_aliases -d\
217
+ // "actions" : [
218
+ // {"remove" : {"alias" : "{alias}", "index" : "{index_old}"}},
219
+ // {"add" : {"alias": "{alias}", "index": "{index_new}"}}
220
+ // ]
221
+ // Success: {"acknowledged":true}
222
+ List<String> oldIndices = getIndexByAlias(aliasName, task, retryHelper);
223
+
224
+ Map<String, String> newAlias = new HashMap<>();
225
+ newAlias.put("alias", aliasName);
226
+ newAlias.put("index", indexName);
227
+ Map<String, Map> add = new HashMap<>();
228
+ add.put("add", newAlias);
229
+
230
+ Map<String, String> oldAlias = new HashMap<>();
231
+ // TODO multiple alias?
232
+ for (String oldIndex : oldIndices) {
233
+ oldAlias.put("alias", aliasName);
234
+ oldAlias.put("index", oldIndex);
235
+ }
236
+ Map<String, Map> remove = new HashMap<>();
237
+ remove.put("remove", oldAlias);
238
+
239
+ List<Map<String, Map>> actions = new ArrayList<>();
240
+ actions.add(remove);
241
+ actions.add(add);
242
+ Map<String, List> rootTree = new HashMap<>();
243
+ rootTree.put("actions", actions);
244
+
245
+ String content = jsonMapper.writeValueAsString(rootTree);
246
+ sendRequest("/_aliases", HttpMethod.POST, task, retryHelper, content);
247
+ log.info("Reassigned alias [{}] to index[{}]", aliasName, indexName);
248
+ }
249
+ else {
250
+ // curl -XPUT http://localhost:9200/{index}/_alias/{alias}
251
+ // Success: {"acknowledged":true}
252
+ String path = String.format("/%s/_alias/%s", indexName, aliasName);
253
+ sendRequest(path, HttpMethod.PUT, task, retryHelper);
254
+ log.info("Assigned alias [{}] to Index [{}]", aliasName, indexName);
255
+ }
256
+ }
257
+ }
258
+ catch (JsonProcessingException ex) {
259
+ throw new ConfigException(String.format("Failed to assign alias[%s] to index[%s]", aliasName, indexName));
260
+ }
261
+ }
262
+
263
+ private void deleteIndex(String indexName, PluginTask task, Jetty92RetryHelper retryHelper)
264
+ {
265
+ // curl -XDELETE localhost:9200/{index}
266
+ // Success: {"acknowledged":true}
267
+ if (isIndexExisting(indexName, task, retryHelper)) {
268
+ sendRequest(indexName, HttpMethod.DELETE, task, retryHelper);
269
+ log.info("Deleted Index [{}]", indexName);
270
+ }
271
+ }
272
+
273
+ private JsonNode sendRequest(String path, final HttpMethod method, PluginTask task, Jetty92RetryHelper retryHelper)
274
+ {
275
+ return sendRequest(path, method, task, retryHelper, "");
276
+ }
277
+
278
+ private JsonNode sendRequest(String path, final HttpMethod method, PluginTask task, Jetty92RetryHelper retryHelper, final String content)
279
+ {
280
+ final String uri = createRequestUri(task, path);
281
+ final String authorizationHeader = getAuthorizationHeader(task);
282
+
283
+ try {
284
+ String responseBody = retryHelper.requestWithRetry(
285
+ new StringJetty92ResponseEntityReader(task.getTimeoutMills()),
286
+ new Jetty92SingleRequester() {
287
+ @Override
288
+ public void requestOnce(org.eclipse.jetty.client.HttpClient client, org.eclipse.jetty.client.api.Response.Listener responseListener)
289
+ {
290
+ org.eclipse.jetty.client.api.Request request = client
291
+ .newRequest(uri)
292
+ .accept("application/json")
293
+ .method(method);
294
+ if (method == HttpMethod.POST) {
295
+ request.content(new StringContentProvider(content), "application/json");
296
+ }
297
+
298
+ if (!authorizationHeader.isEmpty()) {
299
+ request.header("Authorization", authorizationHeader);
300
+ }
301
+ request.send(responseListener);
302
+ }
303
+
304
+ @Override
305
+ public boolean isResponseStatusToRetry(org.eclipse.jetty.client.api.Response response)
306
+ {
307
+ int status = response.getStatus();
308
+ if (status == 429) {
309
+ return true; // Retry if 429.
310
+ }
311
+ return status / 100 != 4; // Retry unless 4xx except for 429.
312
+ }
313
+ });
314
+ return parseJson(responseBody);
315
+ }
316
+ catch (HttpResponseException ex) {
317
+ if (ex.getMessage().startsWith("Response not 2xx: 404 Not Found")) {
318
+ throw new ResourceNotFoundException(ex);
319
+ }
320
+ throw ex;
321
+ }
322
+ }
323
+
324
+ private String createRequestUri(PluginTask task, String path)
325
+ {
326
+ if (!path.startsWith("/")) {
327
+ path = "/" + path;
328
+ }
329
+ String protocol = task.getUseSsl() ? "https" : "http";
330
+ String nodeAddress = getRandomNodeAddress(task);
331
+ return String.format("%s://%s%s", protocol, nodeAddress, path);
332
+ }
333
+
334
+ // Return the address (host:port) of a randomly chosen node
335
+ private String getRandomNodeAddress(PluginTask task)
336
+ {
337
+ List<NodeAddressTask> nodes = task.getNodes();
338
+ Random random = new Random();
339
+ int index = random.nextInt(nodes.size());
340
+ NodeAddressTask node = nodes.get(index);
341
+ return node.getHost() + ":" + node.getPort();
342
+ }
343
+
344
+ private JsonNode parseJson(final String json) throws DataException
345
+ {
346
+ try {
347
+ return this.jsonMapper.readTree(json);
348
+ }
349
+ catch (IOException ex) {
350
+ throw new DataException(ex);
351
+ }
352
+ }
353
+
354
+ @VisibleForTesting
355
+ protected String getAuthorizationHeader(PluginTask task)
356
+ {
357
+ String header = "";
358
+ if (task.getAuthMethod() == AuthMethod.BASIC) {
359
+ String authString = task.getUser().get() + ":" + task.getPassword().get();
360
+ header = "Basic " + DatatypeConverter.printBase64Binary(authString.getBytes());
361
+ }
362
+ return header;
363
+ }
364
+
365
+ public class ResourceNotFoundException extends RuntimeException implements UserDataException
366
+ {
367
+ protected ResourceNotFoundException()
368
+ {
369
+ }
370
+
371
+ public ResourceNotFoundException(Throwable cause)
372
+ {
373
+ super(cause);
374
+ }
375
+ }
376
+ }
@@ -1,586 +1,12 @@
1
1
  package org.embulk.output.elasticsearch;
2
2
 
3
- import java.io.IOException;
4
- import java.text.SimpleDateFormat;
5
- import java.util.ArrayList;
6
- import java.util.Date;
7
- import java.util.List;
8
- import java.util.Locale;
9
- import java.util.concurrent.TimeUnit;
10
- import java.net.InetAddress;
11
- import java.net.UnknownHostException;
12
-
13
- import com.fasterxml.jackson.annotation.JsonCreator;
14
- import com.fasterxml.jackson.annotation.JsonValue;
15
-
16
- import org.elasticsearch.action.bulk.BulkItemResponse;
17
- import org.elasticsearch.action.bulk.BulkProcessor;
18
- import org.elasticsearch.action.bulk.BulkRequest;
19
- import org.elasticsearch.action.bulk.BulkResponse;
20
- import org.elasticsearch.action.index.IndexRequest;
21
- import org.elasticsearch.client.Client;
22
- import org.elasticsearch.client.Requests;
23
- import org.elasticsearch.client.transport.NoNodeAvailableException;
24
- import org.elasticsearch.client.transport.TransportClient;
25
- import org.elasticsearch.cluster.metadata.AliasMetaData;
26
- import org.elasticsearch.cluster.metadata.AliasOrIndex;
27
- import org.elasticsearch.common.collect.ImmutableOpenMap;
28
- import org.elasticsearch.common.settings.Settings;
29
- import org.elasticsearch.common.transport.InetSocketTransportAddress;
30
- import org.elasticsearch.common.unit.ByteSizeValue;
31
- import org.elasticsearch.common.xcontent.XContentBuilder;
32
- import org.elasticsearch.common.xcontent.XContentFactory;
33
- import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
34
- import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
35
- import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
36
- import org.elasticsearch.index.IndexNotFoundException;
37
- import org.elasticsearch.indices.InvalidAliasNameException;
38
-
39
- import org.embulk.config.Config;
40
- import org.embulk.config.ConfigDefault;
41
- import org.embulk.config.ConfigDiff;
42
- import org.embulk.config.ConfigException;
43
- import org.embulk.config.ConfigSource;
44
- import org.embulk.config.Task;
45
- import org.embulk.config.TaskReport;
46
- import org.embulk.config.TaskSource;
47
- import org.embulk.config.UserDataException;
48
- import org.embulk.spi.Column;
49
- import org.embulk.spi.ColumnVisitor;
50
- import org.embulk.spi.Exec;
51
- import org.embulk.spi.OutputPlugin;
52
- import org.embulk.spi.Page;
53
- import org.embulk.spi.PageReader;
54
- import org.embulk.spi.Schema;
55
- import org.embulk.spi.TransactionalPageOutput;
56
- import org.embulk.spi.time.Timestamp;
57
- import org.embulk.spi.type.Types;
58
- import org.slf4j.Logger;
59
-
60
- import com.google.common.base.Optional;
61
- import com.google.common.base.Throwables;
62
- import com.google.inject.Inject;
63
- import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
3
+ import org.embulk.base.restclient.RestClientOutputPluginBase;
64
4
 
65
5
  public class ElasticsearchOutputPlugin
66
- implements OutputPlugin
6
+ extends RestClientOutputPluginBase<ElasticsearchOutputPluginDelegate.PluginTask>
67
7
  {
68
- public interface NodeAddressTask
69
- extends Task
70
- {
71
- @Config("host")
72
- public String getHost();
73
-
74
- @Config("port")
75
- @ConfigDefault("9300")
76
- public int getPort();
77
- }
78
-
79
- public interface PluginTask
80
- extends Task
81
- {
82
- @Config("mode")
83
- @ConfigDefault("\"insert\"")
84
- public Mode getMode();
85
-
86
- @Config("nodes")
87
- public List<NodeAddressTask> getNodes();
88
-
89
- @Config("cluster_name")
90
- @ConfigDefault("\"elasticsearch\"")
91
- public String getClusterName();
92
-
93
- @Config("index")
94
- public String getIndex();
95
- public void setIndex(String indexName);
96
-
97
- @Config("alias")
98
- @ConfigDefault("null")
99
- public Optional<String> getAlias();
100
- public void setAlias(Optional<String> aliasName);
101
-
102
- @Config("index_type")
103
- public String getType();
104
-
105
- @Config("id")
106
- @ConfigDefault("null")
107
- public Optional<String> getId();
108
-
109
- @Config("bulk_actions")
110
- @ConfigDefault("1000")
111
- public int getBulkActions();
112
-
113
- @Config("bulk_size")
114
- @ConfigDefault("5242880")
115
- public long getBulkSize();
116
-
117
- @Config("concurrent_requests")
118
- @ConfigDefault("5")
119
- public int getConcurrentRequests();
120
- }
121
-
122
- private final Logger log;
123
-
124
- @Inject
125
8
  public ElasticsearchOutputPlugin()
126
9
  {
127
- log = Exec.getLogger(getClass());
10
+ super(ElasticsearchOutputPluginDelegate.PluginTask.class, new ElasticsearchOutputPluginDelegate());
128
11
  }
129
-
130
- @Override
131
- public ConfigDiff transaction(ConfigSource config, Schema schema,
132
- int processorCount, Control control)
133
- {
134
- final PluginTask task = config.loadConfig(PluginTask.class);
135
-
136
- // confirm that a client can be initialized
137
- try (Client client = createClient(task)) {
138
- log.info(String.format("Executing plugin with '%s' mode.", task.getMode()));
139
- if (task.getMode().equals(Mode.REPLACE)) {
140
- task.setAlias(Optional.of(task.getIndex()));
141
- task.setIndex(generateNewIndexName(task.getIndex()));
142
- if (isExistsIndex(task.getAlias().orNull(), client) && !isAlias(task.getAlias().orNull(), client)) {
143
- throw new ConfigException(String.format("Invalid alias name [%s], an index exists with the same name as the alias", task.getAlias().orNull()));
144
- }
145
- }
146
- log.info(String.format("Inserting data into index[%s]", task.getIndex()));
147
- control.run(task.dump());
148
-
149
- if (task.getMode().equals(Mode.REPLACE)) {
150
- try {
151
- reAssignAlias(task.getAlias().orNull(), task.getIndex(), client);
152
- } catch (IndexNotFoundException | InvalidAliasNameException e) {
153
- throw new ConfigException(e);
154
- } catch (NoNodeAvailableException e) {
155
- throw new ConnectionException(e);
156
- }
157
- }
158
- } catch (Exception e) {
159
- throw Throwables.propagate(e);
160
- }
161
-
162
- ConfigDiff nextConfig = Exec.newConfigDiff();
163
- return nextConfig;
164
- }
165
-
166
- @Override
167
- public ConfigDiff resume(TaskSource taskSource,
168
- Schema schema, int processorCount,
169
- OutputPlugin.Control control)
170
- {
171
- // TODO
172
- return Exec.newConfigDiff();
173
- }
174
-
175
- @Override
176
- public void cleanup(TaskSource taskSource,
177
- Schema schema, int processorCount,
178
- List<TaskReport> successTaskReports)
179
- {}
180
-
181
- private Client createClient(final PluginTask task)
182
- {
183
- // @see http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/client.html
184
- Settings settings = Settings.settingsBuilder()
185
- .put("cluster.name", task.getClusterName())
186
- .build();
187
- TransportClient client = TransportClient.builder().settings(settings).build();
188
- List<NodeAddressTask> nodes = task.getNodes();
189
- for (NodeAddressTask node : nodes) {
190
- try {
191
- client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(node.getHost()), node.getPort()));
192
- } catch (UnknownHostException | NoNodeAvailableException e) {
193
- throw new ConnectionException(e);
194
- }
195
- }
196
- return client;
197
- }
198
-
199
- private BulkProcessor newBulkProcessor(final PluginTask task, final Client client)
200
- {
201
- return BulkProcessor.builder(client, new BulkProcessor.Listener() {
202
- @Override
203
- public void beforeBulk(long executionId, BulkRequest request)
204
- {
205
- log.info("Execute {} bulk actions", request.numberOfActions());
206
- }
207
-
208
- @Override
209
- public void afterBulk(long executionId, BulkRequest request, BulkResponse response)
210
- {
211
- if (response.hasFailures()) {
212
- long items = 0;
213
- if (log.isDebugEnabled()) {
214
- for (BulkItemResponse item : response.getItems()) {
215
- if (item.isFailed()) {
216
- items += 1;
217
- log.debug(" Error for {}/{}/{} for {} operation: {}",
218
- item.getIndex(), item.getType(), item.getId(),
219
- item.getOpType(), item.getFailureMessage());
220
- }
221
- }
222
- }
223
- log.warn("{} bulk actions failed: {}", items, response.buildFailureMessage());
224
- } else {
225
- log.info("{} bulk actions succeeded", request.numberOfActions());
226
- }
227
- }
228
-
229
- @Override
230
- public void afterBulk(long executionId, BulkRequest request, Throwable failure)
231
- {
232
- if (failure.getClass() == NoNodeAvailableException.class) {
233
- log.error("Got the error during bulk processing", failure);
234
- throw new ConnectionException(failure);
235
- } else {
236
- log.warn("Got the error during bulk processing", failure);
237
- }
238
- }
239
- }).setBulkActions(task.getBulkActions())
240
- .setBulkSize(new ByteSizeValue(task.getBulkSize()))
241
- .setConcurrentRequests(task.getConcurrentRequests())
242
- .build();
243
- }
244
-
245
- @Override
246
- public TransactionalPageOutput open(TaskSource taskSource, Schema schema,
247
- int processorIndex)
248
- {
249
- final PluginTask task = taskSource.loadTask(PluginTask.class);
250
- Client client = createClient(task);
251
- BulkProcessor bulkProcessor = newBulkProcessor(task, client);
252
- ElasticsearchPageOutput pageOutput = new ElasticsearchPageOutput(task, client, bulkProcessor);
253
- pageOutput.open(schema);
254
- return pageOutput;
255
- }
256
-
257
- public static class ElasticsearchPageOutput implements TransactionalPageOutput
258
- {
259
- private Logger log;
260
-
261
- private Client client;
262
- private BulkProcessor bulkProcessor;
263
-
264
- private PageReader pageReader;
265
- private Column idColumn;
266
-
267
- private final String index;
268
- private final String type;
269
- private final String id;
270
-
271
- public ElasticsearchPageOutput(PluginTask task, Client client, BulkProcessor bulkProcessor)
272
- {
273
- this.log = Exec.getLogger(getClass());
274
-
275
- this.client = client;
276
- this.bulkProcessor = bulkProcessor;
277
-
278
- this.index = task.getIndex();
279
- this.type = task.getType();
280
- this.id = task.getId().orNull();
281
- }
282
-
283
- void open(final Schema schema)
284
- {
285
- pageReader = new PageReader(schema);
286
- idColumn = (id == null) ? null : schema.lookupColumn(id);
287
- }
288
-
289
- @Override
290
- public void add(Page page)
291
- {
292
- pageReader.setPage(page);
293
-
294
- while (pageReader.nextRecord()) {
295
- try {
296
- final XContentBuilder contextBuilder = XContentFactory.jsonBuilder().startObject(); // TODO reusable??
297
- pageReader.getSchema().visitColumns(new ColumnVisitor() {
298
- @Override
299
- public void booleanColumn(Column column) {
300
- try {
301
- if (pageReader.isNull(column)) {
302
- contextBuilder.nullField(column.getName());
303
- } else {
304
- contextBuilder.field(column.getName(), pageReader.getBoolean(column));
305
- }
306
- } catch (IOException e) {
307
- try {
308
- contextBuilder.nullField(column.getName());
309
- } catch (IOException ex) {
310
- throw Throwables.propagate(ex);
311
- }
312
- }
313
- }
314
-
315
- @Override
316
- public void longColumn(Column column) {
317
- try {
318
- if (pageReader.isNull(column)) {
319
- contextBuilder.nullField(column.getName());
320
- } else {
321
- contextBuilder.field(column.getName(), pageReader.getLong(column));
322
- }
323
- } catch (IOException e) {
324
- try {
325
- contextBuilder.nullField(column.getName());
326
- } catch (IOException ex) {
327
- throw Throwables.propagate(ex);
328
- }
329
- }
330
- }
331
-
332
- @Override
333
- public void doubleColumn(Column column) {
334
- try {
335
- if (pageReader.isNull(column)) {
336
- contextBuilder.nullField(column.getName());
337
- } else {
338
- contextBuilder.field(column.getName(), pageReader.getDouble(column));
339
- }
340
- } catch (IOException e) {
341
- try {
342
- contextBuilder.nullField(column.getName());
343
- } catch (IOException ex) {
344
- throw Throwables.propagate(ex);
345
- }
346
- }
347
- }
348
-
349
- @Override
350
- public void stringColumn(Column column) {
351
- try {
352
- if (pageReader.isNull(column)) {
353
- contextBuilder.nullField(column.getName());
354
- } else {
355
- contextBuilder.field(column.getName(), pageReader.getString(column));
356
- }
357
- } catch (IOException e) {
358
- try {
359
- contextBuilder.nullField(column.getName());
360
- } catch (IOException ex) {
361
- throw Throwables.propagate(ex);
362
- }
363
- }
364
- }
365
-
366
- @Override
367
- public void jsonColumn(Column column) {
368
- try {
369
- if (pageReader.isNull(column)) {
370
- contextBuilder.nullField(column.getName());
371
- } else {
372
- contextBuilder.field(column.getName(), pageReader.getJson(column).toJson());
373
- }
374
- } catch (IOException e) {
375
- try {
376
- contextBuilder.nullField(column.getName());
377
- } catch (IOException ex) {
378
- throw Throwables.propagate(ex);
379
- }
380
- }
381
- }
382
-
383
- @Override
384
- public void timestampColumn(Column column) {
385
- try {
386
- if (pageReader.isNull(column)) {
387
- contextBuilder.nullField(column.getName());
388
- } else {
389
- contextBuilder.field(column.getName(), new Date(pageReader.getTimestamp(column).toEpochMilli()));
390
- }
391
- } catch (IOException e) {
392
- try {
393
- contextBuilder.nullField(column.getName());
394
- } catch (IOException ex) {
395
- throw Throwables.propagate(ex);
396
- }
397
- }
398
- }
399
- });
400
-
401
- contextBuilder.endObject();
402
- bulkProcessor.add(newIndexRequest(getIdValue(idColumn)).source(contextBuilder));
403
-
404
- } catch (ConnectionException | IOException e) {
405
- Throwables.propagate(e); // TODO error handling
406
- }
407
- }
408
- }
409
-
410
- /**
411
- * @param inputColumn
412
- * @return
413
- */
414
- private String getIdValue(Column inputColumn) {
415
- if (inputColumn == null) return null;
416
- if (pageReader.isNull(inputColumn)) return null;
417
- String idValue = null;
418
- if (Types.STRING.equals(inputColumn.getType())) {
419
- idValue = pageReader.getString(inputColumn);
420
- } else if (Types.BOOLEAN.equals(inputColumn.getType())) {
421
- idValue = pageReader.getBoolean(inputColumn) + "";
422
- } else if (Types.DOUBLE.equals(inputColumn.getType())) {
423
- idValue = pageReader.getDouble(inputColumn) + "";
424
- } else if (Types.LONG.equals(inputColumn.getType())) {
425
- idValue = pageReader.getLong(inputColumn) + "";
426
- } else if (Types.JSON.equals(inputColumn.getType())) {
427
- idValue = pageReader.getJson(inputColumn).toJson();
428
- } else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
429
- idValue = pageReader.getTimestamp(inputColumn).toString();
430
- } else {
431
- idValue = null;
432
- }
433
- return idValue;
434
- }
435
-
436
- private IndexRequest newIndexRequest(String idValue)
437
- {
438
- return Requests.indexRequest(index).type(type).id(idValue);
439
- }
440
-
441
- @Override
442
- public void finish()
443
- {
444
- try {
445
- bulkProcessor.flush();
446
- } finally {
447
- close();
448
- }
449
- }
450
-
451
- @Override
452
- public void close()
453
- {
454
- if (bulkProcessor != null) {
455
- try {
456
- while (!bulkProcessor.awaitClose(3, TimeUnit.SECONDS)) {
457
- log.debug("wait for closing the bulk processing..");
458
- }
459
- } catch (InterruptedException e) {
460
- Thread.currentThread().interrupt();
461
- }
462
- bulkProcessor = null;
463
- }
464
-
465
- if (client != null) {
466
- client.close(); // ElasticsearchException
467
- client = null;
468
- }
469
- }
470
-
471
- @Override
472
- public void abort()
473
- {
474
- // TODO do nothing
475
- }
476
-
477
- @Override
478
- public TaskReport commit()
479
- {
480
- TaskReport report = Exec.newTaskReport();
481
- // TODO
482
- return report;
483
- }
484
-
485
- }
486
-
487
- public enum Mode
488
- {
489
- INSERT,
490
- REPLACE;
491
-
492
- @JsonValue
493
- @Override
494
- public String toString()
495
- {
496
- return name().toLowerCase(Locale.ENGLISH);
497
- }
498
-
499
- @JsonCreator
500
- public static Mode fromString(String value)
501
- {
502
- switch (value) {
503
- case "insert":
504
- return INSERT;
505
- case "replace":
506
- return REPLACE;
507
- default:
508
- throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are insert, truncate_insert, replace", value));
509
- }
510
- }
511
- }
512
-
513
- private void reAssignAlias(String aliasName, String newIndexName, Client client)
514
- throws IndexNotFoundException, InvalidAliasNameException
515
- {
516
- if (!isExistsAlias(aliasName, client)) {
517
- client.admin().indices().prepareAliases()
518
- .addAlias(newIndexName, aliasName)
519
- .execute().actionGet();
520
- log.info(String.format("Assigned alias[%s] to index[%s]", aliasName, newIndexName));
521
- } else {
522
- List<String> oldIndices = getIndexByAlias(aliasName, client);
523
- client.admin().indices().prepareAliases()
524
- .removeAlias(oldIndices.toArray(new String[oldIndices.size()]), aliasName)
525
- .addAlias(newIndexName, aliasName)
526
- .execute().actionGet();
527
- log.info(String.format("Reassigned alias[%s] from index%s to index[%s]", aliasName, oldIndices, newIndexName));
528
- for (String index : oldIndices) {
529
- deleteIndex(index, client);
530
- }
531
- }
532
- }
533
-
534
- private void deleteIndex(String indexName, Client client)
535
- {
536
- client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
537
- log.info(String.format("Deleted Index [%s]", indexName));
538
- }
539
-
540
- private List<String> getIndexByAlias(String aliasName, Client client)
541
- {
542
- ImmutableOpenMap<String, List<AliasMetaData>> map = client.admin().indices().getAliases(new GetAliasesRequest(aliasName))
543
- .actionGet().getAliases();
544
- List<String> indices = new ArrayList<>();
545
- for (ObjectObjectCursor<String, List<AliasMetaData>> c : map) {
546
- indices.add(c.key);
547
- }
548
-
549
- return indices;
550
- }
551
-
552
- private boolean isExistsAlias(String aliasName, Client client)
553
- {
554
- return client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().hasAlias(aliasName);
555
- }
556
-
557
- private boolean isExistsIndex(String indexName, Client client)
558
- {
559
- return client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().hasIndex(indexName);
560
- }
561
-
562
- private boolean isAlias(String aliasName, Client client)
563
- {
564
- AliasOrIndex aliasOrIndex = client.admin().cluster().state(new ClusterStateRequest()).actionGet().getState().getMetaData().getAliasAndIndexLookup().get(aliasName);
565
- return aliasOrIndex != null && aliasOrIndex.isAlias();
566
- }
567
-
568
- public String generateNewIndexName(String indexName)
569
- {
570
- Timestamp time = Exec.getTransactionTime();
571
- return indexName + new SimpleDateFormat("_yyyyMMdd-HHmmss").format(time.toEpochMilli());
572
- }
573
-
574
- public class ConnectionException extends RuntimeException implements UserDataException
575
- {
576
- protected ConnectionException()
577
- {
578
- }
579
-
580
- public ConnectionException(Throwable cause)
581
- {
582
- super(cause);
583
- }
584
- }
585
-
586
12
  }