embulk-filter-copy 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (31) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/CHANGELOG.md +3 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +67 -0
  6. data/build.gradle +104 -0
  7. data/config/checkstyle/checkstyle.xml +128 -0
  8. data/config/checkstyle/default.xml +108 -0
  9. data/example/config.yml +28 -0
  10. data/example/data.tsv +5 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  13. data/gradlew +169 -0
  14. data/gradlew.bat +84 -0
  15. data/lib/embulk/filter/copy.rb +8 -0
  16. data/settings.gradle +1 -0
  17. data/src/main/java/org/embulk/filter/copy/CopyFilterPlugin.java +149 -0
  18. data/src/main/java/org/embulk/filter/copy/forward/ForwardBaseTask.java +17 -0
  19. data/src/main/java/org/embulk/filter/copy/forward/InForwardEventReader.java +147 -0
  20. data/src/main/java/org/embulk/filter/copy/forward/InForwardService.java +187 -0
  21. data/src/main/java/org/embulk/filter/copy/forward/InForwardVisitor.java +63 -0
  22. data/src/main/java/org/embulk/filter/copy/forward/OutForwardEventBuilder.java +135 -0
  23. data/src/main/java/org/embulk/filter/copy/forward/OutForwardService.java +170 -0
  24. data/src/main/java/org/embulk/filter/copy/forward/OutForwardVisitor.java +63 -0
  25. data/src/main/java/org/embulk/filter/copy/plugin/InternalForwardInputPlugin.java +111 -0
  26. data/src/main/java/org/embulk/filter/copy/service/EmbulkExecutorService.java +111 -0
  27. data/src/main/java/org/embulk/filter/copy/service/StandardColumnVisitor.java +64 -0
  28. data/src/main/java/org/embulk/filter/copy/util/ElapsedTime.java +165 -0
  29. data/src/test/java/org/embulk/filter/copy/TestCopyFilterPlugin.java +5 -0
  30. data/src/test/java/org/embulk/filter/copy/plugin/TestInternalForwardInputPlugin.java +5 -0
  31. metadata +111 -0
@@ -0,0 +1,170 @@
1
+ package org.embulk.filter.copy.forward;
2
+
3
+ import com.google.common.base.Optional;
4
+ import com.google.common.collect.Maps;
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.spi.Exec;
8
+ import org.komamitsu.fluency.Fluency;
9
+ import org.slf4j.Logger;
10
+
11
+ import java.io.IOException;
12
+ import java.util.Map;
13
+
14
+ public class OutForwardService
15
+ {
16
+ private final static Logger logger = Exec.getLogger(OutForwardService.class);
17
+
18
+ public interface OutForwardTask
19
+ extends org.embulk.config.Task
20
+ {
21
+ @Config("host")
22
+ @ConfigDefault("\"localhost\"")
23
+ String getHost();
24
+
25
+ @Config("port")
26
+ @ConfigDefault("24224")
27
+ int getPort();
28
+
29
+ @Config("max_buffer_size")
30
+ @ConfigDefault("null")
31
+ Optional<Long> getMaxBufferSize();
32
+
33
+ @Config("buffer_chunk_initial_size")
34
+ @ConfigDefault("null")
35
+ Optional<Integer> getBufferChunkInitialSize();
36
+
37
+ @Config("buffer_chunk_retention_size")
38
+ @ConfigDefault("null")
39
+ Optional<Integer> getBufferChunkRetentionSize();
40
+
41
+ @Config("flush_interval_millis")
42
+ @ConfigDefault("null")
43
+ Optional<Integer> getFlushIntervalMillis();
44
+
45
+ @Config("sender_max_retry_count")
46
+ @ConfigDefault("null")
47
+ Optional<Integer> getSenderMaxRetryCount();
48
+
49
+ @Config("ack_response_mode")
50
+ @ConfigDefault("null")
51
+ Optional<Boolean> getAckResponseMode();
52
+
53
+ @Config("file_backup_dir")
54
+ @ConfigDefault("null")
55
+ Optional<String> getFileBackupDir();
56
+
57
+ @Config("wait_until_buffer_flushed")
58
+ @ConfigDefault("null")
59
+ Optional<Integer> getWaitUntilBufferFlushed();
60
+
61
+ @Config("wait_until_flusher_terminated")
62
+ @ConfigDefault("null")
63
+ Optional<Integer> getWaitUntilFlusherTerminated();
64
+ }
65
+
66
+ public interface Task
67
+ extends ForwardBaseTask
68
+ {
69
+ @Config("out_forward")
70
+ @ConfigDefault("{}")
71
+ OutForwardTask getOutForwardTask();
72
+ }
73
+
74
+ public static void sendShutdownMessage(Task task)
75
+ {
76
+ logger.info("out_forward: send shutdown message.");
77
+ OutForwardService outForward = new OutForwardService(task);
78
+ outForward.emit(task.getShutdownTag(), Maps.newHashMap());
79
+ outForward.finish();
80
+ outForward.close();
81
+ }
82
+
83
+ private final Task task;
84
+ private final Fluency client;
85
+
86
+ public OutForwardService(Task task)
87
+ {
88
+ this.task = task;
89
+ this.client = newFluency(task.getOutForwardTask());
90
+ }
91
+
92
+ private Fluency.Config configureFluencyConfig(OutForwardTask t)
93
+ {
94
+ Fluency.Config c = new Fluency.Config();
95
+ if (t.getMaxBufferSize().isPresent()) {
96
+ c.setMaxBufferSize(t.getMaxBufferSize().get());
97
+ }
98
+ if (t.getBufferChunkInitialSize().isPresent()) {
99
+ c.setBufferChunkInitialSize(t.getBufferChunkInitialSize().get());
100
+ }
101
+ if (t.getBufferChunkRetentionSize().isPresent()) {
102
+ c.setBufferChunkRetentionSize(t.getBufferChunkRetentionSize().get());
103
+ }
104
+ if (t.getFlushIntervalMillis().isPresent()) {
105
+ c.setFlushIntervalMillis(t.getFlushIntervalMillis().get());
106
+ }
107
+ if (t.getSenderMaxRetryCount().isPresent()) {
108
+ c.setSenderMaxRetryCount(t.getSenderMaxRetryCount().get());
109
+ }
110
+ if (t.getAckResponseMode().isPresent()) {
111
+ c.setAckResponseMode(t.getAckResponseMode().get());
112
+ }
113
+ if (t.getFileBackupDir().isPresent()) {
114
+ c.setFileBackupDir(t.getFileBackupDir().get());
115
+ }
116
+ if (t.getWaitUntilBufferFlushed().isPresent()) {
117
+ c.setWaitUntilBufferFlushed(t.getWaitUntilBufferFlushed().get());
118
+ }
119
+ if (t.getWaitUntilFlusherTerminated().isPresent()) {
120
+ c.setWaitUntilFlusherTerminated(t.getWaitUntilFlusherTerminated().get());
121
+ }
122
+ return c;
123
+ }
124
+
125
+ private Fluency newFluency(OutForwardTask t)
126
+ {
127
+ Fluency.Config c = configureFluencyConfig(t);
128
+ try {
129
+ return Fluency.defaultFluency(t.getHost(), t.getPort(), c);
130
+ }
131
+ catch (IOException e) {
132
+ throw new RuntimeException(e);
133
+ }
134
+ }
135
+
136
+ public void emit(String tag, Map<String, Object> message)
137
+ {
138
+ try {
139
+ client.emit(tag, message);
140
+ }
141
+ catch (IOException e) {
142
+ throw new RuntimeException(e);
143
+ }
144
+ }
145
+
146
+ public void emit(Map<String, Object> message)
147
+ {
148
+ emit(task.getMessageTag(), message);
149
+ }
150
+
151
+ public void finish()
152
+ {
153
+ try {
154
+ client.flush();
155
+ }
156
+ catch (IOException e) {
157
+ throw new RuntimeException(e);
158
+ }
159
+ }
160
+
161
+ public void close()
162
+ {
163
+ try {
164
+ client.close();
165
+ }
166
+ catch (IOException e) {
167
+ throw new RuntimeException(e);
168
+ }
169
+ }
170
+ }
@@ -0,0 +1,63 @@
1
+ package org.embulk.filter.copy.forward;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.ColumnVisitor;
5
+ import org.embulk.spi.PageReader;
6
+
7
+ public class OutForwardVisitor
8
+ implements ColumnVisitor
9
+ {
10
+ private final PageReader reader;
11
+ private final OutForwardEventBuilder builder;
12
+
13
+ public OutForwardVisitor(PageReader reader, OutForwardEventBuilder builder)
14
+ {
15
+ this.reader = reader;
16
+ this.builder = builder;
17
+ }
18
+
19
+ private void nullOr(Column column, Runnable r)
20
+ {
21
+ if (reader.isNull(column)) {
22
+ builder.setNull(column);
23
+ return;
24
+ }
25
+ r.run();
26
+ }
27
+
28
+ @Override
29
+ public void booleanColumn(Column column)
30
+ {
31
+ nullOr(column, () -> builder.setBoolean(column, reader.getBoolean(column)));
32
+ }
33
+
34
+ @Override
35
+ public void longColumn(Column column)
36
+ {
37
+ nullOr(column, () -> builder.setLong(column, reader.getLong(column)));
38
+ }
39
+
40
+ @Override
41
+ public void doubleColumn(Column column)
42
+ {
43
+ nullOr(column, () -> builder.setDouble(column, reader.getDouble(column)));
44
+ }
45
+
46
+ @Override
47
+ public void stringColumn(Column column)
48
+ {
49
+ nullOr(column, () -> builder.setString(column, reader.getString(column)));
50
+ }
51
+
52
+ @Override
53
+ public void timestampColumn(Column column)
54
+ {
55
+ nullOr(column, () -> builder.setTimestamp(column, reader.getTimestamp(column)));
56
+ }
57
+
58
+ @Override
59
+ public void jsonColumn(Column column)
60
+ {
61
+ nullOr(column, () -> builder.setJson(column, reader.getJson(column)));
62
+ }
63
+ }
@@ -0,0 +1,111 @@
1
+ package org.embulk.filter.copy.plugin;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDiff;
5
+ import org.embulk.config.ConfigInject;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.Task;
8
+ import org.embulk.config.TaskReport;
9
+ import org.embulk.config.TaskSource;
10
+ import org.embulk.filter.copy.forward.InForwardEventReader;
11
+ import org.embulk.filter.copy.forward.InForwardService;
12
+ import org.embulk.filter.copy.forward.InForwardVisitor;
13
+ import org.embulk.spi.BufferAllocator;
14
+ import org.embulk.spi.Exec;
15
+ import org.embulk.spi.InputPlugin;
16
+ import org.embulk.spi.PageBuilder;
17
+ import org.embulk.spi.PageOutput;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.SchemaConfig;
20
+ import org.embulk.spi.time.TimestampParser;
21
+ import org.slf4j.Logger;
22
+
23
+ import java.util.List;
24
+
25
+ public class InternalForwardInputPlugin
26
+ implements InputPlugin
27
+ {
28
+ public final static String PLUGIN_NAME = "internal_forward";
29
+ private final static Logger logger = Exec.getLogger(InternalForwardInputPlugin.class);
30
+
31
+ public interface PluginTask
32
+ extends Task, TimestampParser.Task, InForwardService.Task
33
+ {
34
+ @Config("columns")
35
+ SchemaConfig getColumns();
36
+
37
+ @ConfigInject
38
+ BufferAllocator getBufferAllocator();
39
+ }
40
+
41
+ @Override
42
+ public ConfigDiff transaction(ConfigSource config,
43
+ InputPlugin.Control control)
44
+ {
45
+ PluginTask task = config.loadConfig(PluginTask.class);
46
+
47
+ Schema schema = task.getColumns().toSchema();
48
+ int taskCount = 1; // number of run() method calls
49
+
50
+ return resume(task.dump(), schema, taskCount, control);
51
+ }
52
+
53
+ @Override
54
+ public ConfigDiff resume(TaskSource taskSource,
55
+ Schema schema, int taskCount,
56
+ InputPlugin.Control control)
57
+ {
58
+ control.run(taskSource, schema, taskCount);
59
+ return Exec.newConfigDiff();
60
+ }
61
+
62
+ @Override
63
+ public void cleanup(TaskSource taskSource,
64
+ Schema schema, int taskCount,
65
+ List<TaskReport> successTaskReports)
66
+ {
67
+ }
68
+
69
+ @Override
70
+ public TaskReport run(TaskSource taskSource,
71
+ Schema schema, int taskIndex,
72
+ PageOutput output)
73
+ {
74
+ PluginTask task = taskSource.loadTask(PluginTask.class);
75
+
76
+ try (PageBuilder pageBuilder = new PageBuilder(task.getBufferAllocator(), schema, output)) {
77
+ TimestampParser timestampParser = new TimestampParser(
78
+ task.getJRuby(),
79
+ task.getDefaultTimestampFormat(),
80
+ task.getDefaultTimeZone());
81
+ InForwardEventReader eventReader = new InForwardEventReader(schema, timestampParser);
82
+ InForwardVisitor inForwardVisitor = new InForwardVisitor(eventReader, pageBuilder);
83
+
84
+ InForwardService.builder()
85
+ .task(task)
86
+ .forEachEventCallback(
87
+ event ->
88
+ {
89
+ // TODO: here is not thread-safe
90
+ eventReader.setEvent(event);
91
+ while (eventReader.nextMessage()) {
92
+ schema.visitColumns(inForwardVisitor);
93
+ pageBuilder.addRecord();
94
+ }
95
+ }
96
+ )
97
+ .build()
98
+ .runUntilShouldShutdown();
99
+
100
+ pageBuilder.finish();
101
+ }
102
+
103
+ return Exec.newTaskReport(); // TODO
104
+ }
105
+
106
+ @Override
107
+ public ConfigDiff guess(ConfigSource config)
108
+ {
109
+ return Exec.newConfigDiff();
110
+ }
111
+ }
@@ -0,0 +1,111 @@
1
+ package org.embulk.filter.copy.service;
2
+
3
+ import com.google.common.util.concurrent.FutureCallback;
4
+ import com.google.common.util.concurrent.Futures;
5
+ import com.google.common.util.concurrent.ListenableFuture;
6
+ import com.google.common.util.concurrent.ListeningExecutorService;
7
+ import com.google.common.util.concurrent.MoreExecutors;
8
+ import com.google.inject.Injector;
9
+ import org.embulk.EmbulkEmbed;
10
+ import org.embulk.config.ConfigException;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.exec.ExecutionResult;
13
+ import org.embulk.filter.copy.util.ElapsedTime;
14
+ import org.embulk.guice.LifeCycleInjector;
15
+ import org.embulk.spi.Exec;
16
+ import org.slf4j.Logger;
17
+
18
+ import javax.annotation.Nullable;
19
+
20
+ import java.lang.reflect.Constructor;
21
+ import java.lang.reflect.InvocationTargetException;
22
+ import java.util.concurrent.Callable;
23
+ import java.util.concurrent.Executors;
24
+
25
+ public class EmbulkExecutorService
26
+ {
27
+ private final static String THREAD_NAME = "embulk executor service";
28
+ private static final int NUM_THREADS = 1;
29
+ private final static Logger logger = Exec.getLogger(EmbulkExecutorService.class);
30
+ private final Injector injector;
31
+ private final ListeningExecutorService es;
32
+ private ListenableFuture<ExecutionResult> future;
33
+
34
+ public EmbulkExecutorService(Injector injector)
35
+ {
36
+ this.injector = injector;
37
+ this.es = MoreExecutors.listeningDecorator(
38
+ Executors.newFixedThreadPool(
39
+ NUM_THREADS,
40
+ r -> new Thread(r, THREAD_NAME)
41
+ ));
42
+ }
43
+
44
+ public void executeAsync(ConfigSource config)
45
+ {
46
+ logger.debug("execute with this config: {}", config);
47
+ if (future != null) {
48
+ throw new IllegalStateException("executeAsync is already called.");
49
+ }
50
+ future = es.submit(embulkRun(config));
51
+ Futures.addCallback(future, resultFutureCallback());
52
+ }
53
+
54
+ public void shutdown()
55
+ {
56
+ ElapsedTime.info(
57
+ logger,
58
+ "embulk executor service shutdown",
59
+ es::shutdown);
60
+ }
61
+
62
+ public void waitExecutionFinished()
63
+ {
64
+ if (future == null) {
65
+ throw new NullPointerException();
66
+ }
67
+
68
+ ElapsedTime.debugUntil(() -> future.isDone() || future.isCancelled(),
69
+ logger, "embulk executor", 3000L);
70
+ }
71
+
72
+ private Callable<ExecutionResult> embulkRun(ConfigSource config)
73
+ {
74
+ return () -> newEmbulkEmbed(injector).run(config);
75
+ }
76
+
77
+ private EmbulkEmbed newEmbulkEmbed(Injector injector)
78
+ {
79
+ try {
80
+ Constructor<EmbulkEmbed> constructor = EmbulkEmbed.class
81
+ .getDeclaredConstructor(ConfigSource.class, LifeCycleInjector.class);
82
+ constructor.setAccessible(true);
83
+ return constructor.newInstance(Exec.newConfigSource(), injector);
84
+ }
85
+ catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) {
86
+ throw new ConfigException(e);
87
+ }
88
+ }
89
+
90
+ private FutureCallback<ExecutionResult> resultFutureCallback()
91
+ {
92
+ return new FutureCallback<ExecutionResult>()
93
+ {
94
+ @Override
95
+ public void onSuccess(@Nullable ExecutionResult result)
96
+ {
97
+ for (Throwable throwable : result.getIgnoredExceptions()) {
98
+ logger.warn("Ignored error ", throwable);
99
+ }
100
+ logger.info("Config diff: {}", result.getConfigDiff());
101
+ logger.debug("ExecutionResult: {}", result);
102
+ }
103
+
104
+ @Override
105
+ public void onFailure(Throwable t)
106
+ {
107
+ throw new RuntimeException(t);
108
+ }
109
+ };
110
+ }
111
+ }