embulk-filter-copy 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/CHANGELOG.md +3 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +67 -0
  6. data/build.gradle +104 -0
  7. data/config/checkstyle/checkstyle.xml +128 -0
  8. data/config/checkstyle/default.xml +108 -0
  9. data/example/config.yml +28 -0
  10. data/example/data.tsv +5 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  13. data/gradlew +169 -0
  14. data/gradlew.bat +84 -0
  15. data/lib/embulk/filter/copy.rb +8 -0
  16. data/settings.gradle +1 -0
  17. data/src/main/java/org/embulk/filter/copy/CopyFilterPlugin.java +149 -0
  18. data/src/main/java/org/embulk/filter/copy/forward/ForwardBaseTask.java +17 -0
  19. data/src/main/java/org/embulk/filter/copy/forward/InForwardEventReader.java +147 -0
  20. data/src/main/java/org/embulk/filter/copy/forward/InForwardService.java +187 -0
  21. data/src/main/java/org/embulk/filter/copy/forward/InForwardVisitor.java +63 -0
  22. data/src/main/java/org/embulk/filter/copy/forward/OutForwardEventBuilder.java +135 -0
  23. data/src/main/java/org/embulk/filter/copy/forward/OutForwardService.java +170 -0
  24. data/src/main/java/org/embulk/filter/copy/forward/OutForwardVisitor.java +63 -0
  25. data/src/main/java/org/embulk/filter/copy/plugin/InternalForwardInputPlugin.java +111 -0
  26. data/src/main/java/org/embulk/filter/copy/service/EmbulkExecutorService.java +111 -0
  27. data/src/main/java/org/embulk/filter/copy/service/StandardColumnVisitor.java +64 -0
  28. data/src/main/java/org/embulk/filter/copy/util/ElapsedTime.java +165 -0
  29. data/src/test/java/org/embulk/filter/copy/TestCopyFilterPlugin.java +5 -0
  30. data/src/test/java/org/embulk/filter/copy/plugin/TestInternalForwardInputPlugin.java +5 -0
  31. metadata +111 -0
@@ -0,0 +1,170 @@
1
+ package org.embulk.filter.copy.forward;
2
+
3
+ import com.google.common.base.Optional;
4
+ import com.google.common.collect.Maps;
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.spi.Exec;
8
+ import org.komamitsu.fluency.Fluency;
9
+ import org.slf4j.Logger;
10
+
11
+ import java.io.IOException;
12
+ import java.util.Map;
13
+
14
+ public class OutForwardService
15
+ {
16
+ private final static Logger logger = Exec.getLogger(OutForwardService.class);
17
+
18
+ public interface OutForwardTask
19
+ extends org.embulk.config.Task
20
+ {
21
+ @Config("host")
22
+ @ConfigDefault("\"localhost\"")
23
+ String getHost();
24
+
25
+ @Config("port")
26
+ @ConfigDefault("24224")
27
+ int getPort();
28
+
29
+ @Config("max_buffer_size")
30
+ @ConfigDefault("null")
31
+ Optional<Long> getMaxBufferSize();
32
+
33
+ @Config("buffer_chunk_initial_size")
34
+ @ConfigDefault("null")
35
+ Optional<Integer> getBufferChunkInitialSize();
36
+
37
+ @Config("buffer_chunk_retention_size")
38
+ @ConfigDefault("null")
39
+ Optional<Integer> getBufferChunkRetentionSize();
40
+
41
+ @Config("flush_interval_millis")
42
+ @ConfigDefault("null")
43
+ Optional<Integer> getFlushIntervalMillis();
44
+
45
+ @Config("sender_max_retry_count")
46
+ @ConfigDefault("null")
47
+ Optional<Integer> getSenderMaxRetryCount();
48
+
49
+ @Config("ack_response_mode")
50
+ @ConfigDefault("null")
51
+ Optional<Boolean> getAckResponseMode();
52
+
53
+ @Config("file_backup_dir")
54
+ @ConfigDefault("null")
55
+ Optional<String> getFileBackupDir();
56
+
57
+ @Config("wait_until_buffer_flushed")
58
+ @ConfigDefault("null")
59
+ Optional<Integer> getWaitUntilBufferFlushed();
60
+
61
+ @Config("wait_until_flusher_terminated")
62
+ @ConfigDefault("null")
63
+ Optional<Integer> getWaitUntilFlusherTerminated();
64
+ }
65
+
66
+ public interface Task
67
+ extends ForwardBaseTask
68
+ {
69
+ @Config("out_forward")
70
+ @ConfigDefault("{}")
71
+ OutForwardTask getOutForwardTask();
72
+ }
73
+
74
+ public static void sendShutdownMessage(Task task)
75
+ {
76
+ logger.info("out_forward: send shutdown message.");
77
+ OutForwardService outForward = new OutForwardService(task);
78
+ outForward.emit(task.getShutdownTag(), Maps.newHashMap());
79
+ outForward.finish();
80
+ outForward.close();
81
+ }
82
+
83
+ private final Task task;
84
+ private final Fluency client;
85
+
86
+ public OutForwardService(Task task)
87
+ {
88
+ this.task = task;
89
+ this.client = newFluency(task.getOutForwardTask());
90
+ }
91
+
92
+ private Fluency.Config configureFluencyConfig(OutForwardTask t)
93
+ {
94
+ Fluency.Config c = new Fluency.Config();
95
+ if (t.getMaxBufferSize().isPresent()) {
96
+ c.setMaxBufferSize(t.getMaxBufferSize().get());
97
+ }
98
+ if (t.getBufferChunkInitialSize().isPresent()) {
99
+ c.setBufferChunkInitialSize(t.getBufferChunkInitialSize().get());
100
+ }
101
+ if (t.getBufferChunkRetentionSize().isPresent()) {
102
+ c.setBufferChunkRetentionSize(t.getBufferChunkRetentionSize().get());
103
+ }
104
+ if (t.getFlushIntervalMillis().isPresent()) {
105
+ c.setFlushIntervalMillis(t.getFlushIntervalMillis().get());
106
+ }
107
+ if (t.getSenderMaxRetryCount().isPresent()) {
108
+ c.setSenderMaxRetryCount(t.getSenderMaxRetryCount().get());
109
+ }
110
+ if (t.getAckResponseMode().isPresent()) {
111
+ c.setAckResponseMode(t.getAckResponseMode().get());
112
+ }
113
+ if (t.getFileBackupDir().isPresent()) {
114
+ c.setFileBackupDir(t.getFileBackupDir().get());
115
+ }
116
+ if (t.getWaitUntilBufferFlushed().isPresent()) {
117
+ c.setWaitUntilBufferFlushed(t.getWaitUntilBufferFlushed().get());
118
+ }
119
+ if (t.getWaitUntilFlusherTerminated().isPresent()) {
120
+ c.setWaitUntilFlusherTerminated(t.getWaitUntilFlusherTerminated().get());
121
+ }
122
+ return c;
123
+ }
124
+
125
+ private Fluency newFluency(OutForwardTask t)
126
+ {
127
+ Fluency.Config c = configureFluencyConfig(t);
128
+ try {
129
+ return Fluency.defaultFluency(t.getHost(), t.getPort(), c);
130
+ }
131
+ catch (IOException e) {
132
+ throw new RuntimeException(e);
133
+ }
134
+ }
135
+
136
+ public void emit(String tag, Map<String, Object> message)
137
+ {
138
+ try {
139
+ client.emit(tag, message);
140
+ }
141
+ catch (IOException e) {
142
+ throw new RuntimeException(e);
143
+ }
144
+ }
145
+
146
+ public void emit(Map<String, Object> message)
147
+ {
148
+ emit(task.getMessageTag(), message);
149
+ }
150
+
151
+ public void finish()
152
+ {
153
+ try {
154
+ client.flush();
155
+ }
156
+ catch (IOException e) {
157
+ throw new RuntimeException(e);
158
+ }
159
+ }
160
+
161
+ public void close()
162
+ {
163
+ try {
164
+ client.close();
165
+ }
166
+ catch (IOException e) {
167
+ throw new RuntimeException(e);
168
+ }
169
+ }
170
+ }
@@ -0,0 +1,63 @@
1
+ package org.embulk.filter.copy.forward;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.ColumnVisitor;
5
+ import org.embulk.spi.PageReader;
6
+
7
+ public class OutForwardVisitor
8
+ implements ColumnVisitor
9
+ {
10
+ private final PageReader reader;
11
+ private final OutForwardEventBuilder builder;
12
+
13
+ public OutForwardVisitor(PageReader reader, OutForwardEventBuilder builder)
14
+ {
15
+ this.reader = reader;
16
+ this.builder = builder;
17
+ }
18
+
19
+ private void nullOr(Column column, Runnable r)
20
+ {
21
+ if (reader.isNull(column)) {
22
+ builder.setNull(column);
23
+ return;
24
+ }
25
+ r.run();
26
+ }
27
+
28
+ @Override
29
+ public void booleanColumn(Column column)
30
+ {
31
+ nullOr(column, () -> builder.setBoolean(column, reader.getBoolean(column)));
32
+ }
33
+
34
+ @Override
35
+ public void longColumn(Column column)
36
+ {
37
+ nullOr(column, () -> builder.setLong(column, reader.getLong(column)));
38
+ }
39
+
40
+ @Override
41
+ public void doubleColumn(Column column)
42
+ {
43
+ nullOr(column, () -> builder.setDouble(column, reader.getDouble(column)));
44
+ }
45
+
46
+ @Override
47
+ public void stringColumn(Column column)
48
+ {
49
+ nullOr(column, () -> builder.setString(column, reader.getString(column)));
50
+ }
51
+
52
+ @Override
53
+ public void timestampColumn(Column column)
54
+ {
55
+ nullOr(column, () -> builder.setTimestamp(column, reader.getTimestamp(column)));
56
+ }
57
+
58
+ @Override
59
+ public void jsonColumn(Column column)
60
+ {
61
+ nullOr(column, () -> builder.setJson(column, reader.getJson(column)));
62
+ }
63
+ }
@@ -0,0 +1,111 @@
1
+ package org.embulk.filter.copy.plugin;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDiff;
5
+ import org.embulk.config.ConfigInject;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.Task;
8
+ import org.embulk.config.TaskReport;
9
+ import org.embulk.config.TaskSource;
10
+ import org.embulk.filter.copy.forward.InForwardEventReader;
11
+ import org.embulk.filter.copy.forward.InForwardService;
12
+ import org.embulk.filter.copy.forward.InForwardVisitor;
13
+ import org.embulk.spi.BufferAllocator;
14
+ import org.embulk.spi.Exec;
15
+ import org.embulk.spi.InputPlugin;
16
+ import org.embulk.spi.PageBuilder;
17
+ import org.embulk.spi.PageOutput;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.SchemaConfig;
20
+ import org.embulk.spi.time.TimestampParser;
21
+ import org.slf4j.Logger;
22
+
23
+ import java.util.List;
24
+
25
+ public class InternalForwardInputPlugin
26
+ implements InputPlugin
27
+ {
28
+ public final static String PLUGIN_NAME = "internal_forward";
29
+ private final static Logger logger = Exec.getLogger(InternalForwardInputPlugin.class);
30
+
31
+ public interface PluginTask
32
+ extends Task, TimestampParser.Task, InForwardService.Task
33
+ {
34
+ @Config("columns")
35
+ SchemaConfig getColumns();
36
+
37
+ @ConfigInject
38
+ BufferAllocator getBufferAllocator();
39
+ }
40
+
41
+ @Override
42
+ public ConfigDiff transaction(ConfigSource config,
43
+ InputPlugin.Control control)
44
+ {
45
+ PluginTask task = config.loadConfig(PluginTask.class);
46
+
47
+ Schema schema = task.getColumns().toSchema();
48
+ int taskCount = 1; // number of run() method calls
49
+
50
+ return resume(task.dump(), schema, taskCount, control);
51
+ }
52
+
53
+ @Override
54
+ public ConfigDiff resume(TaskSource taskSource,
55
+ Schema schema, int taskCount,
56
+ InputPlugin.Control control)
57
+ {
58
+ control.run(taskSource, schema, taskCount);
59
+ return Exec.newConfigDiff();
60
+ }
61
+
62
+ @Override
63
+ public void cleanup(TaskSource taskSource,
64
+ Schema schema, int taskCount,
65
+ List<TaskReport> successTaskReports)
66
+ {
67
+ }
68
+
69
+ @Override
70
+ public TaskReport run(TaskSource taskSource,
71
+ Schema schema, int taskIndex,
72
+ PageOutput output)
73
+ {
74
+ PluginTask task = taskSource.loadTask(PluginTask.class);
75
+
76
+ try (PageBuilder pageBuilder = new PageBuilder(task.getBufferAllocator(), schema, output)) {
77
+ TimestampParser timestampParser = new TimestampParser(
78
+ task.getJRuby(),
79
+ task.getDefaultTimestampFormat(),
80
+ task.getDefaultTimeZone());
81
+ InForwardEventReader eventReader = new InForwardEventReader(schema, timestampParser);
82
+ InForwardVisitor inForwardVisitor = new InForwardVisitor(eventReader, pageBuilder);
83
+
84
+ InForwardService.builder()
85
+ .task(task)
86
+ .forEachEventCallback(
87
+ event ->
88
+ {
89
+ // TODO: here is not thread-safe
90
+ eventReader.setEvent(event);
91
+ while (eventReader.nextMessage()) {
92
+ schema.visitColumns(inForwardVisitor);
93
+ pageBuilder.addRecord();
94
+ }
95
+ }
96
+ )
97
+ .build()
98
+ .runUntilShouldShutdown();
99
+
100
+ pageBuilder.finish();
101
+ }
102
+
103
+ return Exec.newTaskReport(); // TODO
104
+ }
105
+
106
+ @Override
107
+ public ConfigDiff guess(ConfigSource config)
108
+ {
109
+ return Exec.newConfigDiff();
110
+ }
111
+ }
@@ -0,0 +1,111 @@
1
+ package org.embulk.filter.copy.service;
2
+
3
+ import com.google.common.util.concurrent.FutureCallback;
4
+ import com.google.common.util.concurrent.Futures;
5
+ import com.google.common.util.concurrent.ListenableFuture;
6
+ import com.google.common.util.concurrent.ListeningExecutorService;
7
+ import com.google.common.util.concurrent.MoreExecutors;
8
+ import com.google.inject.Injector;
9
+ import org.embulk.EmbulkEmbed;
10
+ import org.embulk.config.ConfigException;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.exec.ExecutionResult;
13
+ import org.embulk.filter.copy.util.ElapsedTime;
14
+ import org.embulk.guice.LifeCycleInjector;
15
+ import org.embulk.spi.Exec;
16
+ import org.slf4j.Logger;
17
+
18
+ import javax.annotation.Nullable;
19
+
20
+ import java.lang.reflect.Constructor;
21
+ import java.lang.reflect.InvocationTargetException;
22
+ import java.util.concurrent.Callable;
23
+ import java.util.concurrent.Executors;
24
+
25
+ public class EmbulkExecutorService
26
+ {
27
+ private final static String THREAD_NAME = "embulk executor service";
28
+ private static final int NUM_THREADS = 1;
29
+ private final static Logger logger = Exec.getLogger(EmbulkExecutorService.class);
30
+ private final Injector injector;
31
+ private final ListeningExecutorService es;
32
+ private ListenableFuture<ExecutionResult> future;
33
+
34
+ public EmbulkExecutorService(Injector injector)
35
+ {
36
+ this.injector = injector;
37
+ this.es = MoreExecutors.listeningDecorator(
38
+ Executors.newFixedThreadPool(
39
+ NUM_THREADS,
40
+ r -> new Thread(r, THREAD_NAME)
41
+ ));
42
+ }
43
+
44
+ public void executeAsync(ConfigSource config)
45
+ {
46
+ logger.debug("execute with this config: {}", config);
47
+ if (future != null) {
48
+ throw new IllegalStateException("executeAsync is already called.");
49
+ }
50
+ future = es.submit(embulkRun(config));
51
+ Futures.addCallback(future, resultFutureCallback());
52
+ }
53
+
54
+ public void shutdown()
55
+ {
56
+ ElapsedTime.info(
57
+ logger,
58
+ "embulk executor service shutdown",
59
+ es::shutdown);
60
+ }
61
+
62
+ public void waitExecutionFinished()
63
+ {
64
+ if (future == null) {
65
+ throw new NullPointerException();
66
+ }
67
+
68
+ ElapsedTime.debugUntil(() -> future.isDone() || future.isCancelled(),
69
+ logger, "embulk executor", 3000L);
70
+ }
71
+
72
+ private Callable<ExecutionResult> embulkRun(ConfigSource config)
73
+ {
74
+ return () -> newEmbulkEmbed(injector).run(config);
75
+ }
76
+
77
+ private EmbulkEmbed newEmbulkEmbed(Injector injector)
78
+ {
79
+ try {
80
+ Constructor<EmbulkEmbed> constructor = EmbulkEmbed.class
81
+ .getDeclaredConstructor(ConfigSource.class, LifeCycleInjector.class);
82
+ constructor.setAccessible(true);
83
+ return constructor.newInstance(Exec.newConfigSource(), injector);
84
+ }
85
+ catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) {
86
+ throw new ConfigException(e);
87
+ }
88
+ }
89
+
90
+ private FutureCallback<ExecutionResult> resultFutureCallback()
91
+ {
92
+ return new FutureCallback<ExecutionResult>()
93
+ {
94
+ @Override
95
+ public void onSuccess(@Nullable ExecutionResult result)
96
+ {
97
+ for (Throwable throwable : result.getIgnoredExceptions()) {
98
+ logger.warn("Ignored error ", throwable);
99
+ }
100
+ logger.info("Config diff: {}", result.getConfigDiff());
101
+ logger.debug("ExecutionResult: {}", result);
102
+ }
103
+
104
+ @Override
105
+ public void onFailure(Throwable t)
106
+ {
107
+ throw new RuntimeException(t);
108
+ }
109
+ };
110
+ }
111
+ }