embulk-filter-speedometer 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
# Register the Java-implemented "speedometer" filter with Embulk.
# The plugin class is loaded from the "classpath" directory, resolved
# relative to the location of this file.
speedometer_classpath = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_filter(
  "speedometer",
  "org.embulk.filter.SpeedometerFilterPlugin",
  speedometer_classpath
)
@@ -0,0 +1,289 @@
1
+ package org.embulk.filter;
2
+
3
+ import javax.validation.constraints.Min;
4
+
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.config.ConfigInject;
8
+ import org.embulk.config.ConfigSource;
9
+ import org.embulk.config.Task;
10
+ import org.embulk.config.TaskSource;
11
+ import org.embulk.spi.BufferAllocator;
12
+ import org.embulk.spi.Column;
13
+ import org.embulk.spi.ColumnVisitor;
14
+ import org.embulk.spi.FilterPlugin;
15
+ import org.embulk.spi.Page;
16
+ import org.embulk.spi.PageBuilder;
17
+ import org.embulk.spi.PageOutput;
18
+ import org.embulk.spi.PageReader;
19
+ import org.embulk.spi.Schema;
20
+ import org.embulk.spi.time.Timestamp;
21
+ import org.embulk.spi.time.TimestampFormatter;
22
+ import org.embulk.spi.type.TimestampType;
23
+
24
+ import com.google.common.collect.ImmutableMap;
25
+
26
+ public class SpeedometerFilterPlugin
27
+ implements FilterPlugin
28
+ {
29
+ private static final int TRUE_LENGTH = Boolean.toString(true).length();
30
+ private static final int FALSE_LENGTH = Boolean.toString(false).length();
31
+
32
+ public interface PluginTask
33
+ extends Task, TimestampFormatter.FormatterTask
34
+ {
35
+ @Config("speed_limit")
36
+ @ConfigDefault("0")
37
+ @Min(0)
38
+ public long getSpeedLimit();
39
+
40
+ @Config("max_sleep_millisec")
41
+ @ConfigDefault("1000")
42
+ @Min(0)
43
+ public int getMaxSleepMillisec();
44
+
45
+ @Config("delimiter")
46
+ @ConfigDefault("\",\"")
47
+ public String getDelimiter();
48
+
49
+ @Config("record_padding_size")
50
+ @ConfigDefault("1")
51
+ public int getRecordPaddingSize();
52
+
53
+ @Config("log_interval_seconds")
54
+ @ConfigDefault("10")
55
+ @Min(0)
56
+ public int getLogIntervalSeconds();
57
+
58
+ @ConfigInject
59
+ public BufferAllocator getBufferAllocator();
60
+ }
61
+
62
+ @Override
63
+ public void transaction(ConfigSource config, Schema inputSchema,
64
+ FilterPlugin.Control control)
65
+ {
66
+ PluginTask task = config.loadConfig(PluginTask.class);
67
+ Schema outputSchema = inputSchema;
68
+ control.run(task.dump(), outputSchema);
69
+ }
70
+
71
+ @Override
72
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
73
+ Schema outputSchema, PageOutput output)
74
+ {
75
+ PluginTask task = taskSource.loadTask(PluginTask.class);
76
+
77
+ return new SpeedControlPageOutput(task, inputSchema, output);
78
+ }
79
+
80
+ static class SpeedControlPageOutput implements PageOutput {
81
+ private final SpeedometerSpeedController controller;
82
+ private final Schema schema;
83
+ private final ImmutableMap<Column, TimestampFormatter> timestampMap;
84
+ private final PageOutput pageOutput;
85
+ private final PageOutput addOnlyPageOutput;
86
+ private final PageReader pageReader;
87
+ private final BufferAllocator allocator;
88
+ private final int delimiterLength;
89
+ private final int recordPaddingSize;
90
+
91
+ SpeedControlPageOutput(PluginTask task, Schema schema, PageOutput pageOutput) {
92
+ this.controller = new SpeedometerSpeedController(task, SpeedometerSpeedAggregator.getInstance());
93
+ this.schema = schema;
94
+ this.pageOutput = pageOutput;
95
+ this.addOnlyPageOutput = new AddOnlyPageOutput(pageOutput);
96
+ this.allocator = task.getBufferAllocator();
97
+ this.delimiterLength = task.getDelimiter().length();
98
+ this.recordPaddingSize = task.getRecordPaddingSize();
99
+ pageReader = new PageReader(schema);
100
+ timestampMap = buildTimestampFormatterMap(task, schema);
101
+ }
102
+
103
+ @Override
104
+ public void add(Page page) {
105
+ try (final PageBuilder pageBuilder = new PageBuilder(allocator, schema, addOnlyPageOutput)) {
106
+ ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
107
+ pageReader.setPage(page);
108
+ while (pageReader.nextRecord()) {
109
+ visitor.speedMonitorStartRecord();
110
+ schema.visitColumns(visitor);
111
+ visitor.speedMonitorEndRecord();
112
+ pageBuilder.addRecord();
113
+ }
114
+ pageBuilder.finish();
115
+ }
116
+ }
117
+
118
+ @Override
119
+ public void finish() {
120
+ pageOutput.finish();
121
+ }
122
+
123
+ @Override
124
+ public void close() {
125
+ controller.stop();
126
+ pageReader.close();
127
+ pageOutput.close();
128
+ }
129
+
130
+ private ImmutableMap<Column, TimestampFormatter> buildTimestampFormatterMap(final PluginTask task, Schema schema) {
131
+ final ImmutableMap.Builder<Column, TimestampFormatter> builder = new ImmutableMap.Builder<>();
132
+
133
+ schema.visitColumns(new ColumnVisitor() {
134
+ @Override
135
+ public void booleanColumn(Column column) { }
136
+
137
+ @Override
138
+ public void longColumn(Column column) { }
139
+
140
+ @Override
141
+ public void doubleColumn(Column column) { }
142
+
143
+ @Override
144
+ public void stringColumn(Column column) { }
145
+
146
+ @Override
147
+ public void timestampColumn(Column column) {
148
+ if (column.getType() instanceof TimestampType) {
149
+ TimestampType tt = (TimestampType) column.getType();
150
+ builder.put(column, new TimestampFormatter(tt.getFormat(), task));
151
+ } else {
152
+ throw new RuntimeException("Timestamp should be TimestampType.");
153
+ }
154
+ }
155
+ });
156
+
157
+ return builder.build();
158
+ }
159
+
160
+ // Ignore finish and close to avoid closing upper output stream.
161
+ static class AddOnlyPageOutput implements PageOutput {
162
+ protected final PageOutput output;
163
+
164
+ public AddOnlyPageOutput(PageOutput outptut) {
165
+ this.output = outptut;
166
+ }
167
+
168
+ @Override
169
+ public void add(Page page) {
170
+ output.add(page);
171
+ }
172
+
173
+ @Override
174
+ public void finish() { }
175
+
176
+ @Override
177
+ public void close() { }
178
+ }
179
+
180
+ class ColumnVisitorImpl implements ColumnVisitor {
181
+ private final PageBuilder pageBuilder;
182
+ private long startRecordTime;
183
+
184
+ ColumnVisitorImpl(PageBuilder pageBuilder) {
185
+ this.pageBuilder = pageBuilder;
186
+ }
187
+
188
+ @Override
189
+ public void booleanColumn(Column column) {
190
+ if (pageReader.isNull(column)) {
191
+ speedMonitor(column);
192
+ pageBuilder.setNull(column);
193
+ } else {
194
+ pageBuilder.setBoolean(column, speedMonitor(column, pageReader.getBoolean(column)));
195
+ }
196
+ }
197
+
198
+ @Override
199
+ public void longColumn(Column column) {
200
+ if (pageReader.isNull(column)) {
201
+ speedMonitor(column);
202
+ pageBuilder.setNull(column);
203
+ } else {
204
+ pageBuilder.setLong(column, speedMonitor(column, pageReader.getLong(column)));
205
+ }
206
+ }
207
+
208
+ @Override
209
+ public void doubleColumn(Column column) {
210
+ if (pageReader.isNull(column)) {
211
+ speedMonitor(column);
212
+ pageBuilder.setNull(column);
213
+ } else {
214
+ pageBuilder.setDouble(column, speedMonitor(column, pageReader.getDouble(column)));
215
+ }
216
+ }
217
+
218
+ @Override
219
+ public void stringColumn(Column column) {
220
+ if (pageReader.isNull(column)) {
221
+ speedMonitor(column);
222
+ pageBuilder.setNull(column);
223
+ } else {
224
+ pageBuilder.setString(column, speedMonitor(column, pageReader.getString(column)));
225
+ }
226
+ }
227
+
228
+ @Override
229
+ public void timestampColumn(Column column) {
230
+ if (pageReader.isNull(column)) {
231
+ speedMonitor(column);
232
+ pageBuilder.setNull(column);
233
+ } else {
234
+ pageBuilder.setTimestamp(column, speedMonitor(column, pageReader.getTimestamp(column)));
235
+ }
236
+ }
237
+
238
+ private void speedMonitorStartRecord() {
239
+ startRecordTime = System.currentTimeMillis();
240
+ }
241
+
242
+ private void speedMonitorEndRecord() {
243
+ controller.checkSpeedLimit(startRecordTime, recordPaddingSize);
244
+ }
245
+
246
+ // For null column
247
+ private void speedMonitor(Column column) {
248
+ speedMonitorForDelimiter(column);
249
+ }
250
+
251
+ private boolean speedMonitor(Column column, boolean b) {
252
+ speedMonitorForDelimiter(column);
253
+ controller.checkSpeedLimit(startRecordTime, b ? TRUE_LENGTH : FALSE_LENGTH);
254
+ return b;
255
+ }
256
+
257
+ private long speedMonitor(Column column, long l) {
258
+ speedMonitorForDelimiter(column);
259
+ controller.checkSpeedLimit(startRecordTime, String.valueOf(l).length());
260
+ return l;
261
+ }
262
+
263
+ private double speedMonitor(Column column, double d) {
264
+ speedMonitorForDelimiter(column);
265
+ controller.checkSpeedLimit(startRecordTime, String.valueOf(d).length());
266
+ return d;
267
+ }
268
+
269
+ private String speedMonitor(Column column, String s) {
270
+ speedMonitorForDelimiter(column);
271
+ controller.checkSpeedLimit(startRecordTime, s.length());
272
+ return s;
273
+ }
274
+
275
+ private Timestamp speedMonitor(Column column, Timestamp t) {
276
+ speedMonitorForDelimiter(column);
277
+ TimestampFormatter formatter = timestampMap.get(column);
278
+ controller.checkSpeedLimit(startRecordTime, formatter.format(t).length());
279
+ return t;
280
+ }
281
+
282
+ private void speedMonitorForDelimiter(Column column) {
283
+ if (column.getIndex() > 0) {
284
+ controller.checkSpeedLimit(startRecordTime, delimiterLength);
285
+ }
286
+ }
287
+ }
288
+ }
289
+ }
@@ -0,0 +1,141 @@
1
+ package org.embulk.filter;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+ import java.util.concurrent.atomic.AtomicInteger;
6
+ import java.util.concurrent.atomic.AtomicLong;
7
+
8
+ import org.embulk.spi.Exec;
9
+ import org.slf4j.Logger;
10
+
11
+ class SpeedometerSpeedAggregator {
12
+ private static class SpeedometerSpeedAggregatorHolder {
13
+ private static final SpeedometerSpeedAggregator INSTANCE = new SpeedometerSpeedAggregator();
14
+ }
15
+
16
+ private final long INITAL_START_TIME = 0;
17
+
18
+ private final AtomicInteger activeControllerCount = new AtomicInteger(0);
19
+ private final AtomicLong globalStartTime = new AtomicLong(INITAL_START_TIME);
20
+ private final AtomicLong globalTotalBytes = new AtomicLong(0);
21
+ private final AtomicLong previousLogReportTimeMillisec = new AtomicLong(INITAL_START_TIME);
22
+
23
+ // TODO: We can use google's library.
24
+ private final List<SpeedometerSpeedController> controllerList = new ArrayList<>();
25
+
26
+ public static SpeedometerSpeedAggregator getInstance() {
27
+ return SpeedometerSpeedAggregatorHolder.INSTANCE;
28
+ }
29
+
30
+ public SpeedometerSpeedAggregator() {
31
+ showLogMessage(activeControllerCount.get(), 0, 0, 0);
32
+ }
33
+
34
+ public void startController(SpeedometerSpeedController controller, long nowTime) {
35
+ globalStartTime.compareAndSet(INITAL_START_TIME, nowTime);
36
+ activeControllerCount.incrementAndGet();
37
+ synchronized (controllerList) {
38
+ controllerList.add(controller);
39
+ }
40
+ }
41
+
42
+ public void stopController(SpeedometerSpeedController controller) {
43
+ synchronized (controllerList) {
44
+ controllerList.remove(controller);
45
+ }
46
+ long runningCount = activeControllerCount.decrementAndGet();
47
+ globalTotalBytes.addAndGet(controller.getTotalBytes());
48
+
49
+ // NOTE: Sometimes, there is no running thread nevertheless there are remaining tasks.
50
+ // So, this message may be output while running tasks.
51
+ if (runningCount == 0) {
52
+ showOverallMessage();
53
+ }
54
+ }
55
+
56
+ public long getSpeedLimitForController(SpeedometerSpeedController controller) {
57
+ return controller.getSpeedLimit() / activeControllerCount.get();
58
+ }
59
+
60
+ public void checkProgress(long nowTime, int logIntervalMillisec) {
61
+ if (logIntervalMillisec <= 0) {
62
+ return;
63
+ }
64
+
65
+ long previousTime = previousLogReportTimeMillisec.get();
66
+ if (previousTime == INITAL_START_TIME) {
67
+ previousLogReportTimeMillisec.compareAndSet(INITAL_START_TIME, nowTime);
68
+ } else {
69
+ long nowInterval = previousTime + logIntervalMillisec;
70
+ if (nowInterval < nowTime) {
71
+ if (previousLogReportTimeMillisec.compareAndSet(previousTime, nowTime)) {
72
+ showProgressMessage(nowTime);
73
+ renewPeriods();
74
+ }
75
+ }
76
+ }
77
+ }
78
+
79
+ Logger getLogger() {
80
+ return Exec.getLogger(SpeedometerFilterPlugin.class);
81
+ }
82
+
83
+ long getGlobalStartTime() {
84
+ return globalStartTime.get();
85
+ }
86
+
87
+ int getActiveControllerCount() {
88
+ return activeControllerCount.get();
89
+ }
90
+
91
+ long getGlobalTotalBytes() {
92
+ return globalTotalBytes.get();
93
+ }
94
+
95
+ List<SpeedometerSpeedController> getControllerList() {
96
+ List<SpeedometerSpeedController> copyList;
97
+ synchronized (controllerList) {
98
+ copyList = new ArrayList<SpeedometerSpeedController>(controllerList);
99
+ }
100
+ return copyList;
101
+ }
102
+
103
+ private void renewPeriods() {
104
+ for (SpeedometerSpeedController controller : getControllerList()) {
105
+ controller.renewPeriod();
106
+ }
107
+ }
108
+
109
+ private void showProgressMessage(long nowTime) {
110
+ long currentTotalSize = globalTotalBytes.get();
111
+ long currentBytesPerSec = 0;
112
+ for (SpeedometerSpeedController controller : getControllerList()) {
113
+ currentTotalSize += controller.getTotalBytes();
114
+ currentBytesPerSec += controller.getPeriodBytesPerSec(nowTime);
115
+ }
116
+
117
+ long timeDelta = nowTime - globalStartTime.get();
118
+ timeDelta = timeDelta > 0 ? timeDelta : 1;
119
+
120
+ showLogMessage(activeControllerCount.get(), currentTotalSize, timeDelta, currentBytesPerSec);
121
+ }
122
+
123
+ private void showOverallMessage() {
124
+ long timeDelta = System.currentTimeMillis() - globalStartTime.get();
125
+ timeDelta = timeDelta > 0 ? timeDelta : 1;
126
+ long bytesPerSec = (globalTotalBytes.get() * 1000) / timeDelta;
127
+
128
+ showLogMessage(activeControllerCount.get(), globalTotalBytes.get(), timeDelta, bytesPerSec);
129
+ }
130
+
131
+ private void showLogMessage(int activeThreads, long totalBytes, long timeMilliSec, long bytesPerSec) {
132
+ Logger logger = getLogger();
133
+ if (logger != null) {
134
+ logger.info(String.format("{speedometer: {active: %d, total: %s, sec: %s, speed: %s/s}}",
135
+ activeThreads,
136
+ SpeedometerUtil.toNumberText(totalBytes),
137
+ SpeedometerUtil.toTimeText(timeMilliSec),
138
+ SpeedometerUtil.toNumberText(bytesPerSec)));
139
+ }
140
+ }
141
+ }
@@ -0,0 +1,107 @@
1
+ package org.embulk.filter;
2
+
3
+ import org.embulk.filter.SpeedometerFilterPlugin.PluginTask;
4
+
5
+
6
+ class SpeedometerSpeedController {
7
+ private final SpeedometerSpeedAggregator aggregator;
8
+
9
+ private final long limitBytesPerSec;
10
+ private final int maxSleepMillisec;
11
+ private final int logIntervalMillisec;
12
+
13
+ private long startTime;
14
+ private volatile long periodStartTime;
15
+ private volatile long periodTotalBytes;
16
+ private volatile long threadTotalBytes;
17
+ private volatile boolean renewFlag = true;
18
+
19
+ SpeedometerSpeedController(PluginTask task, SpeedometerSpeedAggregator aggregator) {
20
+ this.limitBytesPerSec = task.getSpeedLimit();
21
+ this.maxSleepMillisec = task.getMaxSleepMillisec();
22
+ this.logIntervalMillisec = task.getLogIntervalSeconds() * 1000;
23
+ this.aggregator = aggregator;
24
+ }
25
+
26
+ public void stop() {
27
+ startNewPeriod(0);
28
+ aggregator.stopController(this);
29
+ }
30
+
31
+ public long getSpeedLimit() {
32
+ return limitBytesPerSec;
33
+ }
34
+
35
+ public int getMaxSleepMillisec() {
36
+ return maxSleepMillisec;
37
+ }
38
+
39
+ public int getLogIntervalMillisec() {
40
+ return logIntervalMillisec;
41
+ }
42
+
43
+ public long getTotalBytes() {
44
+ return threadTotalBytes + periodTotalBytes;
45
+ }
46
+
47
+ public long getPeriodBytesPerSec(long nowTime) {
48
+ long timeDeltaMillisec = nowTime - periodStartTime;
49
+ if (timeDeltaMillisec <= 0) {
50
+ timeDeltaMillisec = 1;
51
+ }
52
+ return (periodTotalBytes * 1000) / timeDeltaMillisec;
53
+ }
54
+
55
+ public void checkSpeedLimit(long nowTime, long newDataSize) {
56
+ if (startTime == 0) {
57
+ startTime = nowTime;
58
+ aggregator.startController(this, startTime);
59
+ }
60
+
61
+ if (renewFlag) {
62
+ renewFlag = false;
63
+ startNewPeriod(nowTime);
64
+ }
65
+
66
+ periodTotalBytes += newDataSize;
67
+ aggregator.checkProgress(nowTime, logIntervalMillisec);
68
+
69
+ if (limitBytesPerSec <= 0) {
70
+ return;
71
+ }
72
+
73
+ long speedLimitForThread = aggregator.getSpeedLimitForController(this);
74
+ long timeDeltaMillisec = nowTime > periodStartTime ? nowTime - periodStartTime : 1;
75
+ long bytesPerSec = (periodTotalBytes * 1000) / timeDeltaMillisec;
76
+ long overBytes = bytesPerSec - speedLimitForThread;
77
+
78
+ if (overBytes > 0) {
79
+ try {
80
+ long sleepTime = (periodTotalBytes * 1000) / speedLimitForThread - timeDeltaMillisec;
81
+ sleepTime = sleepTime > maxSleepMillisec ? maxSleepMillisec : sleepTime > 0 ? sleepTime : 0;
82
+ Thread.sleep(sleepTime);
83
+ } catch (InterruptedException e) {
84
+ // TODO: Do I need to throw an exception ?
85
+ }
86
+ }
87
+ }
88
+
89
+ void renewPeriod() {
90
+ renewFlag = true;
91
+ }
92
+
93
+ boolean isRenewPeriodSet() {
94
+ return renewFlag;
95
+ }
96
+
97
+ SpeedometerSpeedAggregator getAggregator() {
98
+ return aggregator;
99
+ }
100
+
101
+ private void startNewPeriod(long newPeriodTime) {
102
+ threadTotalBytes += periodTotalBytes;
103
+ periodTotalBytes = 0;
104
+ periodStartTime = newPeriodTime;
105
+ }
106
+ }
107
+