embulk-filter-calc 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,140 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.config.TaskSource;
8
+ import org.embulk.spi.Column;
9
+ import org.embulk.spi.ColumnConfig;
10
+ import org.embulk.spi.Exec;
11
+ import org.embulk.spi.FilterPlugin;
12
+ import org.embulk.spi.Page;
13
+ import org.embulk.spi.PageBuilder;
14
+ import org.embulk.spi.PageOutput;
15
+ import org.embulk.spi.PageReader;
16
+ import org.embulk.spi.Schema;
17
+ import org.embulk.spi.SchemaConfigException;
18
+ import org.embulk.spi.type.Type;
19
+ import org.embulk.spi.type.Types;
20
+
21
+ import java.util.List;
22
+
23
+ import static java.util.Locale.ENGLISH;
24
+
25
+ public class CalcFilterPlugin
26
+ implements FilterPlugin
27
+ {
28
+
29
+ // private Object IOException;
30
+
31
+ public interface CalcConfig
32
+ extends Task
33
+ {
34
+ @Config("formula")
35
+ String getFormula();
36
+
37
+ @Config("name")
38
+ String getName();
39
+ }
40
+
41
+ public interface PluginTask
42
+ extends Task
43
+ {
44
+
45
+ @Config("columns")
46
+ public List<CalcConfig> getCalcConfig();
47
+
48
+ @Config("output_columns")
49
+ @ConfigDefault("[]")
50
+ public List<ColumnConfig> getOutputColumns();
51
+ }
52
+
53
+ @Override
54
+ public void transaction(ConfigSource config, Schema inputSchema,
55
+ FilterPlugin.Control control)
56
+ {
57
+ PluginTask task = config.loadConfig(PluginTask.class);
58
+
59
+ Schema outputSchema = buildOutputSchema(task, inputSchema);
60
+ for (CalcConfig calcConfig : task.getCalcConfig()) {
61
+ CalcConfigChecker calc = new CalcConfigChecker(calcConfig.getName(), calcConfig.getFormula(), outputSchema);
62
+ calc.validateFormula();
63
+ }
64
+
65
+ control.run(task.dump(), outputSchema);
66
+ }
67
+
68
+ static Schema buildOutputSchema(PluginTask task, Schema inputSchema)
69
+ {
70
+ Schema.Builder builder = Schema.builder();
71
+ for (Column inputColumns : inputSchema.getColumns()) {
72
+ builder.add(inputColumns.getName(), inputColumns.getType());
73
+ }
74
+
75
+ List<ColumnConfig> outputColumns = task.getOutputColumns();
76
+ for (ColumnConfig outputColumn : outputColumns) {
77
+
78
+ String name = outputColumn.getName();
79
+ Type type = outputColumn.getType();
80
+ Column inputColumn;
81
+ try {
82
+ inputColumn = inputSchema.lookupColumn(name);
83
+ }
84
+ catch (SchemaConfigException ex) {
85
+ inputColumn = null;
86
+ }
87
+ if (inputColumn != null) {
88
+ throw new SchemaConfigException(String.format(ENGLISH, "The column \"%s\" already exists.", name));
89
+ }
90
+
91
+ if (Types.DOUBLE.equals(type)) {
92
+ builder.add(name, Types.DOUBLE);
93
+ }
94
+ else if (Types.LONG.equals(type)) {
95
+ builder.add(name, Types.LONG);
96
+ }
97
+ else {
98
+ throw new SchemaConfigException(String.format(ENGLISH, "The column \"%s\" must specify either long or double.", name));
99
+ }
100
+ }
101
+ return builder.build();
102
+ }
103
+
104
+ @Override
105
+ public PageOutput open(TaskSource taskSource, final Schema inputSchema,
106
+ final Schema outputSchema, final PageOutput output)
107
+ {
108
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
109
+
110
+ return new PageOutput()
111
+ {
112
+ private PageReader pageReader = new PageReader(inputSchema);
113
+ private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
114
+ private CalcVisitorImpl visitor = new CalcVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
115
+
116
+ @Override
117
+ public void finish()
118
+ {
119
+ pageBuilder.finish();
120
+ }
121
+
122
+ @Override
123
+ public void close()
124
+ {
125
+ pageBuilder.close();
126
+ }
127
+
128
+ @Override
129
+ public void add(Page page)
130
+ {
131
+ pageReader.setPage(page);
132
+
133
+ while (pageReader.nextRecord()) {
134
+ outputSchema.visitColumns(visitor);
135
+ pageBuilder.addRecord();
136
+ }
137
+ }
138
+ };
139
+ }
140
+ }
@@ -0,0 +1,130 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.embulk.config.ConfigException;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.Page;
6
+ import org.embulk.spi.PageReader;
7
+ import org.embulk.spi.Schema;
8
+ import org.embulk.spi.type.Types;
9
+
10
+ import static java.util.Locale.ENGLISH;
11
+
12
+ public class CalcFormulaVisitor
13
+ extends CalculatorBaseVisitor<Double>
14
+ {
15
+ private PageReader pageReader;
16
+ private Schema inputSchema;
17
+
18
+ public CalcFormulaVisitor(Schema inputSchema, PageReader pageReader)
19
+ {
20
+ this.pageReader = pageReader;
21
+ this.inputSchema = inputSchema;
22
+ }
23
+
24
+ @Override
25
+ public Double visitMulDivMod(CalculatorParser.MulDivModContext ctx)
26
+ {
27
+ Double left = visit(ctx.expr(0));
28
+ Double right = visit(ctx.expr(1));
29
+
30
+ if (left == null || right == null) {
31
+ return null;
32
+ }
33
+ else if (ctx.op.getType() == CalculatorParser.MUL) {
34
+ return left * right;
35
+ }
36
+
37
+ else if (ctx.op.getType() == CalculatorParser.DIV) {
38
+ return left / right;
39
+ }
40
+ else {
41
+ return left % right;
42
+ }
43
+ }
44
+
45
+ @Override
46
+ public Double visitAddSub(CalculatorParser.AddSubContext ctx)
47
+ {
48
+ Double left = visit(ctx.expr(0));
49
+ Double right = visit(ctx.expr(1));
50
+ if (left == null || right == null) {
51
+ return null;
52
+ }
53
+ else if (ctx.op.getType() == CalculatorParser.ADD) {
54
+ return left + right;
55
+ }
56
+ else {
57
+ return left - right;
58
+ }
59
+ }
60
+
61
+ @Override
62
+ public Double visitNumber(CalculatorParser.NumberContext ctx)
63
+ {
64
+ String id = ctx.NUM().getText();
65
+
66
+ return Double.parseDouble(id);
67
+ }
68
+
69
+ @Override
70
+ public Double visitIdentifier(CalculatorParser.IdentifierContext ctx)
71
+ {
72
+ String id = ctx.ID().getText();
73
+ Double val;
74
+ Column column = inputSchema.lookupColumn(id);
75
+
76
+ if (pageReader.isNull(column)) {
77
+ val = null;
78
+ }
79
+ else if (Types.DOUBLE.equals(column.getType())) {
80
+ val = pageReader.getDouble(column);
81
+ }
82
+ else if (Types.LONG.equals(column.getType())) {
83
+ Long v;
84
+ v = pageReader.getLong(column);
85
+ val = v.doubleValue();
86
+ }
87
+ else {
88
+ // throw
89
+ val = null;
90
+ }
91
+ return val;
92
+ }
93
+
94
+ @Override
95
+ public Double visitParen(CalculatorParser.ParenContext ctx)
96
+ {
97
+ return visit(ctx.expr());
98
+ }
99
+
100
+ @Override
101
+ public Double visitPower(CalculatorParser.PowerContext ctx)
102
+ {
103
+ Double left = visit(ctx.expr(0));
104
+ Double right = visit(ctx.expr(1));
105
+ if (left == null || right == null) {
106
+ return null;
107
+ }
108
+
109
+ return Math.pow(left, right);
110
+ }
111
+
112
+ // Scientific Functions
113
+ @Override
114
+ public Double visitFuncCos(CalculatorParser.FuncCosContext ctx)
115
+ {
116
+ return Math.cos(visit(ctx.expr()));
117
+ }
118
+
119
+ @Override
120
+ public Double visitFuncSin(CalculatorParser.FuncSinContext ctx)
121
+ {
122
+ return Math.sin(visit(ctx.expr()));
123
+ }
124
+
125
+ @Override
126
+ public Double visitFuncTan(CalculatorParser.FuncTanContext ctx)
127
+ {
128
+ return Math.tan(visit(ctx.expr()));
129
+ }
130
+ }
@@ -0,0 +1,141 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.ColumnVisitor;
5
+ import org.embulk.spi.Exec;
6
+ import org.embulk.spi.PageBuilder;
7
+ import org.embulk.spi.PageReader;
8
+ import org.embulk.spi.Schema;
9
+ import org.slf4j.Logger;
10
+
11
+ import java.util.HashMap;
12
+
13
+ public class CalcVisitorImpl
14
+ implements ColumnVisitor
15
+ {
16
+
17
+ private static final Logger logger = Exec.getLogger(CalcFilterPlugin.class);
18
+ private final CalcFilterPlugin.PluginTask task;
19
+ private final Schema inputSchema;
20
+ private final Schema outputSchema;
21
+ private final PageReader pageReader;
22
+ private final PageBuilder pageBuilder;
23
+ private final HashMap<String, Calculator> calcMap = new HashMap<>();
24
+
25
+ CalcVisitorImpl(CalcFilterPlugin.PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
26
+ {
27
+ this.task = task;
28
+ this.inputSchema = inputSchema;
29
+ this.outputSchema = outputSchema;
30
+ this.pageReader = pageReader;
31
+ this.pageBuilder = pageBuilder;
32
+ initializeCalcMap();
33
+ }
34
+
35
+ private void initializeCalcMap()
36
+ {
37
+
38
+ for (CalcFilterPlugin.CalcConfig calcConfig : task.getCalcConfig()) {
39
+ String name = calcConfig.getName();
40
+ String formula = calcConfig.getFormula();
41
+ Calculator calc = new Calculator(formula, inputSchema, pageReader);
42
+ calcMap.put(name, calc);
43
+ }
44
+ }
45
+
46
+ @Override
47
+ public void booleanColumn(Column outputColumn)
48
+ {
49
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
50
+ if (pageReader.isNull(inputColumn)) {
51
+ pageBuilder.setNull(outputColumn);
52
+ }
53
+ else {
54
+ pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
55
+ }
56
+ }
57
+
58
+ @Override
59
+ public void longColumn(Column outputColumn)
60
+ {
61
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
62
+ Calculator calc = calcMap.get(outputColumn.getName());
63
+ if (calc == null) {
64
+ if (pageReader.isNull(inputColumn)) {
65
+ pageBuilder.setNull(outputColumn);
66
+ }
67
+ else {
68
+ pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
69
+ }
70
+ }
71
+ else {
72
+ Double val = calc.calc();
73
+ if (val == null) {
74
+ pageBuilder.setNull(outputColumn);
75
+ }
76
+ else {
77
+ pageBuilder.setLong(outputColumn, val.longValue());
78
+ }
79
+ }
80
+ }
81
+
82
+ @Override
83
+ public void doubleColumn(Column outputColumn)
84
+ {
85
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
86
+ Calculator calc = calcMap.get(outputColumn.getName());
87
+ if (calc == null) {
88
+ if (pageReader.isNull(inputColumn)) {
89
+ pageBuilder.setNull(outputColumn);
90
+ }
91
+ else {
92
+ pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
93
+ }
94
+ }
95
+ else {
96
+ Double val = calc.calc();
97
+ if (val == null) {
98
+ pageBuilder.setNull(outputColumn);
99
+ }
100
+ else {
101
+ pageBuilder.setDouble(outputColumn, val);
102
+ }
103
+ }
104
+ }
105
+
106
+ @Override
107
+ public void stringColumn(Column outputColumn)
108
+ {
109
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
110
+ if (pageReader.isNull(inputColumn)) {
111
+ pageBuilder.setNull(outputColumn);
112
+ }
113
+ else {
114
+ pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
115
+ }
116
+ }
117
+
118
+ @Override
119
+ public void jsonColumn(Column outputColumn)
120
+ {
121
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
122
+ if (pageReader.isNull(inputColumn)) {
123
+ pageBuilder.setNull(outputColumn);
124
+ }
125
+ else {
126
+ pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
127
+ }
128
+ }
129
+
130
+ @Override
131
+ public void timestampColumn(Column outputColumn)
132
+ {
133
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
134
+ if (pageReader.isNull(inputColumn)) {
135
+ pageBuilder.setNull(outputColumn);
136
+ }
137
+ else {
138
+ pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
139
+ }
140
+ }
141
+ }
@@ -0,0 +1,30 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.antlr.v4.runtime.ANTLRInputStream;
4
+ import org.antlr.v4.runtime.CommonTokenStream;
5
+ import org.antlr.v4.runtime.tree.ParseTree;
6
+ import org.embulk.spi.PageReader;
7
+ import org.embulk.spi.Schema;
8
+
9
+ public class Calculator
10
+ {
11
+ private ParseTree tree;
12
+ private CalcFormulaVisitor visitor;
13
+
14
+ public Calculator(String formula, Schema inputSchema, PageReader pageReader)
15
+ {
16
+ ANTLRInputStream input = new ANTLRInputStream(formula);
17
+ CalculatorLexer lexer = new CalculatorLexer(input);
18
+ CommonTokenStream tokens = new CommonTokenStream(lexer);
19
+ CalculatorParser parser = new CalculatorParser(tokens);
20
+
21
+ this.tree = parser.expr();
22
+ this.visitor = new CalcFormulaVisitor(inputSchema, pageReader);
23
+ }
24
+
25
+ public Double calc()
26
+ {
27
+ return visitor.visit(tree);
28
+ }
29
+ }
30
+
@@ -0,0 +1,27 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.antlr.v4.runtime.BaseErrorListener;
4
+ import org.antlr.v4.runtime.RecognitionException;
5
+ import org.antlr.v4.runtime.Recognizer;
6
+ import org.embulk.config.ConfigException;
7
+
8
+ import static java.util.Locale.ENGLISH;
9
+
10
+ public class ConfigErrorListener
11
+ extends BaseErrorListener
12
+ {
13
+ private String column_name;
14
+
15
+ public ConfigErrorListener(String column_name)
16
+ {
17
+ this.column_name = column_name;
18
+ }
19
+
20
+ @Override
21
+ public void syntaxError(Recognizer<?, ?> recognizer,
22
+ Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e)
23
+ {
24
+ String err = String.format(ENGLISH, "The \"%s\" column has invalid formula. line: %d error: %s", column_name, line, msg);
25
+ throw new ConfigException(err);
26
+ }
27
+ }