embulk-filter-calc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.config.TaskSource;
8
+ import org.embulk.spi.Column;
9
+ import org.embulk.spi.ColumnConfig;
10
+ import org.embulk.spi.Exec;
11
+ import org.embulk.spi.FilterPlugin;
12
+ import org.embulk.spi.Page;
13
+ import org.embulk.spi.PageBuilder;
14
+ import org.embulk.spi.PageOutput;
15
+ import org.embulk.spi.PageReader;
16
+ import org.embulk.spi.Schema;
17
+ import org.embulk.spi.SchemaConfigException;
18
+ import org.embulk.spi.type.Type;
19
+ import org.embulk.spi.type.Types;
20
+
21
+ import java.util.List;
22
+
23
+ import static java.util.Locale.ENGLISH;
24
+
25
+ public class CalcFilterPlugin
26
+ implements FilterPlugin
27
+ {
28
+
29
+ // private Object IOException;
30
+
31
+ public interface CalcConfig
32
+ extends Task
33
+ {
34
+ @Config("formula")
35
+ String getFormula();
36
+
37
+ @Config("name")
38
+ String getName();
39
+ }
40
+
41
+ public interface PluginTask
42
+ extends Task
43
+ {
44
+
45
+ @Config("columns")
46
+ public List<CalcConfig> getCalcConfig();
47
+
48
+ @Config("output_columns")
49
+ @ConfigDefault("[]")
50
+ public List<ColumnConfig> getOutputColumns();
51
+ }
52
+
53
+ @Override
54
+ public void transaction(ConfigSource config, Schema inputSchema,
55
+ FilterPlugin.Control control)
56
+ {
57
+ PluginTask task = config.loadConfig(PluginTask.class);
58
+
59
+ Schema outputSchema = buildOutputSchema(task, inputSchema);
60
+ for (CalcConfig calcConfig : task.getCalcConfig()) {
61
+ CalcConfigChecker calc = new CalcConfigChecker(calcConfig.getName(), calcConfig.getFormula(), outputSchema);
62
+ calc.validateFormula();
63
+ }
64
+
65
+ control.run(task.dump(), outputSchema);
66
+ }
67
+
68
+ static Schema buildOutputSchema(PluginTask task, Schema inputSchema)
69
+ {
70
+ Schema.Builder builder = Schema.builder();
71
+ for (Column inputColumns : inputSchema.getColumns()) {
72
+ builder.add(inputColumns.getName(), inputColumns.getType());
73
+ }
74
+
75
+ List<ColumnConfig> outputColumns = task.getOutputColumns();
76
+ for (ColumnConfig outputColumn : outputColumns) {
77
+
78
+ String name = outputColumn.getName();
79
+ Type type = outputColumn.getType();
80
+ Column inputColumn;
81
+ try {
82
+ inputColumn = inputSchema.lookupColumn(name);
83
+ }
84
+ catch (SchemaConfigException ex) {
85
+ inputColumn = null;
86
+ }
87
+ if (inputColumn != null) {
88
+ throw new SchemaConfigException(String.format(ENGLISH, "The column \"%s\" already exists.", name));
89
+ }
90
+
91
+ if (Types.DOUBLE.equals(type)) {
92
+ builder.add(name, Types.DOUBLE);
93
+ }
94
+ else if (Types.LONG.equals(type)) {
95
+ builder.add(name, Types.LONG);
96
+ }
97
+ else {
98
+ throw new SchemaConfigException(String.format(ENGLISH, "The column \"%s\" must specify either long or double.", name));
99
+ }
100
+ }
101
+ return builder.build();
102
+ }
103
+
104
+ @Override
105
+ public PageOutput open(TaskSource taskSource, final Schema inputSchema,
106
+ final Schema outputSchema, final PageOutput output)
107
+ {
108
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
109
+
110
+ return new PageOutput()
111
+ {
112
+ private PageReader pageReader = new PageReader(inputSchema);
113
+ private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
114
+ private CalcVisitorImpl visitor = new CalcVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
115
+
116
+ @Override
117
+ public void finish()
118
+ {
119
+ pageBuilder.finish();
120
+ }
121
+
122
+ @Override
123
+ public void close()
124
+ {
125
+ pageBuilder.close();
126
+ }
127
+
128
+ @Override
129
+ public void add(Page page)
130
+ {
131
+ pageReader.setPage(page);
132
+
133
+ while (pageReader.nextRecord()) {
134
+ outputSchema.visitColumns(visitor);
135
+ pageBuilder.addRecord();
136
+ }
137
+ }
138
+ };
139
+ }
140
+ }
@@ -0,0 +1,130 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.embulk.config.ConfigException;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.Page;
6
+ import org.embulk.spi.PageReader;
7
+ import org.embulk.spi.Schema;
8
+ import org.embulk.spi.type.Types;
9
+
10
+ import static java.util.Locale.ENGLISH;
11
+
12
+ public class CalcFormulaVisitor
13
+ extends CalculatorBaseVisitor<Double>
14
+ {
15
+ private PageReader pageReader;
16
+ private Schema inputSchema;
17
+
18
+ public CalcFormulaVisitor(Schema inputSchema, PageReader pageReader)
19
+ {
20
+ this.pageReader = pageReader;
21
+ this.inputSchema = inputSchema;
22
+ }
23
+
24
+ @Override
25
+ public Double visitMulDivMod(CalculatorParser.MulDivModContext ctx)
26
+ {
27
+ Double left = visit(ctx.expr(0));
28
+ Double right = visit(ctx.expr(1));
29
+
30
+ if (left == null || right == null) {
31
+ return null;
32
+ }
33
+ else if (ctx.op.getType() == CalculatorParser.MUL) {
34
+ return left * right;
35
+ }
36
+
37
+ else if (ctx.op.getType() == CalculatorParser.DIV) {
38
+ return left / right;
39
+ }
40
+ else {
41
+ return left % right;
42
+ }
43
+ }
44
+
45
+ @Override
46
+ public Double visitAddSub(CalculatorParser.AddSubContext ctx)
47
+ {
48
+ Double left = visit(ctx.expr(0));
49
+ Double right = visit(ctx.expr(1));
50
+ if (left == null || right == null) {
51
+ return null;
52
+ }
53
+ else if (ctx.op.getType() == CalculatorParser.ADD) {
54
+ return left + right;
55
+ }
56
+ else {
57
+ return left - right;
58
+ }
59
+ }
60
+
61
+ @Override
62
+ public Double visitNumber(CalculatorParser.NumberContext ctx)
63
+ {
64
+ String id = ctx.NUM().getText();
65
+
66
+ return Double.parseDouble(id);
67
+ }
68
+
69
+ @Override
70
+ public Double visitIdentifier(CalculatorParser.IdentifierContext ctx)
71
+ {
72
+ String id = ctx.ID().getText();
73
+ Double val;
74
+ Column column = inputSchema.lookupColumn(id);
75
+
76
+ if (pageReader.isNull(column)) {
77
+ val = null;
78
+ }
79
+ else if (Types.DOUBLE.equals(column.getType())) {
80
+ val = pageReader.getDouble(column);
81
+ }
82
+ else if (Types.LONG.equals(column.getType())) {
83
+ Long v;
84
+ v = pageReader.getLong(column);
85
+ val = v.doubleValue();
86
+ }
87
+ else {
88
+ // throw
89
+ val = null;
90
+ }
91
+ return val;
92
+ }
93
+
94
+ @Override
95
+ public Double visitParen(CalculatorParser.ParenContext ctx)
96
+ {
97
+ return visit(ctx.expr());
98
+ }
99
+
100
+ @Override
101
+ public Double visitPower(CalculatorParser.PowerContext ctx)
102
+ {
103
+ Double left = visit(ctx.expr(0));
104
+ Double right = visit(ctx.expr(1));
105
+ if (left == null || right == null) {
106
+ return null;
107
+ }
108
+
109
+ return Math.pow(left, right);
110
+ }
111
+
112
+ // Scientific Functions
113
+ @Override
114
+ public Double visitFuncCos(CalculatorParser.FuncCosContext ctx)
115
+ {
116
+ return Math.cos(visit(ctx.expr()));
117
+ }
118
+
119
+ @Override
120
+ public Double visitFuncSin(CalculatorParser.FuncSinContext ctx)
121
+ {
122
+ return Math.sin(visit(ctx.expr()));
123
+ }
124
+
125
+ @Override
126
+ public Double visitFuncTan(CalculatorParser.FuncTanContext ctx)
127
+ {
128
+ return Math.tan(visit(ctx.expr()));
129
+ }
130
+ }
@@ -0,0 +1,141 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.ColumnVisitor;
5
+ import org.embulk.spi.Exec;
6
+ import org.embulk.spi.PageBuilder;
7
+ import org.embulk.spi.PageReader;
8
+ import org.embulk.spi.Schema;
9
+ import org.slf4j.Logger;
10
+
11
+ import java.util.HashMap;
12
+
13
+ public class CalcVisitorImpl
14
+ implements ColumnVisitor
15
+ {
16
+
17
+ private static final Logger logger = Exec.getLogger(CalcFilterPlugin.class);
18
+ private final CalcFilterPlugin.PluginTask task;
19
+ private final Schema inputSchema;
20
+ private final Schema outputSchema;
21
+ private final PageReader pageReader;
22
+ private final PageBuilder pageBuilder;
23
+ private final HashMap<String, Calculator> calcMap = new HashMap<>();
24
+
25
+ CalcVisitorImpl(CalcFilterPlugin.PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
26
+ {
27
+ this.task = task;
28
+ this.inputSchema = inputSchema;
29
+ this.outputSchema = outputSchema;
30
+ this.pageReader = pageReader;
31
+ this.pageBuilder = pageBuilder;
32
+ initializeCalcMap();
33
+ }
34
+
35
+ private void initializeCalcMap()
36
+ {
37
+
38
+ for (CalcFilterPlugin.CalcConfig calcConfig : task.getCalcConfig()) {
39
+ String name = calcConfig.getName();
40
+ String formula = calcConfig.getFormula();
41
+ Calculator calc = new Calculator(formula, inputSchema, pageReader);
42
+ calcMap.put(name, calc);
43
+ }
44
+ }
45
+
46
+ @Override
47
+ public void booleanColumn(Column outputColumn)
48
+ {
49
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
50
+ if (pageReader.isNull(inputColumn)) {
51
+ pageBuilder.setNull(outputColumn);
52
+ }
53
+ else {
54
+ pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
55
+ }
56
+ }
57
+
58
+ @Override
59
+ public void longColumn(Column outputColumn)
60
+ {
61
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
62
+ Calculator calc = calcMap.get(outputColumn.getName());
63
+ if (calc == null) {
64
+ if (pageReader.isNull(inputColumn)) {
65
+ pageBuilder.setNull(outputColumn);
66
+ }
67
+ else {
68
+ pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
69
+ }
70
+ }
71
+ else {
72
+ Double val = calc.calc();
73
+ if (val == null) {
74
+ pageBuilder.setNull(outputColumn);
75
+ }
76
+ else {
77
+ pageBuilder.setLong(outputColumn, val.longValue());
78
+ }
79
+ }
80
+ }
81
+
82
+ @Override
83
+ public void doubleColumn(Column outputColumn)
84
+ {
85
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
86
+ Calculator calc = calcMap.get(outputColumn.getName());
87
+ if (calc == null) {
88
+ if (pageReader.isNull(inputColumn)) {
89
+ pageBuilder.setNull(outputColumn);
90
+ }
91
+ else {
92
+ pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
93
+ }
94
+ }
95
+ else {
96
+ Double val = calc.calc();
97
+ if (val == null) {
98
+ pageBuilder.setNull(outputColumn);
99
+ }
100
+ else {
101
+ pageBuilder.setDouble(outputColumn, val);
102
+ }
103
+ }
104
+ }
105
+
106
+ @Override
107
+ public void stringColumn(Column outputColumn)
108
+ {
109
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
110
+ if (pageReader.isNull(inputColumn)) {
111
+ pageBuilder.setNull(outputColumn);
112
+ }
113
+ else {
114
+ pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
115
+ }
116
+ }
117
+
118
+ @Override
119
+ public void jsonColumn(Column outputColumn)
120
+ {
121
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
122
+ if (pageReader.isNull(inputColumn)) {
123
+ pageBuilder.setNull(outputColumn);
124
+ }
125
+ else {
126
+ pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
127
+ }
128
+ }
129
+
130
+ @Override
131
+ public void timestampColumn(Column outputColumn)
132
+ {
133
+ Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
134
+ if (pageReader.isNull(inputColumn)) {
135
+ pageBuilder.setNull(outputColumn);
136
+ }
137
+ else {
138
+ pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
139
+ }
140
+ }
141
+ }
@@ -0,0 +1,30 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.antlr.v4.runtime.ANTLRInputStream;
4
+ import org.antlr.v4.runtime.CommonTokenStream;
5
+ import org.antlr.v4.runtime.tree.ParseTree;
6
+ import org.embulk.spi.PageReader;
7
+ import org.embulk.spi.Schema;
8
+
9
+ public class Calculator
10
+ {
11
+ private ParseTree tree;
12
+ private CalcFormulaVisitor visitor;
13
+
14
+ public Calculator(String formula, Schema inputSchema, PageReader pageReader)
15
+ {
16
+ ANTLRInputStream input = new ANTLRInputStream(formula);
17
+ CalculatorLexer lexer = new CalculatorLexer(input);
18
+ CommonTokenStream tokens = new CommonTokenStream(lexer);
19
+ CalculatorParser parser = new CalculatorParser(tokens);
20
+
21
+ this.tree = parser.expr();
22
+ this.visitor = new CalcFormulaVisitor(inputSchema, pageReader);
23
+ }
24
+
25
+ public Double calc()
26
+ {
27
+ return visitor.visit(tree);
28
+ }
29
+ }
30
+
@@ -0,0 +1,27 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.antlr.v4.runtime.BaseErrorListener;
4
+ import org.antlr.v4.runtime.RecognitionException;
5
+ import org.antlr.v4.runtime.Recognizer;
6
+ import org.embulk.config.ConfigException;
7
+
8
+ import static java.util.Locale.ENGLISH;
9
+
10
+ public class ConfigErrorListener
11
+ extends BaseErrorListener
12
+ {
13
+ private String column_name;
14
+
15
+ public ConfigErrorListener(String column_name)
16
+ {
17
+ this.column_name = column_name;
18
+ }
19
+
20
+ @Override
21
+ public void syntaxError(Recognizer<?, ?> recognizer,
22
+ Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e)
23
+ {
24
+ String err = String.format(ENGLISH, "The \"%s\" column has invalid formula. line: %d error: %s", column_name, line, msg);
25
+ throw new ConfigException(err);
26
+ }
27
+ }