embulk-filter-calc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import com.google.common.collect.Lists;
4
+
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigException;
7
+ import org.embulk.config.ConfigLoader;
8
+ import org.embulk.config.ConfigSource;
9
+ import org.embulk.config.TaskSource;
10
+ import org.embulk.filter.calc.CalcFilterPlugin.PluginTask;
11
+ import org.embulk.spi.Column;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.FilterPlugin;
14
+ import org.embulk.spi.Schema;
15
+ import org.junit.Before;
16
+ import org.junit.Rule;
17
+ import org.junit.Test;
18
+
19
+ import static org.embulk.spi.type.Types.BOOLEAN;
20
+ import static org.embulk.spi.type.Types.DOUBLE;
21
+ import static org.embulk.spi.type.Types.JSON;
22
+ import static org.embulk.spi.type.Types.LONG;
23
+ import static org.embulk.spi.type.Types.STRING;
24
+ import static org.embulk.spi.type.Types.TIMESTAMP;
25
+ import static org.junit.Assert.assertEquals;
26
+
27
+
28
+ public class TestCalcFilterPlugin
29
+ {
30
+
31
+ @Rule
32
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
33
+
34
+ private CalcFilterPlugin plugin;
35
+
36
+
37
+ private Schema schema(Column... columns)
38
+ {
39
+ return new Schema(Lists.newArrayList(columns));
40
+ }
41
+
42
+ private ConfigSource configFromYamlString(String... lines)
43
+ {
44
+ StringBuilder builder = new StringBuilder();
45
+ for (String line : lines) {
46
+ builder.append(line).append("\n");
47
+ }
48
+ String yamlString = builder.toString();
49
+
50
+ ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
51
+ return loader.fromYamlString(yamlString);
52
+ }
53
+
54
+ private PluginTask taskFromYamlString(String... lines)
55
+ {
56
+ ConfigSource config = configFromYamlString(lines);
57
+ return config.loadConfig(PluginTask.class);
58
+ }
59
+
60
+ private void transaction(ConfigSource config, Schema inputSchema)
61
+ {
62
+ plugin.transaction(config, inputSchema, new FilterPlugin.Control() {
63
+ @Override
64
+ public void run(TaskSource taskSource, Schema outputSchema)
65
+ {
66
+ }
67
+ });
68
+ }
69
+
70
+
71
+ @Before
72
+ public void createResource()
73
+ {
74
+ plugin = new CalcFilterPlugin();
75
+ }
76
+
77
+
78
+ @Test
79
+ public void buildOutputSchema_Columns()
80
+ {
81
+ PluginTask task = taskFromYamlString(
82
+ "type: calc",
83
+ "columns:",
84
+ " - { name: long, formula: \"long + 1\"}");
85
+ Schema inputSchema = Schema.builder()
86
+ .add("timestamp", TIMESTAMP)
87
+ .add("string", STRING)
88
+ .add("boolean", BOOLEAN)
89
+ .add("long", LONG)
90
+ .add("double", DOUBLE)
91
+ .add("json", JSON)
92
+ .add("remove_me", STRING)
93
+ .build();
94
+
95
+ Schema outputSchema = CalcFilterPlugin.buildOutputSchema(task, inputSchema);
96
+ assertEquals(7, outputSchema.size());
97
+
98
+ Column column;
99
+ {
100
+ column = outputSchema.getColumn(0);
101
+ assertEquals("timestamp", column.getName());
102
+ }
103
+ }
104
+
105
+
106
+
107
+ }
@@ -0,0 +1,448 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+ import org.embulk.config.ConfigLoader;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.filter.calc.CalcFilterPlugin.PluginTask;
7
+ import org.embulk.spi.Exec;
8
+ import org.embulk.spi.Page;
9
+ import org.embulk.spi.PageBuilder;
10
+ import org.embulk.spi.PageReader;
11
+ import org.embulk.spi.PageTestUtils;
12
+ import org.embulk.spi.Schema;
13
+ import org.embulk.spi.TestPageBuilderReader;
14
+ import org.embulk.spi.time.Timestamp;
15
+ import org.embulk.spi.util.Pages;
16
+ import org.junit.Before;
17
+ import org.junit.Rule;
18
+ import org.junit.Test;
19
+ import org.msgpack.value.ValueFactory;
20
+
21
+ import static org.embulk.spi.type.Types.BOOLEAN;
22
+ import static org.embulk.spi.type.Types.DOUBLE;
23
+ import static org.embulk.spi.type.Types.JSON;
24
+ import static org.embulk.spi.type.Types.LONG;
25
+ import static org.embulk.spi.type.Types.STRING;
26
+ import static org.embulk.spi.type.Types.TIMESTAMP;
27
+ import static org.junit.Assert.assertEquals;
28
+
29
+ import java.util.List;
30
+
31
+
32
+ public class TestCalcVisitorImpl
33
+ {
34
+ @Rule
35
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
36
+
37
+ @Before
38
+ public void createResource()
39
+ {
40
+ }
41
+
42
+ private ConfigSource config()
43
+ {
44
+ return runtime.getExec().newConfigSource();
45
+ }
46
+
47
+ private PluginTask taskFromYamlString(String... lines)
48
+ {
49
+ StringBuilder builder = new StringBuilder();
50
+ for (String line : lines) {
51
+ builder.append(line).append("\n");
52
+ }
53
+ String yamlString = builder.toString();
54
+
55
+ ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
56
+ ConfigSource config = loader.fromYamlString(yamlString);
57
+ return config.loadConfig(PluginTask.class);
58
+ }
59
+
60
+ private List<Object[]> filter(PluginTask task, Schema inputSchema, Object ... objects)
61
+ {
62
+ TestPageBuilderReader.MockPageOutput output = new TestPageBuilderReader.MockPageOutput();
63
+ Schema outputSchema = CalcFilterPlugin.buildOutputSchema(task, inputSchema);
64
+ PageBuilder pageBuilder = new PageBuilder(runtime.getBufferAllocator(), outputSchema, output);
65
+ PageReader pageReader = new PageReader(inputSchema);
66
+ CalcVisitorImpl visitor = new CalcVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
67
+
68
+ List<Page> pages = PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema, objects);
69
+ for (Page page : pages) {
70
+ pageReader.setPage(page);
71
+
72
+ while (pageReader.nextRecord()) {
73
+ outputSchema.visitColumns(visitor);
74
+ pageBuilder.addRecord();
75
+ }
76
+ }
77
+ pageBuilder.finish();
78
+ pageBuilder.close();
79
+ return Pages.toObjects(outputSchema, output.pages);
80
+ }
81
+
82
+ @Test
83
+ public void visit_calc_NoFormula()
84
+ {
85
+ PluginTask task = taskFromYamlString(
86
+ "type: calc",
87
+ "columns: []");
88
+ Schema inputSchema = Schema.builder()
89
+ .add("timestamp",TIMESTAMP)
90
+ .add("string",STRING)
91
+ .add("boolean", BOOLEAN)
92
+ .add("long", LONG)
93
+ .add("double",DOUBLE)
94
+ .add("json",JSON)
95
+ .build();
96
+ List<Object[]> records = filter(task, inputSchema,
97
+ // row1
98
+ Timestamp.ofEpochSecond(1436745600), "string", new Boolean(true), new Long(0), new Double(0.5), ValueFactory.newString("json"),
99
+ // row2
100
+ Timestamp.ofEpochSecond(1436745600), "string", new Boolean(true), new Long(0), new Double(0.5), ValueFactory.newString("json"));
101
+
102
+ assertEquals(2, records.size());
103
+
104
+ Object[] record;
105
+ {
106
+ record = records.get(0);
107
+ assertEquals(6, record.length);
108
+ assertEquals(Timestamp.ofEpochSecond(1436745600),record[0]);
109
+ assertEquals("string",record[1]);
110
+ assertEquals(new Boolean(true),record[2]);
111
+ assertEquals(new Long(0),record[3]);
112
+ assertEquals(new Double(0.5),record[4]);
113
+ assertEquals(ValueFactory.newString("json"),record[5]);
114
+ }
115
+ }
116
+
117
+ @Test
118
+ public void visit_calc_NoFormulaWithNull()
119
+ {
120
+ PluginTask task = taskFromYamlString(
121
+ "type: calc",
122
+ "columns: []");
123
+ Schema inputSchema = Schema.builder()
124
+ .add("dummy",STRING)
125
+ .add("timestamp",TIMESTAMP)
126
+ .add("string",STRING)
127
+ .add("boolean", BOOLEAN)
128
+ .add("long", LONG)
129
+ .add("double",DOUBLE)
130
+ .add("json",JSON)
131
+ .build();
132
+ List<Object[]> records = filter(task, inputSchema,
133
+ // row1
134
+ "dummy",null,null,null,null,null,null,
135
+ // row2
136
+ "dummy",null,null,null,null,null,null);
137
+
138
+ assertEquals(2, records.size());
139
+
140
+ Object[] record;
141
+ {
142
+ record = records.get(0);
143
+ assertEquals(7, record.length);
144
+ assertEquals("dummy",record[0]);
145
+ assertEquals(null,record[1]);
146
+ assertEquals(null,record[2]);
147
+ assertEquals(null,record[3]);
148
+ assertEquals(null,record[4]);
149
+ assertEquals(null,record[5]);
150
+ assertEquals(null,record[6]);
151
+
152
+ }
153
+ }
154
+
155
+ @Test
156
+ public void visit_calc_NullFormula()
157
+ {
158
+ PluginTask task = taskFromYamlString(
159
+ "type: calc",
160
+ "columns:",
161
+ " - {name: long, formula: \" long + 10 \"}",
162
+ " - {name: double, formula: \" double + 10 \"}");
163
+ Schema inputSchema = Schema.builder()
164
+ .add("long", LONG)
165
+ .add("double", DOUBLE)
166
+ .build();
167
+ List<Object[]> records = filter(task, inputSchema,
168
+ // row1
169
+ null,null,
170
+ // row2
171
+ null,null);
172
+
173
+ assertEquals(2, records.size());
174
+
175
+ Object[] record;
176
+ {
177
+ record = records.get(0);
178
+ assertEquals(2, record.length);
179
+ assertEquals(null,record[0]);
180
+ assertEquals(null,record[1]);
181
+ }
182
+ }
183
+
184
+ @Test
185
+ public void visit_calc_SingleFormula()
186
+ {
187
+ PluginTask task = taskFromYamlString(
188
+ "type: calc",
189
+ "columns:",
190
+ " - {name: long1, formula: \" 100 \"}",
191
+ " - {name: long2, formula: \" long2 \"}",
192
+ " - {name: double1, formula: \" 11.1 \"}",
193
+ " - {name: double2, formula: \" double2 \"}");
194
+ Schema inputSchema = Schema.builder()
195
+ .add("long1", LONG)
196
+ .add("long2", LONG)
197
+ .add("double1", DOUBLE)
198
+ .add("double2", DOUBLE)
199
+ .build();
200
+ List<Object[]> records = filter(task, inputSchema,
201
+ // row1
202
+ new Long(521),new Long(521),new Double(523.5),new Double(523.5),
203
+ // row2
204
+ new Long(521),new Long(521),new Double(523.5),new Double(523.5));
205
+
206
+ assertEquals(2, records.size());
207
+
208
+ Object[] record;
209
+ {
210
+ record = records.get(0);
211
+ assertEquals(4, record.length);
212
+ assertEquals(new Long(100), record[0]);
213
+ assertEquals(new Long(521), record[1]);
214
+ assertEquals(new Double(11.1), record[2]);
215
+ assertEquals(new Double(523.5),record[3]);
216
+ }
217
+ }
218
+
219
+
220
+ @Test
221
+ public void visit_calc_MathFormula()
222
+ {
223
+ PluginTask task = taskFromYamlString(
224
+ "type: calc",
225
+ "columns:",
226
+ " - {name: sin_value, formula: \" sin(sin_value) \"}",
227
+ " - {name: cos_value, formula: \" cos(cos_value) \"}",
228
+ " - {name: tan_value, formula: \" tan(tan_value) \"}");
229
+ Schema inputSchema = Schema.builder()
230
+ .add("sin_value", DOUBLE)
231
+ .add("cos_value", DOUBLE)
232
+ .add("tan_value", DOUBLE)
233
+ .build();
234
+ List<Object[]> records = filter(task, inputSchema,
235
+ // row1
236
+ new Double(0.05),new Double(0.05),new Double(0.05),
237
+ // row2
238
+ new Double(0.05),new Double(0.05),new Double(0.05));
239
+
240
+ assertEquals(2, records.size());
241
+
242
+ Object[] record;
243
+ {
244
+ record = records.get(0);
245
+ assertEquals(3, record.length);
246
+ assertEquals(Math.sin(0.05),record[0]);
247
+ assertEquals(Math.cos(0.05),record[1]);
248
+ assertEquals(Math.tan(0.05),record[2]);
249
+ }
250
+ }
251
+
252
+ @Test
253
+ public void visit_calc_SinglePowerFormula()
254
+ {
255
+ PluginTask task = taskFromYamlString(
256
+ "type: calc",
257
+ "columns:",
258
+ " - {name: long1, formula: \" 2 ^ 8\"}",
259
+ " - {name: long2, formula: \" long2 ^ 8 \"}",
260
+ " - {name: double1, formula: \" 2 ^ 8 \"}",
261
+ " - {name: double2, formula: \" double2 ^ 8 \"}");
262
+ Schema inputSchema = Schema.builder()
263
+ .add("long1", LONG)
264
+ .add("long2", LONG)
265
+ .add("double1", DOUBLE)
266
+ .add("double2", DOUBLE)
267
+ .build();
268
+ List<Object[]> records = filter(task, inputSchema,
269
+ // row1
270
+ new Long(10),new Long(3),new Double(10.0),new Double(2.0),
271
+ // row2
272
+ new Long(10),new Long(2),new Double(10.0),new Double(2.0));
273
+
274
+ assertEquals(2, records.size());
275
+
276
+ Object[] record;
277
+ {
278
+ record = records.get(0);
279
+ assertEquals(4, record.length);
280
+ assertEquals(new Long(256), record[0]);
281
+ assertEquals(new Long(6561), record[1]);
282
+ assertEquals(new Double(256), record[2]);
283
+ assertEquals(new Double(256.0),record[3]);
284
+ }
285
+ }
286
+ @Test
287
+ public void visit_calc_BasicFormula()
288
+ {
289
+ PluginTask task = taskFromYamlString(
290
+ "type: calc",
291
+ "columns:",
292
+ " - {name: add_long, formula: \" add_long + 100\"}",
293
+ " - {name: sub_long, formula: \" sub_long - 100\"}",
294
+ " - {name: mul_long, formula: \" mul_long * 100\"}",
295
+ " - {name: div_long, formula: \" div_long / 100\"}",
296
+ " - {name: mod_long, formula: \" mod_long % 100\"}",
297
+ " - {name: add_double, formula: \" add_double + 100\"}",
298
+ " - {name: sub_double, formula: \" sub_double - 100\"}",
299
+ " - {name: mul_double, formula: \" mul_double * 100\"}",
300
+ " - {name: div_double, formula: \" div_double / 100\"}",
301
+ " - {name: mod_double, formula: \" mod_double % 100\"}");
302
+ Schema inputSchema = Schema.builder()
303
+ .add("add_long", LONG)
304
+ .add("sub_long", LONG)
305
+ .add("mul_long", LONG)
306
+ .add("div_long", LONG)
307
+ .add("mod_long", LONG)
308
+ .add("add_double", DOUBLE)
309
+ .add("sub_double", DOUBLE)
310
+ .add("mul_double", DOUBLE)
311
+ .add("div_double", DOUBLE)
312
+ .add("mod_double", DOUBLE)
313
+ .build();
314
+ List<Object[]> records = filter(task, inputSchema,
315
+ // row1
316
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
317
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),
318
+ // row2
319
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
320
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5));
321
+
322
+ assertEquals(2, records.size());
323
+
324
+ Object[] record;
325
+ {
326
+ record = records.get(0);
327
+ assertEquals(10, record.length);
328
+ assertEquals(new Long(621), record[0]);
329
+ assertEquals(new Long(421), record[1]);
330
+ assertEquals(new Long(52100), record[2]);
331
+ assertEquals(new Long(5), record[3]);
332
+ assertEquals(new Long(21), record[4]);
333
+ assertEquals(new Double(623.5),record[5]);
334
+ assertEquals(new Double(423.5),record[6]);
335
+ assertEquals(new Double(52350),record[7]);
336
+ assertEquals(new Double(5.235),record[8]);
337
+ assertEquals(new Double(23.5), record[9]);
338
+ }
339
+ }
340
+ @Test
341
+ public void visit_calc_PriorityChkFormula()
342
+ {
343
+ PluginTask task = taskFromYamlString(
344
+ "type: calc",
345
+ "columns:",
346
+ " - {name: add_long, formula: \" add_long + 100 * 3\"}",
347
+ " - {name: sub_long, formula: \" sub_long - 100 * 3\"}",
348
+ " - {name: mul_long, formula: \" mul_long * 100 * 3\"}",
349
+ " - {name: div_long, formula: \" div_long / 100 * 3\"}",
350
+ " - {name: mod_long, formula: \" mod_long % 100 * 3\"}",
351
+ " - {name: add_double, formula: \" add_double + 100 * 3\"}",
352
+ " - {name: sub_double, formula: \" sub_double - 100 * 3\"}",
353
+ " - {name: mul_double, formula: \" mul_double * 100 * 3\"}",
354
+ " - {name: div_double, formula: \" div_double / 100 * 3\"}",
355
+ " - {name: mod_double, formula: \" mod_double % 100 * 3\"}");
356
+ Schema inputSchema = Schema.builder()
357
+ .add("add_long", LONG)
358
+ .add("sub_long", LONG)
359
+ .add("mul_long", LONG)
360
+ .add("div_long", LONG)
361
+ .add("mod_long", LONG)
362
+ .add("add_double", DOUBLE)
363
+ .add("sub_double", DOUBLE)
364
+ .add("mul_double", DOUBLE)
365
+ .add("div_double", DOUBLE)
366
+ .add("mod_double", DOUBLE)
367
+ .build();
368
+ List<Object[]> records = filter(task, inputSchema,
369
+ // row1
370
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
371
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),
372
+ // row2
373
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
374
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5));
375
+
376
+ assertEquals(2, records.size());
377
+
378
+ Object[] record;
379
+ {
380
+ record = records.get(0);
381
+ assertEquals(10, record.length);
382
+ assertEquals(new Long(821), record[0]);
383
+ assertEquals(new Long(221), record[1]);
384
+ assertEquals(new Long(156300), record[2]);
385
+ assertEquals(new Long(15), record[3]);
386
+ assertEquals(new Long(63), record[4]);
387
+ assertEquals(new Double(823.5),record[5]);
388
+ assertEquals(new Double(223.5),record[6]);
389
+ assertEquals(new Double(157050),record[7]);
390
+ // assertEquals(new Double(15.705),record[8]); // TODO result 15.705000000000002
391
+ assertEquals(new Double(70.5), record[9]);
392
+ }
393
+ }
394
+ @Test
395
+ public void visit_calc_ParenChkFormula()
396
+ {
397
+ PluginTask task = taskFromYamlString(
398
+ "type: calc",
399
+ "columns:",
400
+ " - {name: add_long, formula: \" ( add_long + 100 ) * 3\"}",
401
+ " - {name: sub_long, formula: \" ( sub_long - 100 ) * 3\"}",
402
+ " - {name: mul_long, formula: \" ( mul_long * 100 ) * 3\"}",
403
+ " - {name: div_long, formula: \" ( div_long / 100 ) * 3\"}",
404
+ " - {name: mod_long, formula: \" ( mod_long % 100 ) * 3\"}",
405
+ " - {name: add_double, formula: \" ( add_double + 100 ) * 3\"}",
406
+ " - {name: sub_double, formula: \" ( sub_double - 100 ) * 3\"}",
407
+ " - {name: mul_double, formula: \" ( mul_double * 100 ) * 3\"}",
408
+ " - {name: div_double, formula: \" ( div_double / 100 ) * 3\"}",
409
+ " - {name: mod_double, formula: \" ( mod_double % 100 ) * 3\"}");
410
+ Schema inputSchema = Schema.builder()
411
+ .add("add_long", LONG)
412
+ .add("sub_long", LONG)
413
+ .add("mul_long", LONG)
414
+ .add("div_long", LONG)
415
+ .add("mod_long", LONG)
416
+ .add("add_double", DOUBLE)
417
+ .add("sub_double", DOUBLE)
418
+ .add("mul_double", DOUBLE)
419
+ .add("div_double", DOUBLE)
420
+ .add("mod_double", DOUBLE)
421
+ .build();
422
+ List<Object[]> records = filter(task, inputSchema,
423
+ // row1
424
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
425
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),
426
+ // row2
427
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
428
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5));
429
+
430
+ assertEquals(2, records.size());
431
+
432
+ Object[] record;
433
+ {
434
+ record = records.get(0);
435
+ assertEquals(10, record.length);
436
+ assertEquals(new Long(1863), record[0]);
437
+ assertEquals(new Long(1263), record[1]);
438
+ assertEquals(new Long(156300), record[2]);
439
+ assertEquals(new Long(15), record[3]);
440
+ assertEquals(new Long(63), record[4]);
441
+ assertEquals(new Double(1870.5),record[5]);
442
+ assertEquals(new Double(1270.5),record[6]);
443
+ assertEquals(new Double(157050),record[7]);
444
+ // assertEquals(new Double(15.705),record[8]); // TODO result 15.705000000000002
445
+ assertEquals(new Double(70.5), record[9]);
446
+ }
447
+ }
448
+ }