embulk-parser-csv_with_default_value 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +54 -0
  5. data/build.gradle +96 -0
  6. data/config/checkstyle/checkstyle.xml +128 -0
  7. data/config/checkstyle/default.xml +108 -0
  8. data/gradlew +160 -0
  9. data/gradlew.bat +90 -0
  10. data/lib/embulk/guess/csv_with_default_value.rb +61 -0
  11. data/lib/embulk/parser/csv_with_default_value.rb +3 -0
  12. data/src/main/java/org/embulk/parser/csv_with_default_value/ColumnDefaultValue.java +123 -0
  13. data/src/main/java/org/embulk/parser/csv_with_default_value/ColumnDefaultValueImpl.java +68 -0
  14. data/src/main/java/org/embulk/parser/csv_with_default_value/CsvRecordValidateException.java +13 -0
  15. data/src/main/java/org/embulk/parser/csv_with_default_value/CsvTokenizer.java +512 -0
  16. data/src/main/java/org/embulk/parser/csv_with_default_value/CsvWithDefaultValueParserPlugin.java +447 -0
  17. data/src/test/java/org/embulk/EmbulkTestRuntime.java +113 -0
  18. data/src/test/java/org/embulk/GuiceBinder.java +72 -0
  19. data/src/test/java/org/embulk/RandomManager.java +53 -0
  20. data/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  21. data/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  22. data/src/test/java/org/embulk/parser/csv_with_default_value/TestCsvWithDefaultValueParserPlugin.java +97 -0
  23. data/src/test/java/org/embulk/parser/csv_with_default_value/ValueTypeTest.java +47 -0
  24. data/src/test/java/org/embulk/spi/MockFormatterPlugin.java +108 -0
  25. data/src/test/java/org/embulk/spi/MockParserPlugin.java +80 -0
  26. metadata +97 -0
@@ -0,0 +1,72 @@
1
+ package org.embulk;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+ import org.junit.rules.TestRule;
6
+ import org.junit.rules.TestWatcher;
7
+ import org.junit.runner.Description;
8
+ import org.junit.runners.model.Statement;
9
+ import com.google.common.collect.ImmutableList;
10
+ import com.google.inject.Guice;
11
+ import com.google.inject.Injector;
12
+ import com.google.inject.Module;
13
+
14
+ public class GuiceBinder
15
+ implements TestRule
16
+ {
17
+ private final List<Module> baseModules;
18
+ private List<Module> extraModules;
19
+ private Injector injector;
20
+
21
+ public GuiceBinder(Module... baseModules)
22
+ {
23
+ this.baseModules = ImmutableList.copyOf(baseModules);
24
+ reset();
25
+ }
26
+
27
+ private void reset()
28
+ {
29
+ extraModules = new ArrayList<Module>();
30
+ injector = null;
31
+ }
32
+
33
+ public synchronized void addModule(Module module)
34
+ {
35
+ if (injector != null) {
36
+ throw new IllegalStateException("Injector is already initialized. Call addModule before getInjector or getInstance");
37
+ }
38
+ extraModules.add(module);
39
+ }
40
+
41
+ public synchronized Injector getInjector()
42
+ {
43
+ if (injector == null) {
44
+ ImmutableList.Builder<Module> modules = ImmutableList.builder();
45
+ modules.addAll(baseModules);
46
+ modules.addAll(extraModules);
47
+ injector = Guice.createInjector(modules.build());
48
+ }
49
+ return injector;
50
+ }
51
+
52
+ public <T> T getInstance(Class<T> klass)
53
+ {
54
+ return getInjector().getInstance(klass);
55
+ }
56
+
57
+ @Override
58
+ public Statement apply(Statement base, Description description)
59
+ {
60
+ return new GuceBinderWatcher().apply(base, description);
61
+ }
62
+
63
+ private class GuceBinderWatcher
64
+ extends TestWatcher
65
+ {
66
+ @Override
67
+ protected void starting(Description description)
68
+ {
69
+ reset();
70
+ }
71
+ }
72
+ }
@@ -0,0 +1,53 @@
1
+ package org.embulk;
2
+
3
+ import java.util.Map;
4
+ import java.util.Random;
5
+
6
/**
 * Holds a {@link Random} together with the seed it was most recently given, and prints that seed
 * to standard output whenever it is set.
 */
public class RandomManager
{
    protected long seed;
    protected Random random;

    /**
     * Creates a manager seeded from the {@code RANDOM_SEED} environment variable when it holds a
     * decimal {@code long}, or from a freshly generated random value otherwise.
     */
    public RandomManager()
    {
        this(getDefaultSeed());
    }

    /**
     * Creates a manager whose generator starts from {@code seed}.
     *
     * @param seed the initial seed
     */
    public RandomManager(long seed)
    {
        this.random = new Random(seed);
        this.seed = seed;
        System.out.println(" Random seed: 0x" + Long.toHexString(seed) + "L");
    }

    /**
     * @return the seed most recently applied to the generator
     */
    public long getRandomSeed()
    {
        return this.seed;
    }

    /**
     * Re-seeds the existing generator and records the new seed.
     *
     * @param seed the seed to apply
     */
    public void setRandomSeed(long seed)
    {
        this.random.setSeed(seed);
        this.seed = seed;
        System.out.println(" Set random seed: 0x" + Long.toHexString(seed) + "L");
    }

    /**
     * @return the generator owned by this manager (always the same instance)
     */
    public Random getRandom()
    {
        return this.random;
    }

    /**
     * Resolves the seed used by the no-argument constructor: the {@code RANDOM_SEED} environment
     * variable parsed as a decimal long, falling back to a random value when the variable is
     * absent or not parseable (the parse failure is reported on standard output).
     */
    private static long getDefaultSeed()
    {
        final String configured = System.getenv().get("RANDOM_SEED");
        if (configured != null) {
            try {
                return Long.parseLong(configured);
            }
            catch (NumberFormatException e) {
                System.out.println("RANDOM_SEED variable is wrong: " + e);
            }
        }
        return new Random().nextLong();
    }
}
@@ -0,0 +1,23 @@
1
+ package org.embulk;
2
+
3
+ import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
4
+
5
+ import org.embulk.spi.FormatterPlugin;
6
+ import org.embulk.spi.MockFormatterPlugin;
7
+ import org.embulk.spi.MockParserPlugin;
8
+ import org.embulk.spi.ParserPlugin;
9
+
10
+ import com.google.inject.Binder;
11
+ import com.google.inject.Module;
12
+
13
+ public class TestPluginSourceModule implements Module
14
+ {
15
+ @Override
16
+ public void configure(Binder binder)
17
+ {
18
+ registerPluginTo(binder, ParserPlugin.class, "mock",
19
+ MockParserPlugin.class);
20
+ registerPluginTo(binder, FormatterPlugin.class, "mock",
21
+ MockFormatterPlugin.class);
22
+ }
23
+ }
@@ -0,0 +1,17 @@
1
+ package org.embulk;
2
+
3
+ //import org.embulk.record.RandomRecordGenerator;
4
+ //import org.embulk.record.RandomSchemaGenerator;
5
+ import com.google.inject.Binder;
6
+ import com.google.inject.Module;
7
+
8
+ public class TestUtilityModule
9
+ implements Module
10
+ {
11
+ @Override
12
+ public void configure(Binder binder) {
13
+ binder.bind(RandomManager.class);
14
+ //binder.bind(RandomRecordGenerator.class);
15
+ //binder.bind(RandomSchemaGenerator.class);
16
+ }
17
+ }
@@ -0,0 +1,97 @@
1
+ package org.embulk.parser.csv_with_default_value;
2
+
3
+ import com.google.common.collect.Maps;
4
+ import org.junit.Rule;
5
+ import org.junit.Test;
6
+ import static org.junit.Assert.assertEquals;
7
+ import java.nio.charset.Charset;
8
+ import com.google.common.base.Optional;
9
+ import com.google.common.collect.ImmutableList;
10
+ import com.google.common.collect.ImmutableMap;
11
+ import org.joda.time.DateTimeZone;
12
+ import org.embulk.config.ConfigException;
13
+ import org.embulk.config.ConfigSource;
14
+ import org.embulk.spi.Exec;
15
+ import org.embulk.spi.util.Newline;
16
+ import org.embulk.EmbulkTestRuntime;
17
+
18
+
19
+ public class TestCsvWithDefaultValueParserPlugin {
20
+
21
+
22
+ @Rule
23
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
24
+
25
+ @Test
26
+ public void checkDefaultValues()
27
+ {
28
+ ConfigSource config = Exec.newConfigSource()
29
+ .set("columns", ImmutableList.of(
30
+ ImmutableMap.of(
31
+ "name", "date_code",
32
+ "type", "string"))
33
+ );
34
+
35
+ CsvWithDefaultValueParserPlugin.PluginTask task = config.loadConfig(CsvWithDefaultValueParserPlugin.PluginTask.class);
36
+ assertEquals(Charset.forName("utf-8"), task.getCharset());
37
+ assertEquals(Newline.CRLF, task.getNewline());
38
+ assertEquals(false, task.getHeaderLine().or(false));
39
+ assertEquals(",", task.getDelimiter());
40
+ assertEquals(Optional.of(new CsvWithDefaultValueParserPlugin.QuoteCharacter('\"')), task.getQuoteChar());
41
+ assertEquals(false, task.getAllowOptionalColumns());
42
+ assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
43
+ assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
44
+ assertEquals(Maps.newHashMap(), task.getDefaultValues());
45
+ }
46
+ @Test(expected = ConfigException.class)
47
+ public void checkColumnsRequired()
48
+ {
49
+ ConfigSource config = Exec.newConfigSource();
50
+
51
+ config.loadConfig(CsvWithDefaultValueParserPlugin.PluginTask.class);
52
+ }
53
+
54
+ @Test
55
+ public void checkLoadConfig()
56
+ {
57
+ ConfigSource config = Exec.newConfigSource()
58
+ .set("charset", "utf-16")
59
+ .set("newline", "LF")
60
+ .set("header_line", true)
61
+ .set("delimiter", "\t")
62
+ .set("quote", "\\")
63
+ .set("allow_optional_columns", true)
64
+ .set("columns", ImmutableList.of(
65
+ ImmutableMap.of(
66
+ "name", "date_code",
67
+ "type", "string"))
68
+
69
+ )
70
+ .set("default_values", ImmutableMap.of(
71
+ "longCol", ImmutableMap.of("default_value", 123, "type", "immediate"),
72
+ "doubleCol", ImmutableMap.of("default_value", 123, "type", "immediate"),
73
+ "timestampCol", ImmutableMap.of("default_value", "1900-01-01 00:00:00", "type", "immediate")
74
+ ));
75
+
76
+ CsvWithDefaultValueParserPlugin.PluginTask task = config.loadConfig(CsvWithDefaultValueParserPlugin.PluginTask.class);
77
+ assertEquals(Charset.forName("utf-16"), task.getCharset());
78
+ assertEquals(Newline.LF, task.getNewline());
79
+ assertEquals(true, task.getHeaderLine().or(false));
80
+ assertEquals("\t", task.getDelimiter());
81
+ assertEquals(Optional.of(new CsvWithDefaultValueParserPlugin.QuoteCharacter('\\')), task.getQuoteChar());
82
+ assertEquals(true, task.getAllowOptionalColumns());
83
+
84
+ assertEquals(ImmutableMap.of(
85
+ "longCol", new ColumnDefaultValueImpl("123", ColumnDefaultValue.ValueType.IMMEDIATE),
86
+ "doubleCol", new ColumnDefaultValueImpl("123", ColumnDefaultValue.ValueType.IMMEDIATE),
87
+ "timestampCol", new ColumnDefaultValueImpl("1900-01-01 00:00:00", ColumnDefaultValue.ValueType.IMMEDIATE)),
88
+ task.getDefaultValues());
89
+ }
90
+
91
+ @Test
92
+ public void checkColumnDefaultValues(){
93
+
94
+ }
95
+
96
+
97
+ }
@@ -0,0 +1,47 @@
1
+ package org.embulk.parser.csv_with_default_value;
2
+
3
+ import org.embulk.config.ConfigException;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.PageBuilder;
6
+ import org.embulk.spi.type.Types;
7
+ import org.junit.Test;
8
+
9
+ import static org.junit.Assert.*;
10
+ import static org.hamcrest.CoreMatchers.*;
11
+ import static org.embulk.parser.csv_with_default_value.ColumnDefaultValue.ValueType.*;
12
+ import static org.mockito.Mockito.*;
13
+ public class ValueTypeTest {
14
+
15
+ @Test
16
+ public void testFromString(){
17
+ assertThat(ColumnDefaultValue.ValueType.fromString("immediate"), equalTo(ColumnDefaultValue.ValueType.IMMEDIATE));
18
+ assertThat(ColumnDefaultValue.ValueType.fromString("null"), equalTo(ColumnDefaultValue.ValueType.NULL));
19
+ //TODO add assert for timestamp
20
+ }
21
+
22
+ @Test(expected = ConfigException.class)
23
+ public void testFromStringThrowsException(){
24
+ ColumnDefaultValue.ValueType.fromString("hoge");
25
+ }
26
+
27
+ @Test
28
+ public void testGetValueMethods(){
29
+ PageBuilder pageBuilder = mock(PageBuilder.class);
30
+
31
+ IMMEDIATE.doubleValue(new ColumnDefaultValueImpl("123", IMMEDIATE), pageBuilder, new Column(0, "doubleCol", Types.DOUBLE));
32
+ IMMEDIATE.longValue(new ColumnDefaultValueImpl("123", IMMEDIATE), pageBuilder, new Column(1, "longCol", Types.LONG));
33
+ verify(pageBuilder).setDouble(new Column(0, "doubleCol", Types.DOUBLE),123.0);
34
+ verify(pageBuilder).setLong(new Column(1, "longCol", Types.LONG),123L);
35
+ }
36
+
37
+ @Test(expected = ConfigException.class)
38
+ public void testGetLongValueFromNullFails(){
39
+ NULL.longValue(new ColumnDefaultValueImpl("123", NULL), mock(PageBuilder.class), new Column(0, "", Types.DOUBLE));
40
+ }
41
+
42
+ @Test(expected = ConfigException.class)
43
+ public void testGetDoubleValueFromNullFails(){
44
+ NULL.doubleValue(new ColumnDefaultValueImpl("123", NULL), mock(PageBuilder.class), new Column(0, "", Types.DOUBLE));
45
+ }
46
+
47
+ }
@@ -0,0 +1,108 @@
1
+ package org.embulk.spi;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.Task;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.spi.Column;
10
+ import org.embulk.spi.Schema;
11
+ import org.embulk.spi.ColumnVisitor;
12
+
13
+ public class MockFormatterPlugin implements FormatterPlugin
14
+ {
15
+ public static List<List<Object>> records;
16
+
17
+ public interface PluginTask extends Task
18
+ {
19
+ }
20
+
21
+ @Override
22
+ public void transaction(ConfigSource config, Schema schema,
23
+ FormatterPlugin.Control control)
24
+ {
25
+ PluginTask task = config.loadConfig(PluginTask.class);
26
+ control.run(task.dump());
27
+ }
28
+
29
+ @Override
30
+ public PageOutput open(TaskSource taskSource, final Schema schema,
31
+ FileOutput output)
32
+ {
33
+ return new PageOutput()
34
+ {
35
+ public void add(Page page)
36
+ {
37
+ records = readPage(schema, page);
38
+ }
39
+
40
+ @Override
41
+ public void finish()
42
+ {
43
+ }
44
+
45
+ @Override
46
+ public void close()
47
+ {
48
+ }
49
+ };
50
+ }
51
+
52
+ public static List<List<Object>> readPage(final Schema schema, Page page)
53
+ {
54
+ List<List<Object>> records = new ArrayList<>();
55
+ try (final PageReader pageReader = new PageReader(schema)) {
56
+ pageReader.setPage(page);
57
+ while (pageReader.nextRecord()) {
58
+ final List<Object> record = new ArrayList<>();
59
+ schema.visitColumns(new ColumnVisitor()
60
+ {
61
+ public void booleanColumn(Column column)
62
+ {
63
+ if (!pageReader.isNull(column)) {
64
+ record.add(pageReader.getBoolean(column));
65
+ }
66
+ }
67
+
68
+ public void longColumn(Column column)
69
+ {
70
+ if (!pageReader.isNull(column)) {
71
+ record.add(pageReader.getLong(column));
72
+ }
73
+ }
74
+
75
+ public void doubleColumn(Column column)
76
+ {
77
+ if (!pageReader.isNull(column)) {
78
+ record.add(pageReader.getDouble(column));
79
+ }
80
+ }
81
+
82
+ public void stringColumn(Column column)
83
+ {
84
+ if (!pageReader.isNull(column)) {
85
+ record.add(pageReader.getString(column));
86
+ }
87
+ }
88
+
89
+ public void timestampColumn(Column column)
90
+ {
91
+ if (!pageReader.isNull(column)) {
92
+ record.add(pageReader.getTimestamp(column));
93
+ }
94
+ }
95
+
96
+ public void jsonColumn(Column column)
97
+ {
98
+ if (!pageReader.isNull(column)) {
99
+ record.add(pageReader.getJson(column));
100
+ }
101
+ }
102
+ });
103
+ records.add(record);
104
+ }
105
+ }
106
+ return records;
107
+ }
108
+ }
@@ -0,0 +1,80 @@
1
+ package org.embulk.spi;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigSource;
5
+ import org.embulk.config.Task;
6
+ import org.embulk.config.TaskSource;
7
+ import org.embulk.spi.json.JsonParser;
8
+ import org.embulk.spi.time.Timestamp;
9
+ import org.embulk.spi.type.Type;
10
+ import org.embulk.spi.Column;
11
+ import org.embulk.spi.Schema;
12
+ import org.embulk.spi.SchemaConfig;
13
+ import org.embulk.spi.PageOutput;
14
+
15
+ public class MockParserPlugin implements ParserPlugin
16
+ {
17
+ public static boolean raiseException = false;
18
+
19
+ public interface PluginTask extends Task
20
+ {
21
+ @Config("columns")
22
+ SchemaConfig getSchemaConfig();
23
+ }
24
+
25
+ @Override
26
+ public void transaction(ConfigSource config, Control control)
27
+ {
28
+ PluginTask task = config.loadConfig(PluginTask.class);
29
+ control.run(task.dump(), task.getSchemaConfig().toSchema());
30
+ }
31
+
32
+ @Override
33
+ public void run(TaskSource taskSource, Schema schema,
34
+ FileInput input, PageOutput output)
35
+ {
36
+ try (final PageBuilder pageBuilder = new PageBuilder(
37
+ Exec.getBufferAllocator(), schema, output)) {
38
+ while (input.nextFile()) {
39
+ Buffer buffer = input.poll();
40
+ if (buffer != null) {
41
+ for (Column column : schema.getColumns()) {
42
+ Type type = column.getType();
43
+ switch (type.getName()) {
44
+ case "boolean":
45
+ pageBuilder.setBoolean(column, true);
46
+ break;
47
+ case "long":
48
+ pageBuilder.setLong(column, 2L);
49
+ break;
50
+ case "double":
51
+ pageBuilder.setDouble(column, 3.0D);
52
+ break;
53
+ case "string":
54
+ pageBuilder.setString(column, "45");
55
+ break;
56
+ case "timestamp":
57
+ pageBuilder.setTimestamp(column,
58
+ Timestamp.ofEpochMilli(678L));
59
+ break;
60
+ case "json":
61
+ pageBuilder.setJson(
62
+ column,
63
+ new JsonParser().parse("{\"_c1\":true,\"_c2\":10,\"_c3\":\"embulk\",\"_c4\":{\"k\":\"v\"}}")
64
+ );
65
+ break;
66
+ default:
67
+ throw new IllegalStateException("Unknown getType: "
68
+ + type.getName());
69
+ }
70
+ }
71
+ pageBuilder.addRecord();
72
+ if (raiseException) {
73
+ throw new RuntimeException("emulated exception");
74
+ }
75
+ }
76
+ }
77
+ pageBuilder.finish();
78
+ }
79
+ }
80
+ }