embulk-parser-csv_with_schema_file 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31db0959f10ea5f4ed7dd1cf502966899eb60e36
|
4
|
+
data.tar.gz: 9e6b08b3f90978ec99a74d2c1630ecef31b31a3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 965a3fe66e997f53401325bda3f7fa8a1ec55722a5416ddd782f10e5dfb3ce1343267f2e9ff2584b76024da8a23cb24074dafd3bbc7c932a8bfd85df4edc9d62
|
7
|
+
data.tar.gz: 309e34839bb2572a5d800b7d00d7022d43b7d612ac36442d96911b0c9191af81d5336febb0c8bba1afabc3b1a3a3f25a4bf75c0b721b732112530a83508ceb5a
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ $ embulk gem install embulk-parser-csv_with_schema_file
|
|
18
18
|
## Configuration
|
19
19
|
|
20
20
|
- **schema_path**: Path to the schema file, in JSON format. (string, required)
|
21
|
-
- **columns**:
|
21
|
+
- **columns**: Optional. If present, each column defined here overrides the column with the same name in the schema file. (hash, default: `[]`)
|
22
22
|
- All other options are inherited from the built-in CSV parser. See: http://www.embulk.org/docs/built-in.html#csv-parser-plugin
|
23
23
|
|
24
24
|
## Example
|
data/build.gradle
CHANGED
data/src/main/java/org/embulk/parser/csv_with_schema_file/CsvParserWithSchemaFilePlugin.java
CHANGED
@@ -2,6 +2,7 @@ package org.embulk.parser.csv_with_schema_file;
|
|
2
2
|
|
3
3
|
import java.io.File;
|
4
4
|
import java.util.List;
|
5
|
+
import java.util.Map;
|
5
6
|
|
6
7
|
import org.embulk.config.Config;
|
7
8
|
import org.embulk.config.ConfigDefault;
|
@@ -12,11 +13,14 @@ import org.embulk.spi.Exec;
|
|
12
13
|
import org.embulk.spi.ParserPlugin;
|
13
14
|
import org.embulk.spi.Schema;
|
14
15
|
import org.embulk.spi.SchemaConfig;
|
16
|
+
import org.embulk.spi.type.Type;
|
15
17
|
import org.embulk.standards.CsvParserPlugin;
|
18
|
+
import org.jruby.org.objectweb.asm.TypeReference;
|
16
19
|
import org.slf4j.Logger;
|
17
20
|
|
18
21
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
19
22
|
import com.google.common.collect.Lists;
|
23
|
+
import com.google.common.collect.Maps;
|
20
24
|
|
21
25
|
public class CsvParserWithSchemaFilePlugin
|
22
26
|
extends CsvParserPlugin
|
@@ -29,6 +33,10 @@ public class CsvParserWithSchemaFilePlugin
|
|
29
33
|
@Config("schema_path")
|
30
34
|
String getSchemaPath();
|
31
35
|
|
36
|
+
@Config("schema_class")
|
37
|
+
@ConfigDefault("\"Schema\"")
|
38
|
+
String getSchemaClass();
|
39
|
+
|
32
40
|
@Config("columns")
|
33
41
|
@ConfigDefault("[]")
|
34
42
|
@Override
|
@@ -42,29 +50,92 @@ public class CsvParserWithSchemaFilePlugin
|
|
42
50
|
|
43
51
|
log.info("default timestamp format : {}", schemaTask.getDefaultTimestampFormat() );
|
44
52
|
|
45
|
-
config.set("columns", getSchemaConfig(schemaTask
|
53
|
+
config.set("columns", getSchemaConfig(schemaTask, config));
|
46
54
|
|
47
55
|
super.transaction(config, control);
|
48
56
|
}
|
49
57
|
|
50
|
-
public
|
58
|
+
public static class ColumnConfigTemp
|
59
|
+
{
|
60
|
+
|
61
|
+
public ColumnConfigTemp() {
|
62
|
+
super();
|
63
|
+
}
|
64
|
+
|
65
|
+
private String name;
|
66
|
+
private Type type;
|
67
|
+
private String format;
|
68
|
+
public String getName() {
|
69
|
+
return name;
|
70
|
+
}
|
71
|
+
public void setName(String name) {
|
72
|
+
this.name = name;
|
73
|
+
}
|
74
|
+
public Type getType() {
|
75
|
+
return type;
|
76
|
+
}
|
77
|
+
public void setType(Type type) {
|
78
|
+
this.type = type;
|
79
|
+
}
|
80
|
+
public String getFormat() {
|
81
|
+
return format;
|
82
|
+
}
|
83
|
+
public void setFormat(String format) {
|
84
|
+
this.format = format;
|
85
|
+
}
|
86
|
+
|
87
|
+
}
|
88
|
+
/**
|
89
|
+
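* Builds the column list from the schema file; any column also defined under "columns" in the Embulk config replaces the schema-file column of the same name.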
|
90
|
+
* @param schemaTask
|
91
|
+
* @param config
|
92
|
+
* @return
|
93
|
+
*/
|
94
|
+
public SchemaConfig getSchemaConfig(BqPluginTask schemaTask, ConfigSource config) {
|
95
|
+
Map<String, ColumnConfig> map = Maps.newHashMap();
|
96
|
+
if(schemaTask.getSchemaConfig() != null && schemaTask.getSchemaConfig().isEmpty() == false) {
|
97
|
+
// schemaTask.getSchemaConfig().getColumns().stream().collect(Collectors.toMap(x-> x.getName(), y -> y, (a,b) -> b));
|
98
|
+
for(ColumnConfig c : schemaTask.getSchemaConfig().getColumns()) {
|
99
|
+
map.put(c.getName(), c);
|
100
|
+
}
|
101
|
+
}
|
51
102
|
List<ColumnConfig> columns = Lists.newArrayList();
|
52
|
-
|
53
|
-
|
54
|
-
|
103
|
+
if("SchemaConfig".equals(schemaTask.getSchemaClass().trim())) {
|
104
|
+
//SchemaConfig schema = getSchemaFromFile(schemaTask.getSchemaPath(), SchemaConfig.class);
|
105
|
+
ColumnConfigTemp[] mapList = getSchemaFromFile(schemaTask.getSchemaPath(), ColumnConfigTemp[].class);
|
106
|
+
for(ColumnConfigTemp c : mapList){
|
107
|
+
if(map.containsKey(c.getName())) {
|
108
|
+
columns.add(map.get(c.getName()));
|
109
|
+
}else {
|
110
|
+
columns.add(new ColumnConfig(c.getName(), c.getType(), c.getFormat()));
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}else {
|
114
|
+
Schema schema = getSchemaFromFile(schemaTask.getSchemaPath(), Schema.class);
|
115
|
+
for(Column c : schema.getColumns()){
|
116
|
+
if(map.containsKey(c.getName())) {
|
117
|
+
columns.add(map.get(c.getName()));
|
118
|
+
}else {
|
119
|
+
columns.add(new ColumnConfig(c.getName(), c.getType(), config));
|
120
|
+
}
|
121
|
+
}
|
55
122
|
}
|
56
123
|
SchemaConfig conf = new SchemaConfig(columns);
|
124
|
+
|
125
|
+
log.info("Final Config : {}", conf.toSchema());
|
126
|
+
|
57
127
|
return conf;
|
58
128
|
}
|
59
129
|
|
60
|
-
|
130
|
+
|
131
|
+
|
132
|
+
public <T> T getSchemaFromFile(String path, Class<T> cls) {
|
61
133
|
ObjectMapper mapper = new ObjectMapper();
|
62
134
|
try {
|
63
|
-
|
135
|
+
T schema = mapper.readValue(new File(path), cls);
|
64
136
|
return schema;
|
65
137
|
} catch (Exception e) {
|
66
|
-
throw new RuntimeException("error when parse
|
67
|
-
|
138
|
+
throw new RuntimeException("error when parse Schema : <"+cls+"> file : " + path,e);
|
68
139
|
}
|
69
140
|
}
|
70
141
|
}
|
data/src/test/java/org/embulk/parser/csv_with_schema_file/TestCsvParserWithSchemaFilePlugin.java
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
package org.embulk.parser.csv_with_schema_file;
|
2
2
|
|
3
|
+
import static org.junit.Assert.assertEquals;
|
3
4
|
import static org.junit.Assume.assumeNotNull;
|
4
5
|
|
5
6
|
import java.io.File;
|
6
7
|
import java.io.IOException;
|
7
8
|
import java.util.List;
|
8
9
|
|
10
|
+
import org.codehaus.plexus.util.FileUtils;
|
9
11
|
import org.embulk.EmbulkTestRuntime;
|
10
12
|
import org.embulk.config.ConfigLoader;
|
11
13
|
import org.embulk.config.ConfigSource;
|
@@ -30,7 +32,10 @@ import org.slf4j.Logger;
|
|
30
32
|
import org.slf4j.LoggerFactory;
|
31
33
|
|
32
34
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
35
|
+
import com.fasterxml.jackson.core.type.TypeReference;
|
33
36
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
37
|
+
import com.fasterxml.jackson.databind.SerializationFeature;
|
38
|
+
import com.fasterxml.jackson.databind.node.ObjectNode;
|
34
39
|
import com.google.common.collect.Lists;
|
35
40
|
|
36
41
|
public class TestCsvParserWithSchemaFilePlugin
|
@@ -57,9 +62,10 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
57
62
|
public void createResources() throws IOException
|
58
63
|
{
|
59
64
|
config = Exec.newConfigSource();
|
60
|
-
config.set("schema_path", "D:\\Temp\\gcstemp2\\csv_schema.json");
|
61
65
|
config.set("default_timestamp_format", "%Y-%m-%d %H:%M:%S %z");
|
62
|
-
|
66
|
+
|
67
|
+
SchemaConfig con = createSchemaConfigForTest();
|
68
|
+
config.set("schema_path", createSchemaFileForTest(con));
|
63
69
|
|
64
70
|
plugin = new CsvParserWithSchemaFilePlugin();
|
65
71
|
runner = new FileInputRunner(runtime.getInstance(LocalFileInputPlugin.class));
|
@@ -68,12 +74,14 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
68
74
|
assumeNotNull(
|
69
75
|
config.get(String.class, "schema_path")
|
70
76
|
);
|
77
|
+
|
78
|
+
|
71
79
|
}
|
72
80
|
|
73
81
|
@Test
|
74
82
|
public void testParseSchema(){
|
75
83
|
BqPluginTask task = config.loadConfig(BqPluginTask.class);
|
76
|
-
Schema schema = plugin.getSchemaFromFile(task.getSchemaPath());
|
84
|
+
Schema schema = plugin.getSchemaFromFile(task.getSchemaPath(), Schema.class);
|
77
85
|
log.info("{}",schema.getColumns());
|
78
86
|
assumeNotNull(schema.getColumns());
|
79
87
|
}
|
@@ -81,6 +89,7 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
81
89
|
public String toJson(Object o){
|
82
90
|
ObjectMapper mapper = new ObjectMapper();
|
83
91
|
try {
|
92
|
+
mapper.enable(SerializationFeature.INDENT_OUTPUT);
|
84
93
|
String jsonString = mapper.writeValueAsString(o);
|
85
94
|
return jsonString;
|
86
95
|
} catch (JsonProcessingException e) {
|
@@ -88,26 +97,97 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
88
97
|
return null;
|
89
98
|
}
|
90
99
|
}
|
91
|
-
|
92
|
-
public
|
100
|
+
|
101
|
+
public SchemaConfig createSchemaConfigForTest() {
|
93
102
|
List<ColumnConfig> l = Lists.newArrayList();
|
103
|
+
ConfigSource emptySource = Exec.newConfigSource();
|
104
|
+
l.add( new ColumnConfig("idx",Types.LONG, emptySource));
|
105
|
+
l.add( new ColumnConfig("title",Types.STRING, emptySource));
|
106
|
+
l.add( new ColumnConfig("regdate",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S"));
|
107
|
+
SchemaConfig con = new SchemaConfig(l);
|
108
|
+
return con;
|
109
|
+
}
|
110
|
+
|
111
|
+
|
112
|
+
public File createSchemaConfigFileForTest(SchemaConfig con) throws IOException {
|
113
|
+
//con.getColumns().stream().map(x -> x.getConfigSource().getObjectNode()).collect(Collectors.toList());
|
114
|
+
List<ObjectNode> jsonList = Lists.newArrayList();
|
115
|
+
for(ColumnConfig c : con.getColumns()) {
|
116
|
+
jsonList.add( c.getConfigSource().getObjectNode() );
|
117
|
+
}
|
118
|
+
String configString = toJson(jsonList);
|
119
|
+
log.info("SchemaConfig String : {}",configString);
|
120
|
+
|
121
|
+
File f = File.createTempFile("embulk-test-schemaconfig", ".json");
|
122
|
+
FileUtils.fileWrite(f, configString);
|
123
|
+
f.deleteOnExit();
|
124
|
+
|
125
|
+
return f;
|
126
|
+
}
|
127
|
+
|
128
|
+
public File createSchemaFileForTest(SchemaConfig con) throws IOException {
|
129
|
+
String schemaString = toJson(con.toSchema());
|
130
|
+
log.debug("Schema String : {}",schemaString);
|
131
|
+
File f = File.createTempFile("embulk-test-schema", ".json");
|
132
|
+
FileUtils.fileWrite(f, schemaString);
|
133
|
+
f.deleteOnExit();
|
134
|
+
return f;
|
135
|
+
}
|
136
|
+
|
137
|
+
|
138
|
+
@Test
|
139
|
+
public void testSchemaFile() throws IOException{
|
140
|
+
BqPluginTask task = config.loadConfig(BqPluginTask.class);
|
141
|
+
SchemaConfig finalconfig = plugin.getSchemaConfig(task,config);
|
142
|
+
log.info("final config : {}",toJson(finalconfig.getColumns()));
|
143
|
+
|
144
|
+
assertEquals(finalconfig.getColumn(0).getName(), "idx");
|
145
|
+
assertEquals(finalconfig.getColumn(1).getName(), "title");
|
146
|
+
assertEquals(finalconfig.getColumn(2).getName(), "regdate");
|
147
|
+
}
|
148
|
+
|
149
|
+
@Test
|
150
|
+
public void testSchemaClass() throws IOException{
|
151
|
+
SchemaConfig con = createSchemaConfigForTest();
|
152
|
+
File f = createSchemaConfigFileForTest(con);
|
153
|
+
|
154
|
+
ConfigSource c = config.deepCopy();
|
155
|
+
c.set("schema_path",f);
|
156
|
+
c.set("schema_class","SchemaConfig");
|
157
|
+
|
158
|
+
BqPluginTask task = c.loadConfig(BqPluginTask.class);
|
159
|
+
SchemaConfig finalconfig = plugin.getSchemaConfig(task,c);
|
160
|
+
log.info("final config : {}",toJson(finalconfig.getColumns()));
|
161
|
+
}
|
162
|
+
|
163
|
+
@Test
|
164
|
+
public void testMergeOriginalConfig() throws IOException{
|
165
|
+
List<ColumnConfig> l = Lists.newArrayList();
|
94
166
|
//ColumnConfig c = new ColumnConfig(config);
|
95
167
|
ConfigSource emptySource = Exec.newConfigSource();
|
168
|
+
l.add( new ColumnConfig("regdate",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S.%H %z"));
|
169
|
+
SchemaConfig originalSchemaConfig = new SchemaConfig(l);
|
96
170
|
|
97
|
-
|
98
|
-
|
99
|
-
l.add( new ColumnConfig("cc",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S"));
|
171
|
+
SchemaConfig con = createSchemaConfigForTest();
|
172
|
+
File f = createSchemaFileForTest(con);
|
100
173
|
|
101
|
-
|
174
|
+
ConfigSource c = config.deepCopy();
|
175
|
+
c.set("columns", originalSchemaConfig );
|
176
|
+
c.set("schema_path",f);
|
177
|
+
BqPluginTask task = c.loadConfig(BqPluginTask.class);
|
178
|
+
SchemaConfig finalconfig = plugin.getSchemaConfig(task,config);
|
102
179
|
|
103
|
-
|
104
|
-
log.info("
|
180
|
+
log.info("final config : {}",toJson(finalconfig));
|
181
|
+
log.info("final config to schema : {}", toJson(finalconfig.toSchema()));
|
182
|
+
|
183
|
+
assertEquals(finalconfig.getColumn(0).getName(), con.getColumn(0).getName());
|
184
|
+
assertEquals(finalconfig.getColumn(1).getName(), con.getColumn(1).getName());
|
185
|
+
assertEquals(finalconfig.getColumn(2).getName(), con.getColumn(2).getName());
|
105
186
|
|
106
|
-
|
107
|
-
|
187
|
+
assertEquals(finalconfig.getColumn(2).getOption().get(String.class, "format"), "%Y-%m-%d %H:%M:%S.%H %z");
|
188
|
+
assertEquals(con.getColumn(2).getOption().get(String.class, "format"), "%Y-%m-%d %H:%M:%S");
|
108
189
|
}
|
109
190
|
|
110
|
-
|
111
191
|
@Test
|
112
192
|
public void testInit(){
|
113
193
|
BqPluginTask task = config.loadConfig(BqPluginTask.class);
|
@@ -121,9 +201,8 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
121
201
|
CsvParserPlugin.PluginTask parents = config.loadConfig(CsvParserPlugin.PluginTask.class);
|
122
202
|
log.info("{}", parents);
|
123
203
|
}
|
124
|
-
|
125
204
|
|
126
|
-
@Test
|
205
|
+
// @Test
|
127
206
|
public void testParserDefaultrConfig() throws IOException{
|
128
207
|
File f = new File("D:\\temp\\embulk_test.yml");
|
129
208
|
ConfigSource cpn = new ConfigLoader(Exec.session().getModelManager()).fromYamlFile(f).getNested("in").getNested("parser");
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-csv_with_schema_file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jo8937
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-11-
|
11
|
+
date: 2017-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,7 +59,7 @@ files:
|
|
59
59
|
- src/main/java/org/embulk/parser/csv_with_schema_file/CsvParserWithSchemaFilePlugin.java
|
60
60
|
- src/test/java/org/embulk/parser/csv_with_schema_file/TestCsvParserWithSchemaFilePlugin.java
|
61
61
|
- classpath/commons-compress-1.10.jar
|
62
|
-
- classpath/embulk-parser-csv_with_schema_file-0.0.
|
62
|
+
- classpath/embulk-parser-csv_with_schema_file-0.0.2.jar
|
63
63
|
- classpath/embulk-standards-0.8.36.jar
|
64
64
|
homepage: https://github.com/jo8937/embulk-parser-csv_with_schema_file
|
65
65
|
licenses:
|