embulk-parser-csv_with_schema_file 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31db0959f10ea5f4ed7dd1cf502966899eb60e36
|
4
|
+
data.tar.gz: 9e6b08b3f90978ec99a74d2c1630ecef31b31a3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 965a3fe66e997f53401325bda3f7fa8a1ec55722a5416ddd782f10e5dfb3ce1343267f2e9ff2584b76024da8a23cb24074dafd3bbc7c932a8bfd85df4edc9d62
|
7
|
+
data.tar.gz: 309e34839bb2572a5d800b7d00d7022d43b7d612ac36442d96911b0c9191af81d5336febb0c8bba1afabc3b1a3a3f25a4bf75c0b721b732112530a83508ceb5a
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ $ embulk gem install embulk-parser-csv_with_schema_file
|
|
18
18
|
## Configuration
|
19
19
|
|
20
20
|
- **schema_path**: schema file path. json. (string, required)
|
21
|
-
- **columns**:
|
21
|
+
- **columns**: Optional. If present, each column defined here overrides the column with the same name in the schema file. (hash, default: `[]`)
|
22
22
|
- other configs extends csv parser. see : http://www.embulk.org/docs/built-in.html#csv-parser-plugin
|
23
23
|
|
24
24
|
## Example
|
data/build.gradle
CHANGED
data/src/main/java/org/embulk/parser/csv_with_schema_file/CsvParserWithSchemaFilePlugin.java
CHANGED
@@ -2,6 +2,7 @@ package org.embulk.parser.csv_with_schema_file;
|
|
2
2
|
|
3
3
|
import java.io.File;
|
4
4
|
import java.util.List;
|
5
|
+
import java.util.Map;
|
5
6
|
|
6
7
|
import org.embulk.config.Config;
|
7
8
|
import org.embulk.config.ConfigDefault;
|
@@ -12,11 +13,14 @@ import org.embulk.spi.Exec;
|
|
12
13
|
import org.embulk.spi.ParserPlugin;
|
13
14
|
import org.embulk.spi.Schema;
|
14
15
|
import org.embulk.spi.SchemaConfig;
|
16
|
+
import org.embulk.spi.type.Type;
|
15
17
|
import org.embulk.standards.CsvParserPlugin;
|
18
|
+
import org.jruby.org.objectweb.asm.TypeReference;
|
16
19
|
import org.slf4j.Logger;
|
17
20
|
|
18
21
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
19
22
|
import com.google.common.collect.Lists;
|
23
|
+
import com.google.common.collect.Maps;
|
20
24
|
|
21
25
|
public class CsvParserWithSchemaFilePlugin
|
22
26
|
extends CsvParserPlugin
|
@@ -29,6 +33,10 @@ public class CsvParserWithSchemaFilePlugin
|
|
29
33
|
@Config("schema_path")
|
30
34
|
String getSchemaPath();
|
31
35
|
|
36
|
+
@Config("schema_class")
|
37
|
+
@ConfigDefault("\"Schema\"")
|
38
|
+
String getSchemaClass();
|
39
|
+
|
32
40
|
@Config("columns")
|
33
41
|
@ConfigDefault("[]")
|
34
42
|
@Override
|
@@ -42,29 +50,92 @@ public class CsvParserWithSchemaFilePlugin
|
|
42
50
|
|
43
51
|
log.info("default timestamp format : {}", schemaTask.getDefaultTimestampFormat() );
|
44
52
|
|
45
|
-
config.set("columns", getSchemaConfig(schemaTask
|
53
|
+
config.set("columns", getSchemaConfig(schemaTask, config));
|
46
54
|
|
47
55
|
super.transaction(config, control);
|
48
56
|
}
|
49
57
|
|
50
|
-
public
|
58
|
+
public static class ColumnConfigTemp
|
59
|
+
{
|
60
|
+
|
61
|
+
public ColumnConfigTemp() {
|
62
|
+
super();
|
63
|
+
}
|
64
|
+
|
65
|
+
private String name;
|
66
|
+
private Type type;
|
67
|
+
private String format;
|
68
|
+
public String getName() {
|
69
|
+
return name;
|
70
|
+
}
|
71
|
+
public void setName(String name) {
|
72
|
+
this.name = name;
|
73
|
+
}
|
74
|
+
public Type getType() {
|
75
|
+
return type;
|
76
|
+
}
|
77
|
+
public void setType(Type type) {
|
78
|
+
this.type = type;
|
79
|
+
}
|
80
|
+
public String getFormat() {
|
81
|
+
return format;
|
82
|
+
}
|
83
|
+
public void setFormat(String format) {
|
84
|
+
this.format = format;
|
85
|
+
}
|
86
|
+
|
87
|
+
}
|
88
|
+
/**
|
89
|
+
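* Builds the column list from the schema file; a column that is also defined under "columns" in the embulk config file replaces the schema-file column of the same name.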
|
90
|
+
* @param schemaTask
|
91
|
+
* @param config
|
92
|
+
* @return
|
93
|
+
*/
|
94
|
+
public SchemaConfig getSchemaConfig(BqPluginTask schemaTask, ConfigSource config) {
|
95
|
+
Map<String, ColumnConfig> map = Maps.newHashMap();
|
96
|
+
if(schemaTask.getSchemaConfig() != null && schemaTask.getSchemaConfig().isEmpty() == false) {
|
97
|
+
// schemaTask.getSchemaConfig().getColumns().stream().collect(Collectors.toMap(x-> x.getName(), y -> y, (a,b) -> b));
|
98
|
+
for(ColumnConfig c : schemaTask.getSchemaConfig().getColumns()) {
|
99
|
+
map.put(c.getName(), c);
|
100
|
+
}
|
101
|
+
}
|
51
102
|
List<ColumnConfig> columns = Lists.newArrayList();
|
52
|
-
|
53
|
-
|
54
|
-
|
103
|
+
if("SchemaConfig".equals(schemaTask.getSchemaClass().trim())) {
|
104
|
+
//SchemaConfig schema = getSchemaFromFile(schemaTask.getSchemaPath(), SchemaConfig.class);
|
105
|
+
ColumnConfigTemp[] mapList = getSchemaFromFile(schemaTask.getSchemaPath(), ColumnConfigTemp[].class);
|
106
|
+
for(ColumnConfigTemp c : mapList){
|
107
|
+
if(map.containsKey(c.getName())) {
|
108
|
+
columns.add(map.get(c.getName()));
|
109
|
+
}else {
|
110
|
+
columns.add(new ColumnConfig(c.getName(), c.getType(), c.getFormat()));
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}else {
|
114
|
+
Schema schema = getSchemaFromFile(schemaTask.getSchemaPath(), Schema.class);
|
115
|
+
for(Column c : schema.getColumns()){
|
116
|
+
if(map.containsKey(c.getName())) {
|
117
|
+
columns.add(map.get(c.getName()));
|
118
|
+
}else {
|
119
|
+
columns.add(new ColumnConfig(c.getName(), c.getType(), config));
|
120
|
+
}
|
121
|
+
}
|
55
122
|
}
|
56
123
|
SchemaConfig conf = new SchemaConfig(columns);
|
124
|
+
|
125
|
+
log.info("Final Config : {}", conf.toSchema());
|
126
|
+
|
57
127
|
return conf;
|
58
128
|
}
|
59
129
|
|
60
|
-
|
130
|
+
|
131
|
+
|
132
|
+
public <T> T getSchemaFromFile(String path, Class<T> cls) {
|
61
133
|
ObjectMapper mapper = new ObjectMapper();
|
62
134
|
try {
|
63
|
-
|
135
|
+
T schema = mapper.readValue(new File(path), cls);
|
64
136
|
return schema;
|
65
137
|
} catch (Exception e) {
|
66
|
-
throw new RuntimeException("error when parse
|
67
|
-
|
138
|
+
throw new RuntimeException("error when parse Schema : <"+cls+"> file : " + path,e);
|
68
139
|
}
|
69
140
|
}
|
70
141
|
}
|
data/src/test/java/org/embulk/parser/csv_with_schema_file/TestCsvParserWithSchemaFilePlugin.java
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
package org.embulk.parser.csv_with_schema_file;
|
2
2
|
|
3
|
+
import static org.junit.Assert.assertEquals;
|
3
4
|
import static org.junit.Assume.assumeNotNull;
|
4
5
|
|
5
6
|
import java.io.File;
|
6
7
|
import java.io.IOException;
|
7
8
|
import java.util.List;
|
8
9
|
|
10
|
+
import org.codehaus.plexus.util.FileUtils;
|
9
11
|
import org.embulk.EmbulkTestRuntime;
|
10
12
|
import org.embulk.config.ConfigLoader;
|
11
13
|
import org.embulk.config.ConfigSource;
|
@@ -30,7 +32,10 @@ import org.slf4j.Logger;
|
|
30
32
|
import org.slf4j.LoggerFactory;
|
31
33
|
|
32
34
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
35
|
+
import com.fasterxml.jackson.core.type.TypeReference;
|
33
36
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
37
|
+
import com.fasterxml.jackson.databind.SerializationFeature;
|
38
|
+
import com.fasterxml.jackson.databind.node.ObjectNode;
|
34
39
|
import com.google.common.collect.Lists;
|
35
40
|
|
36
41
|
public class TestCsvParserWithSchemaFilePlugin
|
@@ -57,9 +62,10 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
57
62
|
public void createResources() throws IOException
|
58
63
|
{
|
59
64
|
config = Exec.newConfigSource();
|
60
|
-
config.set("schema_path", "D:\\Temp\\gcstemp2\\csv_schema.json");
|
61
65
|
config.set("default_timestamp_format", "%Y-%m-%d %H:%M:%S %z");
|
62
|
-
|
66
|
+
|
67
|
+
SchemaConfig con = createSchemaConfigForTest();
|
68
|
+
config.set("schema_path", createSchemaFileForTest(con));
|
63
69
|
|
64
70
|
plugin = new CsvParserWithSchemaFilePlugin();
|
65
71
|
runner = new FileInputRunner(runtime.getInstance(LocalFileInputPlugin.class));
|
@@ -68,12 +74,14 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
68
74
|
assumeNotNull(
|
69
75
|
config.get(String.class, "schema_path")
|
70
76
|
);
|
77
|
+
|
78
|
+
|
71
79
|
}
|
72
80
|
|
73
81
|
@Test
|
74
82
|
public void testParseSchema(){
|
75
83
|
BqPluginTask task = config.loadConfig(BqPluginTask.class);
|
76
|
-
Schema schema = plugin.getSchemaFromFile(task.getSchemaPath());
|
84
|
+
Schema schema = plugin.getSchemaFromFile(task.getSchemaPath(), Schema.class);
|
77
85
|
log.info("{}",schema.getColumns());
|
78
86
|
assumeNotNull(schema.getColumns());
|
79
87
|
}
|
@@ -81,6 +89,7 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
81
89
|
public String toJson(Object o){
|
82
90
|
ObjectMapper mapper = new ObjectMapper();
|
83
91
|
try {
|
92
|
+
mapper.enable(SerializationFeature.INDENT_OUTPUT);
|
84
93
|
String jsonString = mapper.writeValueAsString(o);
|
85
94
|
return jsonString;
|
86
95
|
} catch (JsonProcessingException e) {
|
@@ -88,26 +97,97 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
88
97
|
return null;
|
89
98
|
}
|
90
99
|
}
|
91
|
-
|
92
|
-
public
|
100
|
+
|
101
|
+
public SchemaConfig createSchemaConfigForTest() {
|
93
102
|
List<ColumnConfig> l = Lists.newArrayList();
|
103
|
+
ConfigSource emptySource = Exec.newConfigSource();
|
104
|
+
l.add( new ColumnConfig("idx",Types.LONG, emptySource));
|
105
|
+
l.add( new ColumnConfig("title",Types.STRING, emptySource));
|
106
|
+
l.add( new ColumnConfig("regdate",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S"));
|
107
|
+
SchemaConfig con = new SchemaConfig(l);
|
108
|
+
return con;
|
109
|
+
}
|
110
|
+
|
111
|
+
|
112
|
+
public File createSchemaConfigFileForTest(SchemaConfig con) throws IOException {
|
113
|
+
//con.getColumns().stream().map(x -> x.getConfigSource().getObjectNode()).collect(Collectors.toList());
|
114
|
+
List<ObjectNode> jsonList = Lists.newArrayList();
|
115
|
+
for(ColumnConfig c : con.getColumns()) {
|
116
|
+
jsonList.add( c.getConfigSource().getObjectNode() );
|
117
|
+
}
|
118
|
+
String configString = toJson(jsonList);
|
119
|
+
log.info("SchemaConfig String : {}",configString);
|
120
|
+
|
121
|
+
File f = File.createTempFile("embulk-test-schemaconfig", ".json");
|
122
|
+
FileUtils.fileWrite(f, configString);
|
123
|
+
f.deleteOnExit();
|
124
|
+
|
125
|
+
return f;
|
126
|
+
}
|
127
|
+
|
128
|
+
public File createSchemaFileForTest(SchemaConfig con) throws IOException {
|
129
|
+
String schemaString = toJson(con.toSchema());
|
130
|
+
log.debug("Schema String : {}",schemaString);
|
131
|
+
File f = File.createTempFile("embulk-test-schema", ".json");
|
132
|
+
FileUtils.fileWrite(f, schemaString);
|
133
|
+
f.deleteOnExit();
|
134
|
+
return f;
|
135
|
+
}
|
136
|
+
|
137
|
+
|
138
|
+
@Test
|
139
|
+
public void testSchemaFile() throws IOException{
|
140
|
+
BqPluginTask task = config.loadConfig(BqPluginTask.class);
|
141
|
+
SchemaConfig finalconfig = plugin.getSchemaConfig(task,config);
|
142
|
+
log.info("final config : {}",toJson(finalconfig.getColumns()));
|
143
|
+
|
144
|
+
assertEquals(finalconfig.getColumn(0).getName(), "idx");
|
145
|
+
assertEquals(finalconfig.getColumn(1).getName(), "title");
|
146
|
+
assertEquals(finalconfig.getColumn(2).getName(), "regdate");
|
147
|
+
}
|
148
|
+
|
149
|
+
@Test
|
150
|
+
public void testSchemaClass() throws IOException{
|
151
|
+
SchemaConfig con = createSchemaConfigForTest();
|
152
|
+
File f = createSchemaConfigFileForTest(con);
|
153
|
+
|
154
|
+
ConfigSource c = config.deepCopy();
|
155
|
+
c.set("schema_path",f);
|
156
|
+
c.set("schema_class","SchemaConfig");
|
157
|
+
|
158
|
+
BqPluginTask task = c.loadConfig(BqPluginTask.class);
|
159
|
+
SchemaConfig finalconfig = plugin.getSchemaConfig(task,c);
|
160
|
+
log.info("final config : {}",toJson(finalconfig.getColumns()));
|
161
|
+
}
|
162
|
+
|
163
|
+
@Test
|
164
|
+
public void testMergeOriginalConfig() throws IOException{
|
165
|
+
List<ColumnConfig> l = Lists.newArrayList();
|
94
166
|
//ColumnConfig c = new ColumnConfig(config);
|
95
167
|
ConfigSource emptySource = Exec.newConfigSource();
|
168
|
+
l.add( new ColumnConfig("regdate",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S.%H %z"));
|
169
|
+
SchemaConfig originalSchemaConfig = new SchemaConfig(l);
|
96
170
|
|
97
|
-
|
98
|
-
|
99
|
-
l.add( new ColumnConfig("cc",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S"));
|
171
|
+
SchemaConfig con = createSchemaConfigForTest();
|
172
|
+
File f = createSchemaFileForTest(con);
|
100
173
|
|
101
|
-
|
174
|
+
ConfigSource c = config.deepCopy();
|
175
|
+
c.set("columns", originalSchemaConfig );
|
176
|
+
c.set("schema_path",f);
|
177
|
+
BqPluginTask task = c.loadConfig(BqPluginTask.class);
|
178
|
+
SchemaConfig finalconfig = plugin.getSchemaConfig(task,config);
|
102
179
|
|
103
|
-
|
104
|
-
log.info("
|
180
|
+
log.info("final config : {}",toJson(finalconfig));
|
181
|
+
log.info("final config to schema : {}", toJson(finalconfig.toSchema()));
|
182
|
+
|
183
|
+
assertEquals(finalconfig.getColumn(0).getName(), con.getColumn(0).getName());
|
184
|
+
assertEquals(finalconfig.getColumn(1).getName(), con.getColumn(1).getName());
|
185
|
+
assertEquals(finalconfig.getColumn(2).getName(), con.getColumn(2).getName());
|
105
186
|
|
106
|
-
|
107
|
-
|
187
|
+
assertEquals(finalconfig.getColumn(2).getOption().get(String.class, "format"), "%Y-%m-%d %H:%M:%S.%H %z");
|
188
|
+
assertEquals(con.getColumn(2).getOption().get(String.class, "format"), "%Y-%m-%d %H:%M:%S");
|
108
189
|
}
|
109
190
|
|
110
|
-
|
111
191
|
@Test
|
112
192
|
public void testInit(){
|
113
193
|
BqPluginTask task = config.loadConfig(BqPluginTask.class);
|
@@ -121,9 +201,8 @@ public class TestCsvParserWithSchemaFilePlugin
|
|
121
201
|
CsvParserPlugin.PluginTask parents = config.loadConfig(CsvParserPlugin.PluginTask.class);
|
122
202
|
log.info("{}", parents);
|
123
203
|
}
|
124
|
-
|
125
204
|
|
126
|
-
@Test
|
205
|
+
// @Test
|
127
206
|
public void testParserDefaultrConfig() throws IOException{
|
128
207
|
File f = new File("D:\\temp\\embulk_test.yml");
|
129
208
|
ConfigSource cpn = new ConfigLoader(Exec.session().getModelManager()).fromYamlFile(f).getNested("in").getNested("parser");
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-csv_with_schema_file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jo8937
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-11-
|
11
|
+
date: 2017-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,7 +59,7 @@ files:
|
|
59
59
|
- src/main/java/org/embulk/parser/csv_with_schema_file/CsvParserWithSchemaFilePlugin.java
|
60
60
|
- src/test/java/org/embulk/parser/csv_with_schema_file/TestCsvParserWithSchemaFilePlugin.java
|
61
61
|
- classpath/commons-compress-1.10.jar
|
62
|
-
- classpath/embulk-parser-csv_with_schema_file-0.0.1.jar
|
62
|
+
- classpath/embulk-parser-csv_with_schema_file-0.0.2.jar
|
63
63
|
- classpath/embulk-standards-0.8.36.jar
|
64
64
|
homepage: https://github.com/jo8937/embulk-parser-csv_with_schema_file
|
65
65
|
licenses:
|