embulk-parser-csv_with_schema_file 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9deedb78f712bd47f5c8f70d2ef2e9044551a50a
4
- data.tar.gz: 9b6c9c9608f42ec7261f37d81954f7cea979627d
3
+ metadata.gz: 31db0959f10ea5f4ed7dd1cf502966899eb60e36
4
+ data.tar.gz: 9e6b08b3f90978ec99a74d2c1630ecef31b31a3f
5
5
  SHA512:
6
- metadata.gz: 0540e1b7302979e29b6f77f10c5a18731197e3fa2f0e8a8a2edc761a3212de3298db205e7fb4a267fe9901cf7e965b17482b42f1cc92c3681ba24e5756774706
7
- data.tar.gz: 7e59b5115f104eedcbf19fa40fd9923ab8df3f10cd76c4bcb3904d08c47f956851e6e2a82b279639b7fc9a24cf5409108dfa5d2b7b4f23cb4901708c6dd2a8b7
6
+ metadata.gz: 965a3fe66e997f53401325bda3f7fa8a1ec55722a5416ddd782f10e5dfb3ce1343267f2e9ff2584b76024da8a23cb24074dafd3bbc7c932a8bfd85df4edc9d62
7
+ data.tar.gz: 309e34839bb2572a5d800b7d00d7022d43b7d612ac36442d96911b0c9191af81d5336febb0c8bba1afabc3b1a3a3f25a4bf75c0b721b732112530a83508ceb5a
data/README.md CHANGED
@@ -18,7 +18,7 @@ $ embulk gem install embulk-parser-csv_with_schema_file
18
18
  ## Configuration
19
19
 
20
20
  - **schema_path**: schema file path. json. (string, required)
21
- - **columns**: this config is ignored in this plugin.
21
+ - **columns**: Optional. If present, columns defined here override the same-named columns from the schema file. (hash, default: `[]`)
22
22
  - other configs extends csv parser. see : http://www.embulk.org/docs/built-in.html#csv-parser-plugin
23
23
 
24
24
  ## Example
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.1"
16
+ version = "0.0.2"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -2,6 +2,7 @@ package org.embulk.parser.csv_with_schema_file;
2
2
 
3
3
  import java.io.File;
4
4
  import java.util.List;
5
+ import java.util.Map;
5
6
 
6
7
  import org.embulk.config.Config;
7
8
  import org.embulk.config.ConfigDefault;
@@ -12,11 +13,14 @@ import org.embulk.spi.Exec;
12
13
  import org.embulk.spi.ParserPlugin;
13
14
  import org.embulk.spi.Schema;
14
15
  import org.embulk.spi.SchemaConfig;
16
+ import org.embulk.spi.type.Type;
15
17
  import org.embulk.standards.CsvParserPlugin;
18
+ import org.jruby.org.objectweb.asm.TypeReference;
16
19
  import org.slf4j.Logger;
17
20
 
18
21
  import com.fasterxml.jackson.databind.ObjectMapper;
19
22
  import com.google.common.collect.Lists;
23
+ import com.google.common.collect.Maps;
20
24
 
21
25
  public class CsvParserWithSchemaFilePlugin
22
26
  extends CsvParserPlugin
@@ -29,6 +33,10 @@ public class CsvParserWithSchemaFilePlugin
29
33
  @Config("schema_path")
30
34
  String getSchemaPath();
31
35
 
36
+ @Config("schema_class")
37
+ @ConfigDefault("\"Schema\"")
38
+ String getSchemaClass();
39
+
32
40
  @Config("columns")
33
41
  @ConfigDefault("[]")
34
42
  @Override
@@ -42,29 +50,92 @@ public class CsvParserWithSchemaFilePlugin
42
50
 
43
51
  log.info("default timestamp format : {}", schemaTask.getDefaultTimestampFormat() );
44
52
 
45
- config.set("columns", getSchemaConfig(schemaTask.getSchemaPath(), config));
53
+ config.set("columns", getSchemaConfig(schemaTask, config));
46
54
 
47
55
  super.transaction(config, control);
48
56
  }
49
57
 
50
- public SchemaConfig getSchemaConfig(String path, ConfigSource config) {
58
+ public static class ColumnConfigTemp
59
+ {
60
+
61
+ public ColumnConfigTemp() {
62
+ super();
63
+ }
64
+
65
+ private String name;
66
+ private Type type;
67
+ private String format;
68
+ public String getName() {
69
+ return name;
70
+ }
71
+ public void setName(String name) {
72
+ this.name = name;
73
+ }
74
+ public Type getType() {
75
+ return type;
76
+ }
77
+ public void setType(Type type) {
78
+ this.type = type;
79
+ }
80
+ public String getFormat() {
81
+ return format;
82
+ }
83
+ public void setFormat(String format) {
84
+ this.format = format;
85
+ }
86
+
87
+ }
88
+ /**
89
+ * if "columns" in embulk config file, use that
90
+ * @param schemaTask
91
+ * @param config
92
+ * @return
93
+ */
94
+ public SchemaConfig getSchemaConfig(BqPluginTask schemaTask, ConfigSource config) {
95
+ Map<String, ColumnConfig> map = Maps.newHashMap();
96
+ if(schemaTask.getSchemaConfig() != null && schemaTask.getSchemaConfig().isEmpty() == false) {
97
+ // schemaTask.getSchemaConfig().getColumns().stream().collect(Collectors.toMap(x-> x.getName(), y -> y, (a,b) -> b));
98
+ for(ColumnConfig c : schemaTask.getSchemaConfig().getColumns()) {
99
+ map.put(c.getName(), c);
100
+ }
101
+ }
51
102
  List<ColumnConfig> columns = Lists.newArrayList();
52
- Schema schema = getSchemaFromFile(path);
53
- for(Column c : schema.getColumns()){
54
- columns.add(new ColumnConfig(c.getName(), c.getType(), config));
103
+ if("SchemaConfig".equals(schemaTask.getSchemaClass().trim())) {
104
+ //SchemaConfig schema = getSchemaFromFile(schemaTask.getSchemaPath(), SchemaConfig.class);
105
+ ColumnConfigTemp[] mapList = getSchemaFromFile(schemaTask.getSchemaPath(), ColumnConfigTemp[].class);
106
+ for(ColumnConfigTemp c : mapList){
107
+ if(map.containsKey(c.getName())) {
108
+ columns.add(map.get(c.getName()));
109
+ }else {
110
+ columns.add(new ColumnConfig(c.getName(), c.getType(), c.getFormat()));
111
+ }
112
+ }
113
+ }else {
114
+ Schema schema = getSchemaFromFile(schemaTask.getSchemaPath(), Schema.class);
115
+ for(Column c : schema.getColumns()){
116
+ if(map.containsKey(c.getName())) {
117
+ columns.add(map.get(c.getName()));
118
+ }else {
119
+ columns.add(new ColumnConfig(c.getName(), c.getType(), config));
120
+ }
121
+ }
55
122
  }
56
123
  SchemaConfig conf = new SchemaConfig(columns);
124
+
125
+ log.info("Final Config : {}", conf.toSchema());
126
+
57
127
  return conf;
58
128
  }
59
129
 
60
- public Schema getSchemaFromFile(String path) {
130
+
131
+
132
+ public <T> T getSchemaFromFile(String path, Class<T> cls) {
61
133
  ObjectMapper mapper = new ObjectMapper();
62
134
  try {
63
- Schema schema = mapper.readValue(new File(path), Schema.class);
135
+ T schema = mapper.readValue(new File(path), cls);
64
136
  return schema;
65
137
  } catch (Exception e) {
66
- throw new RuntimeException("error when parse schema file : " + path,e);
67
-
138
+ throw new RuntimeException("error when parse Schema : <"+cls+"> file : " + path,e);
68
139
  }
69
140
  }
70
141
  }
@@ -1,11 +1,13 @@
1
1
  package org.embulk.parser.csv_with_schema_file;
2
2
 
3
+ import static org.junit.Assert.assertEquals;
3
4
  import static org.junit.Assume.assumeNotNull;
4
5
 
5
6
  import java.io.File;
6
7
  import java.io.IOException;
7
8
  import java.util.List;
8
9
 
10
+ import org.codehaus.plexus.util.FileUtils;
9
11
  import org.embulk.EmbulkTestRuntime;
10
12
  import org.embulk.config.ConfigLoader;
11
13
  import org.embulk.config.ConfigSource;
@@ -30,7 +32,10 @@ import org.slf4j.Logger;
30
32
  import org.slf4j.LoggerFactory;
31
33
 
32
34
  import com.fasterxml.jackson.core.JsonProcessingException;
35
+ import com.fasterxml.jackson.core.type.TypeReference;
33
36
  import com.fasterxml.jackson.databind.ObjectMapper;
37
+ import com.fasterxml.jackson.databind.SerializationFeature;
38
+ import com.fasterxml.jackson.databind.node.ObjectNode;
34
39
  import com.google.common.collect.Lists;
35
40
 
36
41
  public class TestCsvParserWithSchemaFilePlugin
@@ -57,9 +62,10 @@ public class TestCsvParserWithSchemaFilePlugin
57
62
  public void createResources() throws IOException
58
63
  {
59
64
  config = Exec.newConfigSource();
60
- config.set("schema_path", "D:\\Temp\\gcstemp2\\csv_schema.json");
61
65
  config.set("default_timestamp_format", "%Y-%m-%d %H:%M:%S %z");
62
- //config.set("columns","");
66
+
67
+ SchemaConfig con = createSchemaConfigForTest();
68
+ config.set("schema_path", createSchemaFileForTest(con));
63
69
 
64
70
  plugin = new CsvParserWithSchemaFilePlugin();
65
71
  runner = new FileInputRunner(runtime.getInstance(LocalFileInputPlugin.class));
@@ -68,12 +74,14 @@ public class TestCsvParserWithSchemaFilePlugin
68
74
  assumeNotNull(
69
75
  config.get(String.class, "schema_path")
70
76
  );
77
+
78
+
71
79
  }
72
80
 
73
81
  @Test
74
82
  public void testParseSchema(){
75
83
  BqPluginTask task = config.loadConfig(BqPluginTask.class);
76
- Schema schema = plugin.getSchemaFromFile(task.getSchemaPath());
84
+ Schema schema = plugin.getSchemaFromFile(task.getSchemaPath(), Schema.class);
77
85
  log.info("{}",schema.getColumns());
78
86
  assumeNotNull(schema.getColumns());
79
87
  }
@@ -81,6 +89,7 @@ public class TestCsvParserWithSchemaFilePlugin
81
89
  public String toJson(Object o){
82
90
  ObjectMapper mapper = new ObjectMapper();
83
91
  try {
92
+ mapper.enable(SerializationFeature.INDENT_OUTPUT);
84
93
  String jsonString = mapper.writeValueAsString(o);
85
94
  return jsonString;
86
95
  } catch (JsonProcessingException e) {
@@ -88,26 +97,97 @@ public class TestCsvParserWithSchemaFilePlugin
88
97
  return null;
89
98
  }
90
99
  }
91
- @Test
92
- public void testColumnConfig(){
100
+
101
+ public SchemaConfig createSchemaConfigForTest() {
93
102
  List<ColumnConfig> l = Lists.newArrayList();
103
+ ConfigSource emptySource = Exec.newConfigSource();
104
+ l.add( new ColumnConfig("idx",Types.LONG, emptySource));
105
+ l.add( new ColumnConfig("title",Types.STRING, emptySource));
106
+ l.add( new ColumnConfig("regdate",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S"));
107
+ SchemaConfig con = new SchemaConfig(l);
108
+ return con;
109
+ }
110
+
111
+
112
+ public File createSchemaConfigFileForTest(SchemaConfig con) throws IOException {
113
+ //con.getColumns().stream().map(x -> x.getConfigSource().getObjectNode()).collect(Collectors.toList());
114
+ List<ObjectNode> jsonList = Lists.newArrayList();
115
+ for(ColumnConfig c : con.getColumns()) {
116
+ jsonList.add( c.getConfigSource().getObjectNode() );
117
+ }
118
+ String configString = toJson(jsonList);
119
+ log.info("SchemaConfig String : {}",configString);
120
+
121
+ File f = File.createTempFile("embulk-test-schemaconfig", ".json");
122
+ FileUtils.fileWrite(f, configString);
123
+ f.deleteOnExit();
124
+
125
+ return f;
126
+ }
127
+
128
+ public File createSchemaFileForTest(SchemaConfig con) throws IOException {
129
+ String schemaString = toJson(con.toSchema());
130
+ log.debug("Schema String : {}",schemaString);
131
+ File f = File.createTempFile("embulk-test-schema", ".json");
132
+ FileUtils.fileWrite(f, schemaString);
133
+ f.deleteOnExit();
134
+ return f;
135
+ }
136
+
137
+
138
+ @Test
139
+ public void testSchemaFile() throws IOException{
140
+ BqPluginTask task = config.loadConfig(BqPluginTask.class);
141
+ SchemaConfig finalconfig = plugin.getSchemaConfig(task,config);
142
+ log.info("final config : {}",toJson(finalconfig.getColumns()));
143
+
144
+ assertEquals(finalconfig.getColumn(0).getName(), "idx");
145
+ assertEquals(finalconfig.getColumn(1).getName(), "title");
146
+ assertEquals(finalconfig.getColumn(2).getName(), "regdate");
147
+ }
148
+
149
+ @Test
150
+ public void testSchemaClass() throws IOException{
151
+ SchemaConfig con = createSchemaConfigForTest();
152
+ File f = createSchemaConfigFileForTest(con);
153
+
154
+ ConfigSource c = config.deepCopy();
155
+ c.set("schema_path",f);
156
+ c.set("schema_class","SchemaConfig");
157
+
158
+ BqPluginTask task = c.loadConfig(BqPluginTask.class);
159
+ SchemaConfig finalconfig = plugin.getSchemaConfig(task,c);
160
+ log.info("final config : {}",toJson(finalconfig.getColumns()));
161
+ }
162
+
163
+ @Test
164
+ public void testMergeOriginalConfig() throws IOException{
165
+ List<ColumnConfig> l = Lists.newArrayList();
94
166
  //ColumnConfig c = new ColumnConfig(config);
95
167
  ConfigSource emptySource = Exec.newConfigSource();
168
+ l.add( new ColumnConfig("regdate",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S.%H %z"));
169
+ SchemaConfig originalSchemaConfig = new SchemaConfig(l);
96
170
 
97
- l.add( new ColumnConfig("aa",Types.LONG, emptySource));
98
- l.add( new ColumnConfig("bb",Types.STRING, emptySource));
99
- l.add( new ColumnConfig("cc",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S"));
171
+ SchemaConfig con = createSchemaConfigForTest();
172
+ File f = createSchemaFileForTest(con);
100
173
 
101
- log.info("list : {}", toJson(l));
174
+ ConfigSource c = config.deepCopy();
175
+ c.set("columns", originalSchemaConfig );
176
+ c.set("schema_path",f);
177
+ BqPluginTask task = c.loadConfig(BqPluginTask.class);
178
+ SchemaConfig finalconfig = plugin.getSchemaConfig(task,config);
102
179
 
103
- SchemaConfig con = new SchemaConfig(l);
104
- log.info("SchemaConfig : {}", toJson(con));
180
+ log.info("final config : {}",toJson(finalconfig));
181
+ log.info("final config to schema : {}", toJson(finalconfig.toSchema()));
182
+
183
+ assertEquals(finalconfig.getColumn(0).getName(), con.getColumn(0).getName());
184
+ assertEquals(finalconfig.getColumn(1).getName(), con.getColumn(1).getName());
185
+ assertEquals(finalconfig.getColumn(2).getName(), con.getColumn(2).getName());
105
186
 
106
- log.info("{}", toJson(con.toSchema()) );
107
- log.info("{}", con.toSchema().getColumns());
187
+ assertEquals(finalconfig.getColumn(2).getOption().get(String.class, "format"), "%Y-%m-%d %H:%M:%S.%H %z");
188
+ assertEquals(con.getColumn(2).getOption().get(String.class, "format"), "%Y-%m-%d %H:%M:%S");
108
189
  }
109
190
 
110
-
111
191
  @Test
112
192
  public void testInit(){
113
193
  BqPluginTask task = config.loadConfig(BqPluginTask.class);
@@ -121,9 +201,8 @@ public class TestCsvParserWithSchemaFilePlugin
121
201
  CsvParserPlugin.PluginTask parents = config.loadConfig(CsvParserPlugin.PluginTask.class);
122
202
  log.info("{}", parents);
123
203
  }
124
-
125
204
 
126
- @Test
205
+ // @Test
127
206
  public void testParserDefaultrConfig() throws IOException{
128
207
  File f = new File("D:\\temp\\embulk_test.yml");
129
208
  ConfigSource cpn = new ConfigLoader(Exec.session().getModelManager()).fromYamlFile(f).getNested("in").getNested("parser");
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-csv_with_schema_file
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - jo8937
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-20 00:00:00.000000000 Z
11
+ date: 2017-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -59,7 +59,7 @@ files:
59
59
  - src/main/java/org/embulk/parser/csv_with_schema_file/CsvParserWithSchemaFilePlugin.java
60
60
  - src/test/java/org/embulk/parser/csv_with_schema_file/TestCsvParserWithSchemaFilePlugin.java
61
61
  - classpath/commons-compress-1.10.jar
62
- - classpath/embulk-parser-csv_with_schema_file-0.0.1.jar
62
+ - classpath/embulk-parser-csv_with_schema_file-0.0.2.jar
63
63
  - classpath/embulk-standards-0.8.36.jar
64
64
  homepage: https://github.com/jo8937/embulk-parser-csv_with_schema_file
65
65
  licenses: