embulk-parser-csv_with_schema_file 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9deedb78f712bd47f5c8f70d2ef2e9044551a50a
4
- data.tar.gz: 9b6c9c9608f42ec7261f37d81954f7cea979627d
3
+ metadata.gz: 31db0959f10ea5f4ed7dd1cf502966899eb60e36
4
+ data.tar.gz: 9e6b08b3f90978ec99a74d2c1630ecef31b31a3f
5
5
  SHA512:
6
- metadata.gz: 0540e1b7302979e29b6f77f10c5a18731197e3fa2f0e8a8a2edc761a3212de3298db205e7fb4a267fe9901cf7e965b17482b42f1cc92c3681ba24e5756774706
7
- data.tar.gz: 7e59b5115f104eedcbf19fa40fd9923ab8df3f10cd76c4bcb3904d08c47f956851e6e2a82b279639b7fc9a24cf5409108dfa5d2b7b4f23cb4901708c6dd2a8b7
6
+ metadata.gz: 965a3fe66e997f53401325bda3f7fa8a1ec55722a5416ddd782f10e5dfb3ce1343267f2e9ff2584b76024da8a23cb24074dafd3bbc7c932a8bfd85df4edc9d62
7
+ data.tar.gz: 309e34839bb2572a5d800b7d00d7022d43b7d612ac36442d96911b0c9191af81d5336febb0c8bba1afabc3b1a3a3f25a4bf75c0b721b732112530a83508ceb5a
data/README.md CHANGED
@@ -18,7 +18,7 @@ $ embulk gem install embulk-parser-csv_with_schema_file
18
18
  ## Configuration
19
19
 
20
20
  - **schema_path**: schema file path. json. (string, required)
21
- - **columns**: this config is ignored in this plugin.
21
+ - **columns**: Optional. If present, any column whose name matches one in the schema file overrides that schema-file column. (hash, default: `[]`)
22
22
  - other configs extends csv parser. see : http://www.embulk.org/docs/built-in.html#csv-parser-plugin
23
23
 
24
24
  ## Example
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.1"
16
+ version = "0.0.2"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -2,6 +2,7 @@ package org.embulk.parser.csv_with_schema_file;
2
2
 
3
3
  import java.io.File;
4
4
  import java.util.List;
5
+ import java.util.Map;
5
6
 
6
7
  import org.embulk.config.Config;
7
8
  import org.embulk.config.ConfigDefault;
@@ -12,11 +13,14 @@ import org.embulk.spi.Exec;
12
13
  import org.embulk.spi.ParserPlugin;
13
14
  import org.embulk.spi.Schema;
14
15
  import org.embulk.spi.SchemaConfig;
16
+ import org.embulk.spi.type.Type;
15
17
  import org.embulk.standards.CsvParserPlugin;
18
+ import org.jruby.org.objectweb.asm.TypeReference;
16
19
  import org.slf4j.Logger;
17
20
 
18
21
  import com.fasterxml.jackson.databind.ObjectMapper;
19
22
  import com.google.common.collect.Lists;
23
+ import com.google.common.collect.Maps;
20
24
 
21
25
  public class CsvParserWithSchemaFilePlugin
22
26
  extends CsvParserPlugin
@@ -29,6 +33,10 @@ public class CsvParserWithSchemaFilePlugin
29
33
  @Config("schema_path")
30
34
  String getSchemaPath();
31
35
 
36
+ @Config("schema_class")
37
+ @ConfigDefault("\"Schema\"")
38
+ String getSchemaClass();
39
+
32
40
  @Config("columns")
33
41
  @ConfigDefault("[]")
34
42
  @Override
@@ -42,29 +50,92 @@ public class CsvParserWithSchemaFilePlugin
42
50
 
43
51
  log.info("default timestamp format : {}", schemaTask.getDefaultTimestampFormat() );
44
52
 
45
- config.set("columns", getSchemaConfig(schemaTask.getSchemaPath(), config));
53
+ config.set("columns", getSchemaConfig(schemaTask, config));
46
54
 
47
55
  super.transaction(config, control);
48
56
  }
49
57
 
50
- public SchemaConfig getSchemaConfig(String path, ConfigSource config) {
58
+ public static class ColumnConfigTemp
59
+ {
60
+
61
+ public ColumnConfigTemp() {
62
+ super();
63
+ }
64
+
65
+ private String name;
66
+ private Type type;
67
+ private String format;
68
+ public String getName() {
69
+ return name;
70
+ }
71
+ public void setName(String name) {
72
+ this.name = name;
73
+ }
74
+ public Type getType() {
75
+ return type;
76
+ }
77
+ public void setType(Type type) {
78
+ this.type = type;
79
+ }
80
+ public String getFormat() {
81
+ return format;
82
+ }
83
+ public void setFormat(String format) {
84
+ this.format = format;
85
+ }
86
+
87
+ }
88
+ /**
89
+ * if "columns" in embulk config file, use that
90
+ * @param schemaTask
91
+ * @param config
92
+ * @return
93
+ */
94
+ public SchemaConfig getSchemaConfig(BqPluginTask schemaTask, ConfigSource config) {
95
+ Map<String, ColumnConfig> map = Maps.newHashMap();
96
+ if(schemaTask.getSchemaConfig() != null && schemaTask.getSchemaConfig().isEmpty() == false) {
97
+ // schemaTask.getSchemaConfig().getColumns().stream().collect(Collectors.toMap(x-> x.getName(), y -> y, (a,b) -> b));
98
+ for(ColumnConfig c : schemaTask.getSchemaConfig().getColumns()) {
99
+ map.put(c.getName(), c);
100
+ }
101
+ }
51
102
  List<ColumnConfig> columns = Lists.newArrayList();
52
- Schema schema = getSchemaFromFile(path);
53
- for(Column c : schema.getColumns()){
54
- columns.add(new ColumnConfig(c.getName(), c.getType(), config));
103
+ if("SchemaConfig".equals(schemaTask.getSchemaClass().trim())) {
104
+ //SchemaConfig schema = getSchemaFromFile(schemaTask.getSchemaPath(), SchemaConfig.class);
105
+ ColumnConfigTemp[] mapList = getSchemaFromFile(schemaTask.getSchemaPath(), ColumnConfigTemp[].class);
106
+ for(ColumnConfigTemp c : mapList){
107
+ if(map.containsKey(c.getName())) {
108
+ columns.add(map.get(c.getName()));
109
+ }else {
110
+ columns.add(new ColumnConfig(c.getName(), c.getType(), c.getFormat()));
111
+ }
112
+ }
113
+ }else {
114
+ Schema schema = getSchemaFromFile(schemaTask.getSchemaPath(), Schema.class);
115
+ for(Column c : schema.getColumns()){
116
+ if(map.containsKey(c.getName())) {
117
+ columns.add(map.get(c.getName()));
118
+ }else {
119
+ columns.add(new ColumnConfig(c.getName(), c.getType(), config));
120
+ }
121
+ }
55
122
  }
56
123
  SchemaConfig conf = new SchemaConfig(columns);
124
+
125
+ log.info("Final Config : {}", conf.toSchema());
126
+
57
127
  return conf;
58
128
  }
59
129
 
60
- public Schema getSchemaFromFile(String path) {
130
+
131
+
132
+ public <T> T getSchemaFromFile(String path, Class<T> cls) {
61
133
  ObjectMapper mapper = new ObjectMapper();
62
134
  try {
63
- Schema schema = mapper.readValue(new File(path), Schema.class);
135
+ T schema = mapper.readValue(new File(path), cls);
64
136
  return schema;
65
137
  } catch (Exception e) {
66
- throw new RuntimeException("error when parse schema file : " + path,e);
67
-
138
+ throw new RuntimeException("error when parse Schema : <"+cls+"> file : " + path,e);
68
139
  }
69
140
  }
70
141
  }
@@ -1,11 +1,13 @@
1
1
  package org.embulk.parser.csv_with_schema_file;
2
2
 
3
+ import static org.junit.Assert.assertEquals;
3
4
  import static org.junit.Assume.assumeNotNull;
4
5
 
5
6
  import java.io.File;
6
7
  import java.io.IOException;
7
8
  import java.util.List;
8
9
 
10
+ import org.codehaus.plexus.util.FileUtils;
9
11
  import org.embulk.EmbulkTestRuntime;
10
12
  import org.embulk.config.ConfigLoader;
11
13
  import org.embulk.config.ConfigSource;
@@ -30,7 +32,10 @@ import org.slf4j.Logger;
30
32
  import org.slf4j.LoggerFactory;
31
33
 
32
34
  import com.fasterxml.jackson.core.JsonProcessingException;
35
+ import com.fasterxml.jackson.core.type.TypeReference;
33
36
  import com.fasterxml.jackson.databind.ObjectMapper;
37
+ import com.fasterxml.jackson.databind.SerializationFeature;
38
+ import com.fasterxml.jackson.databind.node.ObjectNode;
34
39
  import com.google.common.collect.Lists;
35
40
 
36
41
  public class TestCsvParserWithSchemaFilePlugin
@@ -57,9 +62,10 @@ public class TestCsvParserWithSchemaFilePlugin
57
62
  public void createResources() throws IOException
58
63
  {
59
64
  config = Exec.newConfigSource();
60
- config.set("schema_path", "D:\\Temp\\gcstemp2\\csv_schema.json");
61
65
  config.set("default_timestamp_format", "%Y-%m-%d %H:%M:%S %z");
62
- //config.set("columns","");
66
+
67
+ SchemaConfig con = createSchemaConfigForTest();
68
+ config.set("schema_path", createSchemaFileForTest(con));
63
69
 
64
70
  plugin = new CsvParserWithSchemaFilePlugin();
65
71
  runner = new FileInputRunner(runtime.getInstance(LocalFileInputPlugin.class));
@@ -68,12 +74,14 @@ public class TestCsvParserWithSchemaFilePlugin
68
74
  assumeNotNull(
69
75
  config.get(String.class, "schema_path")
70
76
  );
77
+
78
+
71
79
  }
72
80
 
73
81
  @Test
74
82
  public void testParseSchema(){
75
83
  BqPluginTask task = config.loadConfig(BqPluginTask.class);
76
- Schema schema = plugin.getSchemaFromFile(task.getSchemaPath());
84
+ Schema schema = plugin.getSchemaFromFile(task.getSchemaPath(), Schema.class);
77
85
  log.info("{}",schema.getColumns());
78
86
  assumeNotNull(schema.getColumns());
79
87
  }
@@ -81,6 +89,7 @@ public class TestCsvParserWithSchemaFilePlugin
81
89
  public String toJson(Object o){
82
90
  ObjectMapper mapper = new ObjectMapper();
83
91
  try {
92
+ mapper.enable(SerializationFeature.INDENT_OUTPUT);
84
93
  String jsonString = mapper.writeValueAsString(o);
85
94
  return jsonString;
86
95
  } catch (JsonProcessingException e) {
@@ -88,26 +97,97 @@ public class TestCsvParserWithSchemaFilePlugin
88
97
  return null;
89
98
  }
90
99
  }
91
- @Test
92
- public void testColumnConfig(){
100
+
101
+ public SchemaConfig createSchemaConfigForTest() {
93
102
  List<ColumnConfig> l = Lists.newArrayList();
103
+ ConfigSource emptySource = Exec.newConfigSource();
104
+ l.add( new ColumnConfig("idx",Types.LONG, emptySource));
105
+ l.add( new ColumnConfig("title",Types.STRING, emptySource));
106
+ l.add( new ColumnConfig("regdate",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S"));
107
+ SchemaConfig con = new SchemaConfig(l);
108
+ return con;
109
+ }
110
+
111
+
112
+ public File createSchemaConfigFileForTest(SchemaConfig con) throws IOException {
113
+ //con.getColumns().stream().map(x -> x.getConfigSource().getObjectNode()).collect(Collectors.toList());
114
+ List<ObjectNode> jsonList = Lists.newArrayList();
115
+ for(ColumnConfig c : con.getColumns()) {
116
+ jsonList.add( c.getConfigSource().getObjectNode() );
117
+ }
118
+ String configString = toJson(jsonList);
119
+ log.info("SchemaConfig String : {}",configString);
120
+
121
+ File f = File.createTempFile("embulk-test-schemaconfig", ".json");
122
+ FileUtils.fileWrite(f, configString);
123
+ f.deleteOnExit();
124
+
125
+ return f;
126
+ }
127
+
128
+ public File createSchemaFileForTest(SchemaConfig con) throws IOException {
129
+ String schemaString = toJson(con.toSchema());
130
+ log.debug("Schema String : {}",schemaString);
131
+ File f = File.createTempFile("embulk-test-schema", ".json");
132
+ FileUtils.fileWrite(f, schemaString);
133
+ f.deleteOnExit();
134
+ return f;
135
+ }
136
+
137
+
138
+ @Test
139
+ public void testSchemaFile() throws IOException{
140
+ BqPluginTask task = config.loadConfig(BqPluginTask.class);
141
+ SchemaConfig finalconfig = plugin.getSchemaConfig(task,config);
142
+ log.info("final config : {}",toJson(finalconfig.getColumns()));
143
+
144
+ assertEquals(finalconfig.getColumn(0).getName(), "idx");
145
+ assertEquals(finalconfig.getColumn(1).getName(), "title");
146
+ assertEquals(finalconfig.getColumn(2).getName(), "regdate");
147
+ }
148
+
149
+ @Test
150
+ public void testSchemaClass() throws IOException{
151
+ SchemaConfig con = createSchemaConfigForTest();
152
+ File f = createSchemaConfigFileForTest(con);
153
+
154
+ ConfigSource c = config.deepCopy();
155
+ c.set("schema_path",f);
156
+ c.set("schema_class","SchemaConfig");
157
+
158
+ BqPluginTask task = c.loadConfig(BqPluginTask.class);
159
+ SchemaConfig finalconfig = plugin.getSchemaConfig(task,c);
160
+ log.info("final config : {}",toJson(finalconfig.getColumns()));
161
+ }
162
+
163
+ @Test
164
+ public void testMergeOriginalConfig() throws IOException{
165
+ List<ColumnConfig> l = Lists.newArrayList();
94
166
  //ColumnConfig c = new ColumnConfig(config);
95
167
  ConfigSource emptySource = Exec.newConfigSource();
168
+ l.add( new ColumnConfig("regdate",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S.%H %z"));
169
+ SchemaConfig originalSchemaConfig = new SchemaConfig(l);
96
170
 
97
- l.add( new ColumnConfig("aa",Types.LONG, emptySource));
98
- l.add( new ColumnConfig("bb",Types.STRING, emptySource));
99
- l.add( new ColumnConfig("cc",Types.TIMESTAMP, "%Y-%m-%d %H:%M:%S"));
171
+ SchemaConfig con = createSchemaConfigForTest();
172
+ File f = createSchemaFileForTest(con);
100
173
 
101
- log.info("list : {}", toJson(l));
174
+ ConfigSource c = config.deepCopy();
175
+ c.set("columns", originalSchemaConfig );
176
+ c.set("schema_path",f);
177
+ BqPluginTask task = c.loadConfig(BqPluginTask.class);
178
+ SchemaConfig finalconfig = plugin.getSchemaConfig(task,config);
102
179
 
103
- SchemaConfig con = new SchemaConfig(l);
104
- log.info("SchemaConfig : {}", toJson(con));
180
+ log.info("final config : {}",toJson(finalconfig));
181
+ log.info("final config to schema : {}", toJson(finalconfig.toSchema()));
182
+
183
+ assertEquals(finalconfig.getColumn(0).getName(), con.getColumn(0).getName());
184
+ assertEquals(finalconfig.getColumn(1).getName(), con.getColumn(1).getName());
185
+ assertEquals(finalconfig.getColumn(2).getName(), con.getColumn(2).getName());
105
186
 
106
- log.info("{}", toJson(con.toSchema()) );
107
- log.info("{}", con.toSchema().getColumns());
187
+ assertEquals(finalconfig.getColumn(2).getOption().get(String.class, "format"), "%Y-%m-%d %H:%M:%S.%H %z");
188
+ assertEquals(con.getColumn(2).getOption().get(String.class, "format"), "%Y-%m-%d %H:%M:%S");
108
189
  }
109
190
 
110
-
111
191
  @Test
112
192
  public void testInit(){
113
193
  BqPluginTask task = config.loadConfig(BqPluginTask.class);
@@ -121,9 +201,8 @@ public class TestCsvParserWithSchemaFilePlugin
121
201
  CsvParserPlugin.PluginTask parents = config.loadConfig(CsvParserPlugin.PluginTask.class);
122
202
  log.info("{}", parents);
123
203
  }
124
-
125
204
 
126
- @Test
205
+ // @Test
127
206
  public void testParserDefaultrConfig() throws IOException{
128
207
  File f = new File("D:\\temp\\embulk_test.yml");
129
208
  ConfigSource cpn = new ConfigLoader(Exec.session().getModelManager()).fromYamlFile(f).getNested("in").getNested("parser");
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-csv_with_schema_file
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - jo8937
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-20 00:00:00.000000000 Z
11
+ date: 2017-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -59,7 +59,7 @@ files:
59
59
  - src/main/java/org/embulk/parser/csv_with_schema_file/CsvParserWithSchemaFilePlugin.java
60
60
  - src/test/java/org/embulk/parser/csv_with_schema_file/TestCsvParserWithSchemaFilePlugin.java
61
61
  - classpath/commons-compress-1.10.jar
62
- - classpath/embulk-parser-csv_with_schema_file-0.0.1.jar
62
+ - classpath/embulk-parser-csv_with_schema_file-0.0.2.jar
63
63
  - classpath/embulk-standards-0.8.36.jar
64
64
  homepage: https://github.com/jo8937/embulk-parser-csv_with_schema_file
65
65
  licenses: