embulk-parser-csv_guessable 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/build.gradle +17 -15
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/parser/csv_guessable/CsvGuessableParserPlugin.java +5 -3
- data/src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java +29 -0
- data/src/test/resources/org/embulk/parser/csv_guessable/data/test.tsv +3 -0
- metadata +30 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f277752d08cd0133b915c5b00bc17d42cc87537d
|
4
|
+
data.tar.gz: 705c7bba540b5c58e2be6dea18f204b51cbd50a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24ef39e21fbf84eb370411796857eff54c4c891097be3bdb9202459074d2af6ee0fd98082702f7a1db00836534876db7c5fc9bc0701b4d33a01c984509e86b2c
|
7
|
+
data.tar.gz: c35874678bb4f097ebd59c76d7dd16037ffbd12768c3fda61066331661cd9549b39bc4f6cb26be9f76a3aaf931fe29f24008441fff0dedfb402e724c5443c920
|
data/README.md
CHANGED
@@ -8,13 +8,17 @@ Csv file sometimes has a schema in the header.
|
|
8
8
|
**embulk-parser-csv_guessable** parses such a csv by using its header as the column names.
|
9
9
|
This plugin is useful when the target csv schema changes frequently.
|
10
10
|
|
11
|
-
It behaves as original csv parser when **embulk-parser-csv_guessable**
|
11
|
+
It behaves as the original csv parser when the **embulk-parser-csv_guessable** configs (`schema_file` and `schema_line`) are not defined.
|
12
12
|
|
13
13
|
## Overview
|
14
14
|
|
15
15
|
* **Plugin type**: parser
|
16
16
|
* **Guess supported**: no
|
17
17
|
|
18
|
+
## Prerequisites
|
19
|
+
- java: 1.8+
|
20
|
+
- embulk: 0.9+
|
21
|
+
|
18
22
|
## Configuration
|
19
23
|
|
20
24
|
- **schema_file**: filename which has schema.(string, default: `null`)
|
data/build.gradle
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
plugins {
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
|
-
id "com.github.jruby-gradle.base" version "
|
3
|
+
id "com.github.jruby-gradle.base" version "1.5.0"
|
4
4
|
id "java"
|
5
5
|
id "checkstyle"
|
6
6
|
}
|
@@ -13,19 +13,19 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.5"
|
16
|
+
version = "0.2.0"
|
17
17
|
|
18
|
-
sourceCompatibility = 1.
|
19
|
-
targetCompatibility = 1.
|
18
|
+
sourceCompatibility = 1.8
|
19
|
+
targetCompatibility = 1.8
|
20
20
|
|
21
21
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.
|
23
|
-
compile "org.embulk:embulk-standards:0.
|
24
|
-
compile "com.opencsv:opencsv:
|
25
|
-
provided "org.embulk:embulk-core:0.
|
22
|
+
compile "org.embulk:embulk-core:0.9.+"
|
23
|
+
compile "org.embulk:embulk-standards:0.9.+"
|
24
|
+
compile "com.opencsv:opencsv:4.2"
|
25
|
+
provided "org.embulk:embulk-core:0.9.+"
|
26
26
|
testCompile "junit:junit:4.+"
|
27
|
-
testCompile "org.embulk:embulk-core:0.
|
28
|
-
testCompile "org.embulk:embulk-standards:0.
|
27
|
+
testCompile "org.embulk:embulk-core:0.9.+:tests"
|
28
|
+
testCompile "org.embulk:embulk-standards:0.9.+:tests"
|
29
29
|
}
|
30
30
|
|
31
31
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -53,20 +53,22 @@ task checkstyle(type: Checkstyle) {
|
|
53
53
|
}
|
54
54
|
|
55
55
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
56
|
-
jrubyArgs "-
|
57
|
-
script "
|
56
|
+
jrubyArgs "-S"
|
57
|
+
script "gem"
|
58
|
+
scriptArgs "build", "${project.name}.gemspec"
|
58
59
|
doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
|
59
60
|
}
|
60
61
|
|
61
62
|
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
62
|
-
jrubyArgs "-
|
63
|
-
script "
|
63
|
+
jrubyArgs "-S"
|
64
|
+
script "gem"
|
65
|
+
scriptArgs "push", "pkg/${project.name}-${project.version}.gem"
|
64
66
|
}
|
65
67
|
|
66
68
|
task "package"(dependsOn: ["gemspec", "classpath"]) {
|
67
69
|
doLast {
|
68
70
|
println "> Build succeeded."
|
69
|
-
println "> You can run embulk with
|
71
|
+
println "> You can run embulk with ${argumentToRunEmbulkJava} argument."
|
70
72
|
}
|
71
73
|
}
|
72
74
|
|
Binary file
|
@@ -1,5 +1,5 @@
|
|
1
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-4.2.1-bin.zip
|
2
1
|
distributionBase=GRADLE_USER_HOME
|
3
2
|
distributionPath=wrapper/dists
|
4
|
-
|
3
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-4.9-bin.zip
|
5
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
|
+
zipStorePath=wrapper/dists
|
@@ -141,7 +141,9 @@ public class CsvGuessableParserPlugin
|
|
141
141
|
|
142
142
|
String header = readHeader(task.getSchemaFile().get().getPath(), schemaLine, task.getCharset());
|
143
143
|
log.debug(header);
|
144
|
-
|
144
|
+
String delimiter = task.getDelimiter();
|
145
|
+
ArrayList<ColumnConfig> schema = newColumns(header, config, delimiter);
|
146
|
+
|
145
147
|
|
146
148
|
/* alias and set type */
|
147
149
|
if (task.getSchemaConfig().isPresent()) {
|
@@ -396,12 +398,12 @@ public class CsvGuessableParserPlugin
|
|
396
398
|
return line;
|
397
399
|
}
|
398
400
|
|
399
|
-
private ArrayList<ColumnConfig> newColumns(String header, ConfigSource config)
|
401
|
+
private ArrayList<ColumnConfig> newColumns(String header, ConfigSource config, String delimiter)
|
400
402
|
{
|
401
403
|
ArrayList<ColumnConfig> columns = new ArrayList<ColumnConfig>();
|
402
404
|
PluginTask task = config.loadConfig(PluginTask.class);
|
403
405
|
|
404
|
-
try (CSVReader reader = new CSVReader(new StringReader(header))) {
|
406
|
+
try (CSVReader reader = new CSVReader(new StringReader(header), delimiter.charAt(0))) {
|
405
407
|
String[] csv = reader.readNext();
|
406
408
|
for (String column : csv) {
|
407
409
|
columns.add(new ColumnConfig(column, Types.STRING, config));
|
@@ -120,6 +120,35 @@ public class TestCsvGuessableParserPlugin
|
|
120
120
|
}
|
121
121
|
}
|
122
122
|
|
123
|
+
@Test
|
124
|
+
public void guessableTsv()
|
125
|
+
throws Exception
|
126
|
+
{
|
127
|
+
String configYaml = "" +
|
128
|
+
"type: csv_guessable\n" +
|
129
|
+
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.tsv\n" + // TODO: FIX PATH
|
130
|
+
"schema_line: 1\n" +
|
131
|
+
"delimiter: \"\\t\"";
|
132
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
133
|
+
transaction(config, fileInput("data/test.tsv"));
|
134
|
+
List<Object[]> records = Pages.toObjects(schema, output.pages);
|
135
|
+
assertEquals(2, records.size());
|
136
|
+
|
137
|
+
Object[] record;
|
138
|
+
{
|
139
|
+
record = records.get(0);
|
140
|
+
assertEquals("100", record[0]);
|
141
|
+
assertEquals("test-title", record[1]);
|
142
|
+
assertEquals("ok", record[2]);
|
143
|
+
}
|
144
|
+
{
|
145
|
+
record = records.get(1);
|
146
|
+
assertEquals("191", record[0]);
|
147
|
+
assertEquals("title2", record[1]);
|
148
|
+
assertEquals("ng", record[2]);
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
123
152
|
@Test
|
124
153
|
public void specifyType()
|
125
154
|
throws Exception
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-csv_guessable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- koooge
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-08-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
|
-
- - ~>
|
16
|
+
- - "~>"
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- -
|
24
|
+
- - "~>"
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
|
-
- -
|
30
|
+
- - ">="
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: Parses Guessable Csv files read by other file input plugins.
|
42
42
|
email:
|
43
43
|
- koooooge@gmail.com
|
@@ -45,11 +45,21 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
- .circleci/config.yml
|
49
|
-
- .gitignore
|
48
|
+
- ".circleci/config.yml"
|
49
|
+
- ".gitignore"
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- classpath/commons-beanutils-1.9.3.jar
|
54
|
+
- classpath/commons-collections-3.2.2.jar
|
55
|
+
- classpath/commons-collections4-4.1.jar
|
56
|
+
- classpath/commons-compress-1.10.jar
|
57
|
+
- classpath/commons-lang3-3.7.jar
|
58
|
+
- classpath/commons-logging-1.2.jar
|
59
|
+
- classpath/commons-text-1.3.jar
|
60
|
+
- classpath/embulk-parser-csv_guessable-0.2.0.jar
|
61
|
+
- classpath/embulk-standards-0.9.7.jar
|
62
|
+
- classpath/opencsv-4.2.jar
|
53
63
|
- config/checkstyle/checkstyle.xml
|
54
64
|
- config/checkstyle/default.xml
|
55
65
|
- gradle/wrapper/gradle-wrapper.jar
|
@@ -67,19 +77,12 @@ files:
|
|
67
77
|
- src/main/java/org/embulk/parser/csv_guessable/CsvTokenizer.java
|
68
78
|
- src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java
|
69
79
|
- src/test/resources/org/embulk/parser/csv_guessable/data/test.csv
|
80
|
+
- src/test/resources/org/embulk/parser/csv_guessable/data/test.tsv
|
70
81
|
- src/test/resources/org/embulk/parser/csv_guessable/data/test_alias.csv
|
71
82
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/guess_and_set_type.yml
|
72
83
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/guess_from_header.yml
|
73
84
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/original-csv.yml
|
74
85
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/replace_column_name.yml
|
75
|
-
- classpath/commons-lang3-3.5.jar
|
76
|
-
- classpath/embulk-parser-csv_guessable-0.1.5.jar
|
77
|
-
- classpath/embulk-standards-0.8.35.jar
|
78
|
-
- classpath/opencsv-3.9.jar
|
79
|
-
- classpath/commons-beanutils-1.9.3.jar
|
80
|
-
- classpath/commons-compress-1.10.jar
|
81
|
-
- classpath/commons-collections-3.2.2.jar
|
82
|
-
- classpath/commons-logging-1.2.jar
|
83
86
|
homepage: https://github.com/koooge/embulk-parser-csv_guessable
|
84
87
|
licenses:
|
85
88
|
- MIT
|
@@ -90,17 +93,17 @@ require_paths:
|
|
90
93
|
- lib
|
91
94
|
required_ruby_version: !ruby/object:Gem::Requirement
|
92
95
|
requirements:
|
93
|
-
- -
|
96
|
+
- - ">="
|
94
97
|
- !ruby/object:Gem::Version
|
95
98
|
version: '0'
|
96
99
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
100
|
requirements:
|
98
|
-
- -
|
101
|
+
- - ">="
|
99
102
|
- !ruby/object:Gem::Version
|
100
103
|
version: '0'
|
101
104
|
requirements: []
|
102
105
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.
|
106
|
+
rubygems_version: 2.6.8
|
104
107
|
signing_key:
|
105
108
|
specification_version: 4
|
106
109
|
summary: Guessable Csv parser plugin for Embulk
|