embulk-parser-csv_guessable 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/build.gradle +17 -15
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/parser/csv_guessable/CsvGuessableParserPlugin.java +5 -3
- data/src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java +29 -0
- data/src/test/resources/org/embulk/parser/csv_guessable/data/test.tsv +3 -0
- metadata +30 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f277752d08cd0133b915c5b00bc17d42cc87537d
|
4
|
+
data.tar.gz: 705c7bba540b5c58e2be6dea18f204b51cbd50a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24ef39e21fbf84eb370411796857eff54c4c891097be3bdb9202459074d2af6ee0fd98082702f7a1db00836534876db7c5fc9bc0701b4d33a01c984509e86b2c
|
7
|
+
data.tar.gz: c35874678bb4f097ebd59c76d7dd16037ffbd12768c3fda61066331661cd9549b39bc4f6cb26be9f76a3aaf931fe29f24008441fff0dedfb402e724c5443c920
|
data/README.md
CHANGED
@@ -8,13 +8,17 @@ Csv file sometimes has a schema in the header.
|
|
8
8
|
**embulk-parser-csv_guessable** parses such a csv by using its header as the column names.
|
9
9
|
This plugin is useful when the target csv schema changes frequently.
|
10
10
|
|
11
|
-
It behaves as original csv parser when **embulk-parser-csv_guessable**
|
11
|
+
It behaves like the original csv parser when the **embulk-parser-csv_guessable** configs (`schema_file` and `schema_line`) are not defined.
|
12
12
|
|
13
13
|
## Overview
|
14
14
|
|
15
15
|
* **Plugin type**: parser
|
16
16
|
* **Guess supported**: no
|
17
17
|
|
18
|
+
## Prerequisites
|
19
|
+
- java: 1.8+
|
20
|
+
- embulk: 0.9+
|
21
|
+
|
18
22
|
## Configuration
|
19
23
|
|
20
24
|
- **schema_file**: name of the file that contains the schema. (string, default: `null`)
|
data/build.gradle
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
plugins {
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
|
-
id "com.github.jruby-gradle.base" version "
|
3
|
+
id "com.github.jruby-gradle.base" version "1.5.0"
|
4
4
|
id "java"
|
5
5
|
id "checkstyle"
|
6
6
|
}
|
@@ -13,19 +13,19 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.
|
16
|
+
version = "0.2.0"
|
17
17
|
|
18
|
-
sourceCompatibility = 1.
|
19
|
-
targetCompatibility = 1.
|
18
|
+
sourceCompatibility = 1.8
|
19
|
+
targetCompatibility = 1.8
|
20
20
|
|
21
21
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.
|
23
|
-
compile "org.embulk:embulk-standards:0.
|
24
|
-
compile "com.opencsv:opencsv:
|
25
|
-
provided "org.embulk:embulk-core:0.
|
22
|
+
compile "org.embulk:embulk-core:0.9.+"
|
23
|
+
compile "org.embulk:embulk-standards:0.9.+"
|
24
|
+
compile "com.opencsv:opencsv:4.2"
|
25
|
+
provided "org.embulk:embulk-core:0.9.+"
|
26
26
|
testCompile "junit:junit:4.+"
|
27
|
-
testCompile "org.embulk:embulk-core:0.
|
28
|
-
testCompile "org.embulk:embulk-standards:0.
|
27
|
+
testCompile "org.embulk:embulk-core:0.9.+:tests"
|
28
|
+
testCompile "org.embulk:embulk-standards:0.9.+:tests"
|
29
29
|
}
|
30
30
|
|
31
31
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -53,20 +53,22 @@ task checkstyle(type: Checkstyle) {
|
|
53
53
|
}
|
54
54
|
|
55
55
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
56
|
-
jrubyArgs "-
|
57
|
-
script "
|
56
|
+
jrubyArgs "-S"
|
57
|
+
script "gem"
|
58
|
+
scriptArgs "build", "${project.name}.gemspec"
|
58
59
|
doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
|
59
60
|
}
|
60
61
|
|
61
62
|
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
62
|
-
jrubyArgs "-
|
63
|
-
script "
|
63
|
+
jrubyArgs "-S"
|
64
|
+
script "gem"
|
65
|
+
scriptArgs "push", "pkg/${project.name}-${project.version}.gem"
|
64
66
|
}
|
65
67
|
|
66
68
|
task "package"(dependsOn: ["gemspec", "classpath"]) {
|
67
69
|
doLast {
|
68
70
|
println "> Build succeeded."
|
69
|
-
println "> You can run embulk with
|
71
|
+
println "> You can run embulk with ${argumentToRunEmbulkJava} argument."
|
70
72
|
}
|
71
73
|
}
|
72
74
|
|
Binary file
|
@@ -1,5 +1,5 @@
|
|
1
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-4.2.1-bin.zip
|
2
1
|
distributionBase=GRADLE_USER_HOME
|
3
2
|
distributionPath=wrapper/dists
|
4
|
-
|
3
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-4.9-bin.zip
|
5
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
|
+
zipStorePath=wrapper/dists
|
@@ -141,7 +141,9 @@ public class CsvGuessableParserPlugin
|
|
141
141
|
|
142
142
|
String header = readHeader(task.getSchemaFile().get().getPath(), schemaLine, task.getCharset());
|
143
143
|
log.debug(header);
|
144
|
-
|
144
|
+
String delimiter = task.getDelimiter();
|
145
|
+
ArrayList<ColumnConfig> schema = newColumns(header, config, delimiter);
|
146
|
+
|
145
147
|
|
146
148
|
/* alias and set type */
|
147
149
|
if (task.getSchemaConfig().isPresent()) {
|
@@ -396,12 +398,12 @@ public class CsvGuessableParserPlugin
|
|
396
398
|
return line;
|
397
399
|
}
|
398
400
|
|
399
|
-
private ArrayList<ColumnConfig> newColumns(String header, ConfigSource config)
|
401
|
+
private ArrayList<ColumnConfig> newColumns(String header, ConfigSource config, String delimiter)
|
400
402
|
{
|
401
403
|
ArrayList<ColumnConfig> columns = new ArrayList<ColumnConfig>();
|
402
404
|
PluginTask task = config.loadConfig(PluginTask.class);
|
403
405
|
|
404
|
-
try (CSVReader reader = new CSVReader(new StringReader(header))) {
|
406
|
+
try (CSVReader reader = new CSVReader(new StringReader(header), delimiter.charAt(0))) {
|
405
407
|
String[] csv = reader.readNext();
|
406
408
|
for (String column : csv) {
|
407
409
|
columns.add(new ColumnConfig(column, Types.STRING, config));
|
@@ -120,6 +120,35 @@ public class TestCsvGuessableParserPlugin
|
|
120
120
|
}
|
121
121
|
}
|
122
122
|
|
123
|
+
@Test
|
124
|
+
public void guessableTsv()
|
125
|
+
throws Exception
|
126
|
+
{
|
127
|
+
String configYaml = "" +
|
128
|
+
"type: csv_guessable\n" +
|
129
|
+
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.tsv\n" + // TODO: FIX PATH
|
130
|
+
"schema_line: 1\n" +
|
131
|
+
"delimiter: \"\\t\"";
|
132
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
133
|
+
transaction(config, fileInput("data/test.tsv"));
|
134
|
+
List<Object[]> records = Pages.toObjects(schema, output.pages);
|
135
|
+
assertEquals(2, records.size());
|
136
|
+
|
137
|
+
Object[] record;
|
138
|
+
{
|
139
|
+
record = records.get(0);
|
140
|
+
assertEquals("100", record[0]);
|
141
|
+
assertEquals("test-title", record[1]);
|
142
|
+
assertEquals("ok", record[2]);
|
143
|
+
}
|
144
|
+
{
|
145
|
+
record = records.get(1);
|
146
|
+
assertEquals("191", record[0]);
|
147
|
+
assertEquals("title2", record[1]);
|
148
|
+
assertEquals("ng", record[2]);
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
123
152
|
@Test
|
124
153
|
public void specifyType()
|
125
154
|
throws Exception
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-csv_guessable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- koooge
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-08-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
|
-
- - ~>
|
16
|
+
- - "~>"
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- -
|
24
|
+
- - "~>"
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
|
-
- -
|
30
|
+
- - ">="
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: Parses Guessable Csv files read by other file input plugins.
|
42
42
|
email:
|
43
43
|
- koooooge@gmail.com
|
@@ -45,11 +45,21 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
- .circleci/config.yml
|
49
|
-
- .gitignore
|
48
|
+
- ".circleci/config.yml"
|
49
|
+
- ".gitignore"
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- classpath/commons-beanutils-1.9.3.jar
|
54
|
+
- classpath/commons-collections-3.2.2.jar
|
55
|
+
- classpath/commons-collections4-4.1.jar
|
56
|
+
- classpath/commons-compress-1.10.jar
|
57
|
+
- classpath/commons-lang3-3.7.jar
|
58
|
+
- classpath/commons-logging-1.2.jar
|
59
|
+
- classpath/commons-text-1.3.jar
|
60
|
+
- classpath/embulk-parser-csv_guessable-0.2.0.jar
|
61
|
+
- classpath/embulk-standards-0.9.7.jar
|
62
|
+
- classpath/opencsv-4.2.jar
|
53
63
|
- config/checkstyle/checkstyle.xml
|
54
64
|
- config/checkstyle/default.xml
|
55
65
|
- gradle/wrapper/gradle-wrapper.jar
|
@@ -67,19 +77,12 @@ files:
|
|
67
77
|
- src/main/java/org/embulk/parser/csv_guessable/CsvTokenizer.java
|
68
78
|
- src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java
|
69
79
|
- src/test/resources/org/embulk/parser/csv_guessable/data/test.csv
|
80
|
+
- src/test/resources/org/embulk/parser/csv_guessable/data/test.tsv
|
70
81
|
- src/test/resources/org/embulk/parser/csv_guessable/data/test_alias.csv
|
71
82
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/guess_and_set_type.yml
|
72
83
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/guess_from_header.yml
|
73
84
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/original-csv.yml
|
74
85
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/replace_column_name.yml
|
75
|
-
- classpath/commons-lang3-3.5.jar
|
76
|
-
- classpath/embulk-parser-csv_guessable-0.1.5.jar
|
77
|
-
- classpath/embulk-standards-0.8.35.jar
|
78
|
-
- classpath/opencsv-3.9.jar
|
79
|
-
- classpath/commons-beanutils-1.9.3.jar
|
80
|
-
- classpath/commons-compress-1.10.jar
|
81
|
-
- classpath/commons-collections-3.2.2.jar
|
82
|
-
- classpath/commons-logging-1.2.jar
|
83
86
|
homepage: https://github.com/koooge/embulk-parser-csv_guessable
|
84
87
|
licenses:
|
85
88
|
- MIT
|
@@ -90,17 +93,17 @@ require_paths:
|
|
90
93
|
- lib
|
91
94
|
required_ruby_version: !ruby/object:Gem::Requirement
|
92
95
|
requirements:
|
93
|
-
- -
|
96
|
+
- - ">="
|
94
97
|
- !ruby/object:Gem::Version
|
95
98
|
version: '0'
|
96
99
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
100
|
requirements:
|
98
|
-
- -
|
101
|
+
- - ">="
|
99
102
|
- !ruby/object:Gem::Version
|
100
103
|
version: '0'
|
101
104
|
requirements: []
|
102
105
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.
|
106
|
+
rubygems_version: 2.6.8
|
104
107
|
signing_key:
|
105
108
|
specification_version: 4
|
106
109
|
summary: Guessable Csv parser plugin for Embulk
|