embulk-parser-csv_guessable 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +21 -0
- data/README.md +16 -2
- data/build.gradle +4 -4
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -3
- data/gradlew +13 -10
- data/gradlew.bat +84 -84
- data/samples/sample1/config.yml +9 -0
- data/samples/sample1/config_set_type.yml +13 -0
- data/samples/sample1/sample1.csv +10 -0
- data/samples/sample2/config_rename.yml +18 -0
- data/samples/sample2/sample2.csv +10 -0
- data/src/main/java/org/embulk/parser/csv_guessable/CsvGuessableParserPlugin.java +3 -1
- data/src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java +165 -24
- metadata +20 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07a5d30689afe074b0db47f61c04651e74b76024
|
4
|
+
data.tar.gz: b16fda4794ed77f2111d4e047210c8966f0fed2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca548cf755f02e6be06bef5664754271ac1f5041e4baa0dd015e546ec4e00396051a6fc8d532603d01a778069baf9398ae132b1770b70c6eb1790954cf6eafb3
|
7
|
+
data.tar.gz: 368cb17eb071324704ff2ac131511c4f96e67f3a8eb109fbe9e7dec549757dc0b10cbed1176246d1db18f43f48ad98a4b4213f8f482d70f12a2ee2f5e7283473
|
@@ -0,0 +1,21 @@
|
|
1
|
+
version: 2
|
2
|
+
jobs:
|
3
|
+
build:
|
4
|
+
docker:
|
5
|
+
- image: circleci/openjdk:8-jdk
|
6
|
+
working_directory: ~/repo
|
7
|
+
environment:
|
8
|
+
JVM_OPTS: -Xmx3200m
|
9
|
+
TERM: dumb
|
10
|
+
steps:
|
11
|
+
- checkout
|
12
|
+
- restore_cache:
|
13
|
+
keys:
|
14
|
+
- v1-dependencies-{{ checksum "build.gradle" }}
|
15
|
+
- v1-dependencies-
|
16
|
+
- run: gradle dependencies
|
17
|
+
- save_cache:
|
18
|
+
paths:
|
19
|
+
- ~/.m2
|
20
|
+
key: v1-dependencies-{{ checksum "build.gradle" }}
|
21
|
+
- run: gradle test
|
data/README.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/embulk-parser-csv_guessable.svg)](https://badge.fury.io/rb/embulk-parser-csv_guessable)
|
2
|
+
[![CircleCI](https://circleci.com/gh/koooge/embulk-parser-csv_guessable.svg?style=svg)](https://circleci.com/gh/koooge/embulk-parser-csv_guessable)
|
3
|
+
|
1
4
|
# Guessable csv parser plugin for Embulk
|
2
5
|
**embulk-parser-csv_guessable** guesses (at runtime) and parses CSV files that have a schema in the header.
|
3
6
|
|
@@ -24,8 +27,6 @@ It behaves as original csv parser when **embulk-parser-csv_guessable** configs(`
|
|
24
27
|
- **date**: Set date part if the format doesn't include date part
|
25
28
|
- any other csv configs: see [www.embulk.org](http://www.embulk.org/docs/built-in.html#csv-parser-plugin)
|
26
29
|
|
27
|
-
The `columns`
|
28
|
-
|
29
30
|
## Example
|
30
31
|
test.csv (There is a schema at the first line.)
|
31
32
|
|
@@ -88,8 +89,21 @@ $ embulk gem install embulk-parser-csv_guessable
|
|
88
89
|
$ embulk guess -g csv_guessable config.yml -o guessed.yml
|
89
90
|
-->
|
90
91
|
|
92
|
+
## Sample
|
93
|
+
|
94
|
+
```
|
95
|
+
$ cd samples/sample2
|
96
|
+
$ embulk run -L ../../ config_rename.yml -l debug
|
97
|
+
```
|
98
|
+
|
91
99
|
## Build
|
92
100
|
|
93
101
|
```
|
94
102
|
$ ./gradlew gem # -t to watch change of files and rebuild continuously
|
95
103
|
```
|
104
|
+
|
105
|
+
## Test
|
106
|
+
|
107
|
+
```
|
108
|
+
$ ./gradlew test
|
109
|
+
```
|
data/build.gradle
CHANGED
@@ -13,16 +13,16 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.4"
|
16
|
+
version = "0.1.5"
|
17
17
|
|
18
18
|
sourceCompatibility = 1.7
|
19
19
|
targetCompatibility = 1.7
|
20
20
|
|
21
21
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.8
|
23
|
-
compile "org.embulk:embulk-standards:0.8
|
22
|
+
compile "org.embulk:embulk-core:0.8.+"
|
23
|
+
compile "org.embulk:embulk-standards:0.8.+"
|
24
24
|
compile "com.opencsv:opencsv:3.9"
|
25
|
-
provided "org.embulk:embulk-core:0.8
|
25
|
+
provided "org.embulk:embulk-core:0.8.+"
|
26
26
|
testCompile "junit:junit:4.+"
|
27
27
|
testCompile "org.embulk:embulk-core:0.8.+:tests"
|
28
28
|
testCompile "org.embulk:embulk-standards:0.8.+:tests"
|
Binary file
|
@@ -1,6 +1,5 @@
|
|
1
|
-
|
1
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-4.2.1-bin.zip
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
|
-
zipStoreBase=GRADLE_USER_HOME
|
5
4
|
zipStorePath=wrapper/dists
|
6
|
-
|
5
|
+
zipStoreBase=GRADLE_USER_HOME
|
data/gradlew
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env sh
|
2
2
|
|
3
3
|
##############################################################################
|
4
4
|
##
|
@@ -33,11 +33,11 @@ DEFAULT_JVM_OPTS=""
|
|
33
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
34
34
|
MAX_FD="maximum"
|
35
35
|
|
36
|
-
warn (
|
36
|
+
warn () {
|
37
37
|
echo "$*"
|
38
38
|
}
|
39
39
|
|
40
|
-
die (
|
40
|
+
die () {
|
41
41
|
echo
|
42
42
|
echo "$*"
|
43
43
|
echo
|
@@ -154,16 +154,19 @@ if $cygwin ; then
|
|
154
154
|
esac
|
155
155
|
fi
|
156
156
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
157
|
+
# Escape application args
|
158
|
+
save () {
|
159
|
+
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
160
|
+
echo " "
|
160
161
|
}
|
161
|
-
|
162
|
-
|
162
|
+
APP_ARGS=$(save "$@")
|
163
|
+
|
164
|
+
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
165
|
+
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
163
166
|
|
164
167
|
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
165
|
-
if [
|
168
|
+
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
|
166
169
|
cd "$(dirname "$0")"
|
167
170
|
fi
|
168
171
|
|
169
|
-
exec "$JAVACMD" "
|
172
|
+
exec "$JAVACMD" "$@"
|
data/gradlew.bat
CHANGED
@@ -1,84 +1,84 @@
|
|
1
|
-
@if "%DEBUG%" == "" @echo off
|
2
|
-
@rem ##########################################################################
|
3
|
-
@rem
|
4
|
-
@rem Gradle startup script for Windows
|
5
|
-
@rem
|
6
|
-
@rem ##########################################################################
|
7
|
-
|
8
|
-
@rem Set local scope for the variables with windows NT shell
|
9
|
-
if "%OS%"=="Windows_NT" setlocal
|
10
|
-
|
11
|
-
set DIRNAME=%~dp0
|
12
|
-
if "%DIRNAME%" == "" set DIRNAME=.
|
13
|
-
set APP_BASE_NAME=%~n0
|
14
|
-
set APP_HOME=%DIRNAME%
|
15
|
-
|
16
|
-
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
-
set DEFAULT_JVM_OPTS=
|
18
|
-
|
19
|
-
@rem Find java.exe
|
20
|
-
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
-
|
22
|
-
set JAVA_EXE=java.exe
|
23
|
-
%JAVA_EXE% -version >NUL 2>&1
|
24
|
-
if "%ERRORLEVEL%" == "0" goto init
|
25
|
-
|
26
|
-
echo.
|
27
|
-
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
-
echo.
|
29
|
-
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
-
echo location of your Java installation.
|
31
|
-
|
32
|
-
goto fail
|
33
|
-
|
34
|
-
:findJavaFromJavaHome
|
35
|
-
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
-
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
-
|
38
|
-
if exist "%JAVA_EXE%" goto init
|
39
|
-
|
40
|
-
echo.
|
41
|
-
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
-
echo.
|
43
|
-
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
-
echo location of your Java installation.
|
45
|
-
|
46
|
-
goto fail
|
47
|
-
|
48
|
-
:init
|
49
|
-
@rem Get command-line arguments, handling Windows variants
|
50
|
-
|
51
|
-
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
-
|
53
|
-
:win9xME_args
|
54
|
-
@rem Slurp the command line arguments.
|
55
|
-
set CMD_LINE_ARGS=
|
56
|
-
set _SKIP=2
|
57
|
-
|
58
|
-
:win9xME_args_slurp
|
59
|
-
if "x%~1" == "x" goto execute
|
60
|
-
|
61
|
-
set CMD_LINE_ARGS=%*
|
62
|
-
|
63
|
-
:execute
|
64
|
-
@rem Setup the command line
|
65
|
-
|
66
|
-
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
67
|
-
|
68
|
-
@rem Execute Gradle
|
69
|
-
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
70
|
-
|
71
|
-
:end
|
72
|
-
@rem End local scope for the variables with windows NT shell
|
73
|
-
if "%ERRORLEVEL%"=="0" goto mainEnd
|
74
|
-
|
75
|
-
:fail
|
76
|
-
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
77
|
-
rem the _cmd.exe /c_ return code!
|
78
|
-
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
79
|
-
exit /b 1
|
80
|
-
|
81
|
-
:mainEnd
|
82
|
-
if "%OS%"=="Windows_NT" endlocal
|
83
|
-
|
84
|
-
:omega
|
1
|
+
@if "%DEBUG%" == "" @echo off
|
2
|
+
@rem ##########################################################################
|
3
|
+
@rem
|
4
|
+
@rem Gradle startup script for Windows
|
5
|
+
@rem
|
6
|
+
@rem ##########################################################################
|
7
|
+
|
8
|
+
@rem Set local scope for the variables with windows NT shell
|
9
|
+
if "%OS%"=="Windows_NT" setlocal
|
10
|
+
|
11
|
+
set DIRNAME=%~dp0
|
12
|
+
if "%DIRNAME%" == "" set DIRNAME=.
|
13
|
+
set APP_BASE_NAME=%~n0
|
14
|
+
set APP_HOME=%DIRNAME%
|
15
|
+
|
16
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
+
set DEFAULT_JVM_OPTS=
|
18
|
+
|
19
|
+
@rem Find java.exe
|
20
|
+
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
+
|
22
|
+
set JAVA_EXE=java.exe
|
23
|
+
%JAVA_EXE% -version >NUL 2>&1
|
24
|
+
if "%ERRORLEVEL%" == "0" goto init
|
25
|
+
|
26
|
+
echo.
|
27
|
+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
+
echo.
|
29
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
+
echo location of your Java installation.
|
31
|
+
|
32
|
+
goto fail
|
33
|
+
|
34
|
+
:findJavaFromJavaHome
|
35
|
+
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
+
|
38
|
+
if exist "%JAVA_EXE%" goto init
|
39
|
+
|
40
|
+
echo.
|
41
|
+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
+
echo.
|
43
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
+
echo location of your Java installation.
|
45
|
+
|
46
|
+
goto fail
|
47
|
+
|
48
|
+
:init
|
49
|
+
@rem Get command-line arguments, handling Windows variants
|
50
|
+
|
51
|
+
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
+
|
53
|
+
:win9xME_args
|
54
|
+
@rem Slurp the command line arguments.
|
55
|
+
set CMD_LINE_ARGS=
|
56
|
+
set _SKIP=2
|
57
|
+
|
58
|
+
:win9xME_args_slurp
|
59
|
+
if "x%~1" == "x" goto execute
|
60
|
+
|
61
|
+
set CMD_LINE_ARGS=%*
|
62
|
+
|
63
|
+
:execute
|
64
|
+
@rem Setup the command line
|
65
|
+
|
66
|
+
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
67
|
+
|
68
|
+
@rem Execute Gradle
|
69
|
+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
70
|
+
|
71
|
+
:end
|
72
|
+
@rem End local scope for the variables with windows NT shell
|
73
|
+
if "%ERRORLEVEL%"=="0" goto mainEnd
|
74
|
+
|
75
|
+
:fail
|
76
|
+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
77
|
+
rem the _cmd.exe /c_ return code!
|
78
|
+
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
79
|
+
exit /b 1
|
80
|
+
|
81
|
+
:mainEnd
|
82
|
+
if "%OS%"=="Windows_NT" endlocal
|
83
|
+
|
84
|
+
:omega
|
@@ -0,0 +1,13 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: "./sample"
|
4
|
+
parser:
|
5
|
+
type: csv_guessable
|
6
|
+
schema_file: sample1.csv
|
7
|
+
charset: MS932
|
8
|
+
columns:
|
9
|
+
- {name: 'ken-code', type: long}
|
10
|
+
- {name: 'sityouson-code', type: long}
|
11
|
+
- {name: 'tiiki-code', type: long}
|
12
|
+
out:
|
13
|
+
type: stdout
|
@@ -0,0 +1,10 @@
|
|
1
|
+
ken-code,sityouson-code,tiiki-code,ken-name,sityouson-name1,sityouson-name2,sityouson-name3,yomigana
|
2
|
+
1,0,1000,北海道,,,,ほっかいどう
|
3
|
+
1,100,1100,北海道,札幌市,,,さっぽろし
|
4
|
+
1,101,1101,北海道,札幌市,,中央区,ちゅうおうく
|
5
|
+
1,102,1102,北海道,札幌市,,北区,きたく
|
6
|
+
1,103,1103,北海道,札幌市,,東区,ひがしく
|
7
|
+
1,104,1104,北海道,札幌市,,白石区,しろいしく
|
8
|
+
1,105,1105,北海道,札幌市,,豊平区,とよひらく
|
9
|
+
1,106,1106,北海道,札幌市,,南区,みなみく
|
10
|
+
1,107,1107,北海道,札幌市,,西区,にしく
|
@@ -0,0 +1,18 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: "./sample"
|
4
|
+
parser:
|
5
|
+
type: csv_guessable
|
6
|
+
schema_file: sample2.csv
|
7
|
+
charset: MS932
|
8
|
+
columns:
|
9
|
+
- {value_name: '県コード', name: 'ken-code', type: long}
|
10
|
+
- {value_name: '市町村コード', name: 'sityouson-code', type: long}
|
11
|
+
- {value_name: '地域コード', name: 'tiiki-code', type: long}
|
12
|
+
- {value_name: '県名', name: 'ken-name', type: string}
|
13
|
+
- {value_name: '市町村名1', name: 'sityouson-name1', type: string}
|
14
|
+
- {value_name: '市町村名2', name: 'sityouson-name2', type: string}
|
15
|
+
- {value_name: '市町村名3', name: 'sityouson-name3', type: string}
|
16
|
+
- {value_name: '読み仮名', name: 'yomigana', type: string}
|
17
|
+
out:
|
18
|
+
type: stdout
|
@@ -0,0 +1,10 @@
|
|
1
|
+
県コード,市町村コード,地域コード,県名,市町村名1,市町村名2,市町村名3,読み仮名
|
2
|
+
1,0,1000,北海道,,,,ほっかいどう
|
3
|
+
1,100,1100,北海道,札幌市,,,さっぽろし
|
4
|
+
1,101,1101,北海道,札幌市,,中央区,ちゅうおうく
|
5
|
+
1,102,1102,北海道,札幌市,,北区,きたく
|
6
|
+
1,103,1103,北海道,札幌市,,東区,ひがしく
|
7
|
+
1,104,1104,北海道,札幌市,,白石区,しろいしく
|
8
|
+
1,105,1105,北海道,札幌市,,豊平区,とよひらく
|
9
|
+
1,106,1106,北海道,札幌市,,南区,みなみく
|
10
|
+
1,107,1107,北海道,札幌市,,西区,にしく
|
@@ -135,7 +135,9 @@ public class CsvGuessableParserPlugin
|
|
135
135
|
|
136
136
|
if (task.getSchemaFile().isPresent()) {
|
137
137
|
int schemaLine = task.getSchemaLine();
|
138
|
-
task.
|
138
|
+
if (schemaLine > task.getSkipHeaderLines()) {
|
139
|
+
task.setSkipHeaderLines(schemaLine);
|
140
|
+
}
|
139
141
|
|
140
142
|
String header = readHeader(task.getSchemaFile().get().getPath(), schemaLine, task.getCharset());
|
141
143
|
log.debug(header);
|
@@ -6,13 +6,16 @@ import org.embulk.config.ConfigException;
|
|
6
6
|
import org.embulk.config.ConfigLoader;
|
7
7
|
import org.embulk.config.ConfigSource;
|
8
8
|
import org.embulk.config.TaskSource;
|
9
|
+
import org.embulk.spi.Column;
|
9
10
|
import org.embulk.spi.Exec;
|
10
11
|
import org.embulk.spi.FileInput;
|
11
12
|
import org.embulk.spi.ParserPlugin;
|
12
13
|
import org.embulk.spi.Schema;
|
13
14
|
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
15
|
+
import org.embulk.spi.type.Types;
|
14
16
|
import org.embulk.spi.util.InputStreamFileInput;
|
15
|
-
import org.embulk.
|
17
|
+
import org.embulk.spi.util.Pages;
|
18
|
+
//import org.embulk.standards.TestCsvParserPlugin;
|
16
19
|
import org.junit.Before;
|
17
20
|
import org.junit.Rule;
|
18
21
|
import org.junit.Test;
|
@@ -22,13 +25,14 @@ import java.io.File;
|
|
22
25
|
import java.io.FileInputStream;
|
23
26
|
import java.io.IOException;
|
24
27
|
import java.io.InputStream;
|
28
|
+
import java.util.List;
|
25
29
|
|
26
30
|
import static org.embulk.parser.csv_guessable.CsvGuessableParserPlugin.PluginTask;
|
27
31
|
import static org.junit.Assert.assertEquals;
|
28
32
|
import static org.junit.Assert.assertNull;
|
29
33
|
|
30
34
|
public class TestCsvGuessableParserPlugin
|
31
|
-
extends TestCsvParserPlugin
|
35
|
+
// extends TestCsvParserPlugin
|
32
36
|
{
|
33
37
|
@Rule
|
34
38
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
@@ -38,6 +42,7 @@ public class TestCsvGuessableParserPlugin
|
|
38
42
|
|
39
43
|
private CsvGuessableParserPlugin plugin;
|
40
44
|
private MockPageOutput output;
|
45
|
+
private Schema schema;
|
41
46
|
|
42
47
|
@Before
|
43
48
|
public void createResouce()
|
@@ -46,12 +51,6 @@ public class TestCsvGuessableParserPlugin
|
|
46
51
|
output = new MockPageOutput();
|
47
52
|
}
|
48
53
|
|
49
|
-
private ConfigSource getConfigFromYaml(String yaml)
|
50
|
-
{
|
51
|
-
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
52
|
-
return loader.fromYamlString(yaml);
|
53
|
-
}
|
54
|
-
|
55
54
|
@Test(expected = ConfigException.class)
|
56
55
|
public void checkColumnsRequired()
|
57
56
|
{
|
@@ -102,27 +101,175 @@ public class TestCsvGuessableParserPlugin
|
|
102
101
|
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" + // TODO: FIX PATH
|
103
102
|
"schema_line: 1";
|
104
103
|
ConfigSource config = getConfigFromYaml(configYaml);
|
105
|
-
|
106
|
-
|
104
|
+
transaction(config, fileInput("data/test.csv"));
|
105
|
+
List<Object[]> records = Pages.toObjects(schema, output.pages);
|
106
|
+
assertEquals(2, records.size());
|
107
|
+
|
108
|
+
Object[] record;
|
109
|
+
{
|
110
|
+
record = records.get(0);
|
111
|
+
assertEquals("100", record[0]);
|
112
|
+
assertEquals("test-title", record[1]);
|
113
|
+
assertEquals("ok", record[2]);
|
114
|
+
}
|
115
|
+
{
|
116
|
+
record = records.get(1);
|
117
|
+
assertEquals("191", record[0]);
|
118
|
+
assertEquals("title2", record[1]);
|
119
|
+
assertEquals("ng", record[2]);
|
120
|
+
}
|
121
|
+
}
|
122
|
+
|
123
|
+
@Test
|
124
|
+
public void specifyType()
|
125
|
+
throws Exception
|
126
|
+
{
|
127
|
+
String configYaml = "" +
|
128
|
+
"type: csv_guessable\n" +
|
129
|
+
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" +
|
130
|
+
"columns:\n" +
|
131
|
+
" - {name: 'id', type: long}\n" +
|
132
|
+
" - {name: 'title', type: string}\n" +
|
133
|
+
" - {name: 'status', type: string}";
|
134
|
+
|
135
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
136
|
+
transaction(config, fileInput("data/test.csv"));
|
137
|
+
List<Object[]> records = Pages.toObjects(schema, output.pages);
|
107
138
|
|
108
|
-
|
139
|
+
Column column;
|
140
|
+
{
|
141
|
+
column = schema.getColumn(0);
|
142
|
+
assertEquals(Types.LONG, column.getType());
|
143
|
+
}
|
144
|
+
{
|
145
|
+
column = schema.getColumn(1);
|
146
|
+
assertEquals(Types.STRING, column.getType());
|
147
|
+
}
|
148
|
+
{
|
149
|
+
column = schema.getColumn(2);
|
150
|
+
assertEquals(Types.STRING, column.getType());
|
151
|
+
}
|
152
|
+
|
153
|
+
assertEquals(2, records.size());
|
154
|
+
|
155
|
+
Object[] record;
|
156
|
+
{
|
157
|
+
record = records.get(0);
|
158
|
+
assertEquals(100L, record[0]);
|
159
|
+
assertEquals("test-title", record[1]);
|
160
|
+
assertEquals("ok", record[2]);
|
161
|
+
}
|
162
|
+
{
|
163
|
+
record = records.get(1);
|
164
|
+
assertEquals(191L, record[0]);
|
165
|
+
assertEquals("title2", record[1]);
|
166
|
+
assertEquals("ng", record[2]);
|
167
|
+
}
|
109
168
|
}
|
110
169
|
|
111
170
|
@Test
|
112
|
-
public void
|
171
|
+
public void renameColumn()
|
172
|
+
throws Exception
|
173
|
+
{
|
174
|
+
String configYaml = "" +
|
175
|
+
"type: csv_guessable\n" +
|
176
|
+
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" +
|
177
|
+
"columns:\n" +
|
178
|
+
" - {value_name: 'id', name: 'number', type: long}\n" +
|
179
|
+
" - {value_name: 'title', name: 'description', type: string}\n" +
|
180
|
+
" - {value_name: 'status', name: 'ok?', type: string}";
|
181
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
182
|
+
transaction(config, fileInput("data/test.csv"));
|
183
|
+
|
184
|
+
Column column;
|
185
|
+
{
|
186
|
+
column = schema.getColumn(0);
|
187
|
+
assertEquals("number", column.getName());
|
188
|
+
assertEquals(Types.LONG, column.getType());
|
189
|
+
}
|
190
|
+
{
|
191
|
+
column = schema.getColumn(1);
|
192
|
+
assertEquals("description", column.getName());
|
193
|
+
assertEquals(Types.STRING, column.getType());
|
194
|
+
}
|
195
|
+
{
|
196
|
+
column = schema.getColumn(2);
|
197
|
+
assertEquals("ok?", column.getName());
|
198
|
+
assertEquals(Types.STRING, column.getType());
|
199
|
+
}
|
200
|
+
}
|
201
|
+
|
202
|
+
@Test
|
203
|
+
public void renameColumnAndSpecifyType()
|
113
204
|
throws Exception
|
114
205
|
{
|
115
206
|
String configYaml = "" +
|
116
207
|
"type: csv_guessable\n" +
|
117
208
|
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" + // TODO: FIX PATH
|
118
|
-
"schema_line: 1\n" +
|
119
209
|
"columns:\n" +
|
120
|
-
"- {value_name: '
|
121
|
-
"- {value_name: 'title', name: 'description', type: string}\n" +
|
122
|
-
"- {value_name: 'status', name: 'ok?', type: string}";
|
210
|
+
" - {value_name: 'id', name: 'number', type: long}\n" +
|
211
|
+
" - {value_name: 'title', name: 'description', type: string}\n" +
|
212
|
+
" - {value_name: 'status', name: 'ok?', type: string}";
|
123
213
|
ConfigSource config = getConfigFromYaml(configYaml);
|
214
|
+
transaction(config, fileInput("data/test.csv"));
|
124
215
|
|
125
|
-
|
216
|
+
Column column;
|
217
|
+
{
|
218
|
+
column = schema.getColumn(0);
|
219
|
+
assertEquals("number", column.getName());
|
220
|
+
assertEquals(Types.LONG, column.getType());
|
221
|
+
}
|
222
|
+
{
|
223
|
+
column = schema.getColumn(1);
|
224
|
+
assertEquals("description", column.getName());
|
225
|
+
assertEquals(Types.STRING, column.getType());
|
226
|
+
}
|
227
|
+
{
|
228
|
+
column = schema.getColumn(2);
|
229
|
+
assertEquals("ok?", column.getName());
|
230
|
+
assertEquals(Types.STRING, column.getType());
|
231
|
+
}
|
232
|
+
}
|
233
|
+
|
234
|
+
@Test
|
235
|
+
public void skipHeaderLinesIsLargerThanHeaderLine()
|
236
|
+
throws Exception
|
237
|
+
{
|
238
|
+
String configYaml = "" +
|
239
|
+
"type: csv_guessable\n" +
|
240
|
+
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" + // TODO: FIX PATH
|
241
|
+
"skip_header_lines: 2\n" +
|
242
|
+
"columnes:\n" +
|
243
|
+
" - {value_name: 'id', type: long}\n" +
|
244
|
+
" - {value_name: 'title', type: string}\n" +
|
245
|
+
" - {value_name: 'status', type: string}";
|
246
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
247
|
+
transaction(config, fileInput("data/test.csv"));
|
248
|
+
|
249
|
+
List<Object[]> records = Pages.toObjects(schema, output.pages);
|
250
|
+
assertEquals(1, records.size());
|
251
|
+
|
252
|
+
Object[] record;
|
253
|
+
{
|
254
|
+
record = records.get(0);
|
255
|
+
assertEquals("191", record[0]);
|
256
|
+
assertEquals("title2", record[1]);
|
257
|
+
assertEquals("ng", record[2]);
|
258
|
+
}
|
259
|
+
}
|
260
|
+
|
261
|
+
private ConfigSource getConfigFromYaml(String yaml)
|
262
|
+
{
|
263
|
+
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
264
|
+
return loader.fromYamlString(yaml);
|
265
|
+
}
|
266
|
+
|
267
|
+
private FileInput fileInput(String path)
|
268
|
+
throws Exception
|
269
|
+
{
|
270
|
+
File file = new File(this.getClass().getResource(path).getPath());
|
271
|
+
FileInputStream in = new FileInputStream(file);
|
272
|
+
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
|
126
273
|
}
|
127
274
|
|
128
275
|
private void transaction(ConfigSource config, final FileInput input)
|
@@ -132,18 +279,12 @@ public class TestCsvGuessableParserPlugin
|
|
132
279
|
@Override
|
133
280
|
public void run(TaskSource taskSource, Schema schema)
|
134
281
|
{
|
282
|
+
TestCsvGuessableParserPlugin.this.schema = schema;
|
135
283
|
plugin.run(taskSource, schema, input, output);
|
136
284
|
}
|
137
285
|
});
|
138
286
|
}
|
139
287
|
|
140
|
-
private FileInput fileInput(File file)
|
141
|
-
throws Exception
|
142
|
-
{
|
143
|
-
FileInputStream in = new FileInputStream(file);
|
144
|
-
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
|
145
|
-
}
|
146
|
-
|
147
288
|
private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
|
148
289
|
throws IOException
|
149
290
|
{
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-csv_guessable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- koooge
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - ~>
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.0'
|
19
|
-
|
20
|
-
prerelease: false
|
21
|
-
type: :development
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
23
21
|
requirements:
|
24
22
|
- - ~>
|
25
23
|
- !ruby/object:Gem::Version
|
26
24
|
version: '1.0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '10.0'
|
33
|
-
|
34
|
-
prerelease: false
|
35
|
-
type: :development
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
37
35
|
requirements:
|
38
36
|
- - '>='
|
39
37
|
- !ruby/object:Gem::Version
|
40
38
|
version: '10.0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
41
|
description: Parses Guessable Csv files read by other file input plugins.
|
42
42
|
email:
|
43
43
|
- koooooge@gmail.com
|
@@ -45,6 +45,7 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
+
- .circleci/config.yml
|
48
49
|
- .gitignore
|
49
50
|
- LICENSE.txt
|
50
51
|
- README.md
|
@@ -57,6 +58,11 @@ files:
|
|
57
58
|
- gradlew.bat
|
58
59
|
- lib/embulk/guess/csv_guessable.rb
|
59
60
|
- lib/embulk/parser/csv_guessable.rb
|
61
|
+
- samples/sample1/config.yml
|
62
|
+
- samples/sample1/config_set_type.yml
|
63
|
+
- samples/sample1/sample1.csv
|
64
|
+
- samples/sample2/config_rename.yml
|
65
|
+
- samples/sample2/sample2.csv
|
60
66
|
- src/main/java/org/embulk/parser/csv_guessable/CsvGuessableParserPlugin.java
|
61
67
|
- src/main/java/org/embulk/parser/csv_guessable/CsvTokenizer.java
|
62
68
|
- src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java
|
@@ -67,11 +73,11 @@ files:
|
|
67
73
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/original-csv.yml
|
68
74
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/replace_column_name.yml
|
69
75
|
- classpath/commons-lang3-3.5.jar
|
70
|
-
- classpath/embulk-parser-csv_guessable-0.1.4.jar
|
76
|
+
- classpath/embulk-parser-csv_guessable-0.1.5.jar
|
77
|
+
- classpath/embulk-standards-0.8.35.jar
|
71
78
|
- classpath/opencsv-3.9.jar
|
72
79
|
- classpath/commons-beanutils-1.9.3.jar
|
73
80
|
- classpath/commons-compress-1.10.jar
|
74
|
-
- classpath/embulk-standards-0.8.22.jar
|
75
81
|
- classpath/commons-collections-3.2.2.jar
|
76
82
|
- classpath/commons-logging-1.2.jar
|
77
83
|
homepage: https://github.com/koooge/embulk-parser-csv_guessable
|