embulk-parser-csv_guessable 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +21 -0
- data/README.md +16 -2
- data/build.gradle +4 -4
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -3
- data/gradlew +13 -10
- data/gradlew.bat +84 -84
- data/samples/sample1/config.yml +9 -0
- data/samples/sample1/config_set_type.yml +13 -0
- data/samples/sample1/sample1.csv +10 -0
- data/samples/sample2/config_rename.yml +18 -0
- data/samples/sample2/sample2.csv +10 -0
- data/src/main/java/org/embulk/parser/csv_guessable/CsvGuessableParserPlugin.java +3 -1
- data/src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java +165 -24
- metadata +20 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07a5d30689afe074b0db47f61c04651e74b76024
|
4
|
+
data.tar.gz: b16fda4794ed77f2111d4e047210c8966f0fed2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca548cf755f02e6be06bef5664754271ac1f5041e4baa0dd015e546ec4e00396051a6fc8d532603d01a778069baf9398ae132b1770b70c6eb1790954cf6eafb3
|
7
|
+
data.tar.gz: 368cb17eb071324704ff2ac131511c4f96e67f3a8eb109fbe9e7dec549757dc0b10cbed1176246d1db18f43f48ad98a4b4213f8f482d70f12a2ee2f5e7283473
|
@@ -0,0 +1,21 @@
|
|
1
|
+
version: 2
|
2
|
+
jobs:
|
3
|
+
build:
|
4
|
+
docker:
|
5
|
+
- image: circleci/openjdk:8-jdk
|
6
|
+
working_directory: ~/repo
|
7
|
+
environment:
|
8
|
+
JVM_OPTS: -Xmx3200m
|
9
|
+
TERM: dumb
|
10
|
+
steps:
|
11
|
+
- checkout
|
12
|
+
- restore_cache:
|
13
|
+
keys:
|
14
|
+
- v1-dependencies-{{ checksum "build.gradle" }}
|
15
|
+
- v1-dependencies-
|
16
|
+
- run: gradle dependencies
|
17
|
+
- save_cache:
|
18
|
+
paths:
|
19
|
+
- ~/.m2
|
20
|
+
key: v1-dependencies-{{ checksum "build.gradle" }}
|
21
|
+
- run: gradle test
|
data/README.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/embulk-parser-csv_guessable.svg)](https://badge.fury.io/rb/embulk-parser-csv_guessable)
|
2
|
+
[![CircleCI](https://circleci.com/gh/koooge/embulk-parser-csv_guessable.svg?style=svg)](https://circleci.com/gh/koooge/embulk-parser-csv_guessable)
|
3
|
+
|
1
4
|
# Guessable csv parser plugin for Embulk
|
2
5
|
**embulk-parser-csv_guessable** (runtime)guesses and parses csv which has schema in header.
|
3
6
|
|
@@ -24,8 +27,6 @@ It behaves as original csv parser when **embulk-parser-csv_guessable** conifgs(`
|
|
24
27
|
- **date**: Set date part if the format doesn't include date part
|
25
28
|
- any other csv configs: see [www.embulk.org](http://www.embulk.org/docs/built-in.html#csv-parser-plugin)
|
26
29
|
|
27
|
-
The `columns`
|
28
|
-
|
29
30
|
## Example
|
30
31
|
test.csv (There is a schema at the first line.)
|
31
32
|
|
@@ -88,8 +89,21 @@ $ embulk gem install embulk-parser-csv_guessable
|
|
88
89
|
$ embulk guess -g csv_guessable config.yml -o guessed.yml
|
89
90
|
-->
|
90
91
|
|
92
|
+
## Sample
|
93
|
+
|
94
|
+
```
|
95
|
+
$ cd samples/sample2
|
96
|
+
$ embulk run -L ../../ config_rename.yml -l debug
|
97
|
+
```
|
98
|
+
|
91
99
|
## Build
|
92
100
|
|
93
101
|
```
|
94
102
|
$ ./gradlew gem # -t to watch change of files and rebuild continuously
|
95
103
|
```
|
104
|
+
|
105
|
+
## Test
|
106
|
+
|
107
|
+
```
|
108
|
+
$ ./gradlew test
|
109
|
+
```
|
data/build.gradle
CHANGED
@@ -13,16 +13,16 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.4"
|
16
|
+
version = "0.1.5"
|
17
17
|
|
18
18
|
sourceCompatibility = 1.7
|
19
19
|
targetCompatibility = 1.7
|
20
20
|
|
21
21
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.8
|
23
|
-
compile "org.embulk:embulk-standards:0.8
|
22
|
+
compile "org.embulk:embulk-core:0.8.+"
|
23
|
+
compile "org.embulk:embulk-standards:0.8.+"
|
24
24
|
compile "com.opencsv:opencsv:3.9"
|
25
|
-
provided "org.embulk:embulk-core:0.8
|
25
|
+
provided "org.embulk:embulk-core:0.8.+"
|
26
26
|
testCompile "junit:junit:4.+"
|
27
27
|
testCompile "org.embulk:embulk-core:0.8.+:tests"
|
28
28
|
testCompile "org.embulk:embulk-standards:0.8.+:tests"
|
Binary file
|
@@ -1,6 +1,5 @@
|
|
1
|
-
|
1
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-4.2.1-bin.zip
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
|
-
zipStoreBase=GRADLE_USER_HOME
|
5
4
|
zipStorePath=wrapper/dists
|
6
|
-
|
5
|
+
zipStoreBase=GRADLE_USER_HOME
|
data/gradlew
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env sh
|
2
2
|
|
3
3
|
##############################################################################
|
4
4
|
##
|
@@ -33,11 +33,11 @@ DEFAULT_JVM_OPTS=""
|
|
33
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
34
34
|
MAX_FD="maximum"
|
35
35
|
|
36
|
-
warn (
|
36
|
+
warn () {
|
37
37
|
echo "$*"
|
38
38
|
}
|
39
39
|
|
40
|
-
die (
|
40
|
+
die () {
|
41
41
|
echo
|
42
42
|
echo "$*"
|
43
43
|
echo
|
@@ -154,16 +154,19 @@ if $cygwin ; then
|
|
154
154
|
esac
|
155
155
|
fi
|
156
156
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
157
|
+
# Escape application args
|
158
|
+
save () {
|
159
|
+
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
160
|
+
echo " "
|
160
161
|
}
|
161
|
-
|
162
|
-
|
162
|
+
APP_ARGS=$(save "$@")
|
163
|
+
|
164
|
+
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
165
|
+
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
163
166
|
|
164
167
|
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
165
|
-
if [
|
168
|
+
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
|
166
169
|
cd "$(dirname "$0")"
|
167
170
|
fi
|
168
171
|
|
169
|
-
exec "$JAVACMD" "
|
172
|
+
exec "$JAVACMD" "$@"
|
data/gradlew.bat
CHANGED
@@ -1,84 +1,84 @@
|
|
1
|
-
@if "%DEBUG%" == "" @echo off
|
2
|
-
@rem ##########################################################################
|
3
|
-
@rem
|
4
|
-
@rem Gradle startup script for Windows
|
5
|
-
@rem
|
6
|
-
@rem ##########################################################################
|
7
|
-
|
8
|
-
@rem Set local scope for the variables with windows NT shell
|
9
|
-
if "%OS%"=="Windows_NT" setlocal
|
10
|
-
|
11
|
-
set DIRNAME=%~dp0
|
12
|
-
if "%DIRNAME%" == "" set DIRNAME=.
|
13
|
-
set APP_BASE_NAME=%~n0
|
14
|
-
set APP_HOME=%DIRNAME%
|
15
|
-
|
16
|
-
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
-
set DEFAULT_JVM_OPTS=
|
18
|
-
|
19
|
-
@rem Find java.exe
|
20
|
-
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
-
|
22
|
-
set JAVA_EXE=java.exe
|
23
|
-
%JAVA_EXE% -version >NUL 2>&1
|
24
|
-
if "%ERRORLEVEL%" == "0" goto init
|
25
|
-
|
26
|
-
echo.
|
27
|
-
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
-
echo.
|
29
|
-
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
-
echo location of your Java installation.
|
31
|
-
|
32
|
-
goto fail
|
33
|
-
|
34
|
-
:findJavaFromJavaHome
|
35
|
-
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
-
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
-
|
38
|
-
if exist "%JAVA_EXE%" goto init
|
39
|
-
|
40
|
-
echo.
|
41
|
-
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
-
echo.
|
43
|
-
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
-
echo location of your Java installation.
|
45
|
-
|
46
|
-
goto fail
|
47
|
-
|
48
|
-
:init
|
49
|
-
@rem Get command-line arguments, handling Windows variants
|
50
|
-
|
51
|
-
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
-
|
53
|
-
:win9xME_args
|
54
|
-
@rem Slurp the command line arguments.
|
55
|
-
set CMD_LINE_ARGS=
|
56
|
-
set _SKIP=2
|
57
|
-
|
58
|
-
:win9xME_args_slurp
|
59
|
-
if "x%~1" == "x" goto execute
|
60
|
-
|
61
|
-
set CMD_LINE_ARGS=%*
|
62
|
-
|
63
|
-
:execute
|
64
|
-
@rem Setup the command line
|
65
|
-
|
66
|
-
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
67
|
-
|
68
|
-
@rem Execute Gradle
|
69
|
-
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
70
|
-
|
71
|
-
:end
|
72
|
-
@rem End local scope for the variables with windows NT shell
|
73
|
-
if "%ERRORLEVEL%"=="0" goto mainEnd
|
74
|
-
|
75
|
-
:fail
|
76
|
-
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
77
|
-
rem the _cmd.exe /c_ return code!
|
78
|
-
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
79
|
-
exit /b 1
|
80
|
-
|
81
|
-
:mainEnd
|
82
|
-
if "%OS%"=="Windows_NT" endlocal
|
83
|
-
|
84
|
-
:omega
|
1
|
+
@if "%DEBUG%" == "" @echo off
|
2
|
+
@rem ##########################################################################
|
3
|
+
@rem
|
4
|
+
@rem Gradle startup script for Windows
|
5
|
+
@rem
|
6
|
+
@rem ##########################################################################
|
7
|
+
|
8
|
+
@rem Set local scope for the variables with windows NT shell
|
9
|
+
if "%OS%"=="Windows_NT" setlocal
|
10
|
+
|
11
|
+
set DIRNAME=%~dp0
|
12
|
+
if "%DIRNAME%" == "" set DIRNAME=.
|
13
|
+
set APP_BASE_NAME=%~n0
|
14
|
+
set APP_HOME=%DIRNAME%
|
15
|
+
|
16
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
+
set DEFAULT_JVM_OPTS=
|
18
|
+
|
19
|
+
@rem Find java.exe
|
20
|
+
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
+
|
22
|
+
set JAVA_EXE=java.exe
|
23
|
+
%JAVA_EXE% -version >NUL 2>&1
|
24
|
+
if "%ERRORLEVEL%" == "0" goto init
|
25
|
+
|
26
|
+
echo.
|
27
|
+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
+
echo.
|
29
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
+
echo location of your Java installation.
|
31
|
+
|
32
|
+
goto fail
|
33
|
+
|
34
|
+
:findJavaFromJavaHome
|
35
|
+
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
+
|
38
|
+
if exist "%JAVA_EXE%" goto init
|
39
|
+
|
40
|
+
echo.
|
41
|
+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
+
echo.
|
43
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
+
echo location of your Java installation.
|
45
|
+
|
46
|
+
goto fail
|
47
|
+
|
48
|
+
:init
|
49
|
+
@rem Get command-line arguments, handling Windows variants
|
50
|
+
|
51
|
+
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
+
|
53
|
+
:win9xME_args
|
54
|
+
@rem Slurp the command line arguments.
|
55
|
+
set CMD_LINE_ARGS=
|
56
|
+
set _SKIP=2
|
57
|
+
|
58
|
+
:win9xME_args_slurp
|
59
|
+
if "x%~1" == "x" goto execute
|
60
|
+
|
61
|
+
set CMD_LINE_ARGS=%*
|
62
|
+
|
63
|
+
:execute
|
64
|
+
@rem Setup the command line
|
65
|
+
|
66
|
+
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
67
|
+
|
68
|
+
@rem Execute Gradle
|
69
|
+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
70
|
+
|
71
|
+
:end
|
72
|
+
@rem End local scope for the variables with windows NT shell
|
73
|
+
if "%ERRORLEVEL%"=="0" goto mainEnd
|
74
|
+
|
75
|
+
:fail
|
76
|
+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
77
|
+
rem the _cmd.exe /c_ return code!
|
78
|
+
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
79
|
+
exit /b 1
|
80
|
+
|
81
|
+
:mainEnd
|
82
|
+
if "%OS%"=="Windows_NT" endlocal
|
83
|
+
|
84
|
+
:omega
|
@@ -0,0 +1,13 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: "./sample"
|
4
|
+
parser:
|
5
|
+
type: csv_guessable
|
6
|
+
schema_file: sample1.csv
|
7
|
+
charset: MS932
|
8
|
+
columns:
|
9
|
+
- {name: 'ken-code', type: long}
|
10
|
+
- {name: 'sityouson-code', type: long}
|
11
|
+
- {name: 'tiiki-code', type: long}
|
12
|
+
out:
|
13
|
+
type: stdout
|
@@ -0,0 +1,10 @@
|
|
1
|
+
ken-code,sityouson-code,tiiki-code,ken-name,sityouson-name1,sityouson-name2,sityouson-name3,yomigana
|
2
|
+
1,0,1000,北海道,,,,ほっかいどう
|
3
|
+
1,100,1100,北海道,札幌市,,,さっぽろし
|
4
|
+
1,101,1101,北海道,札幌市,,中央区,ちゅうおうく
|
5
|
+
1,102,1102,北海道,札幌市,,北区,きたく
|
6
|
+
1,103,1103,北海道,札幌市,,東区,ひがしく
|
7
|
+
1,104,1104,北海道,札幌市,,白石区,しろいしく
|
8
|
+
1,105,1105,北海道,札幌市,,豊平区,とよひらく
|
9
|
+
1,106,1106,北海道,札幌市,,南区,みなみく
|
10
|
+
1,107,1107,北海道,札幌市,,西区,にしく
|
@@ -0,0 +1,18 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: "./sample"
|
4
|
+
parser:
|
5
|
+
type: csv_guessable
|
6
|
+
schema_file: sample2.csv
|
7
|
+
charset: MS932
|
8
|
+
columns:
|
9
|
+
- {value_name: '県コード', name: 'ken-code', type: long}
|
10
|
+
- {value_name: '市町村コード', name: 'sityouson-code', type: long}
|
11
|
+
- {value_name: '地域コード', name: 'tiiki-code', type: long}
|
12
|
+
- {value_name: '県名', name: 'ken-name', type: string}
|
13
|
+
- {value_name: '市町村名1', name: 'sityouson-name1', type: string}
|
14
|
+
- {value_name: '市町村名2', name: 'sityouson-name2', type: string}
|
15
|
+
- {value_name: '市町村名3', name: 'sityouson-name3', type: string}
|
16
|
+
- {value_name: '読み仮名', name: 'yomigana', type: string}
|
17
|
+
out:
|
18
|
+
type: stdout
|
@@ -0,0 +1,10 @@
|
|
1
|
+
県コード,市町村コード,地域コード,県名,市町村名1,市町村名2,市町村名3,読み仮名
|
2
|
+
1,0,1000,北海道,,,,ほっかいどう
|
3
|
+
1,100,1100,北海道,札幌市,,,さっぽろし
|
4
|
+
1,101,1101,北海道,札幌市,,中央区,ちゅうおうく
|
5
|
+
1,102,1102,北海道,札幌市,,北区,きたく
|
6
|
+
1,103,1103,北海道,札幌市,,東区,ひがしく
|
7
|
+
1,104,1104,北海道,札幌市,,白石区,しろいしく
|
8
|
+
1,105,1105,北海道,札幌市,,豊平区,とよひらく
|
9
|
+
1,106,1106,北海道,札幌市,,南区,みなみく
|
10
|
+
1,107,1107,北海道,札幌市,,西区,にしく
|
@@ -135,7 +135,9 @@ public class CsvGuessableParserPlugin
|
|
135
135
|
|
136
136
|
if (task.getSchemaFile().isPresent()) {
|
137
137
|
int schemaLine = task.getSchemaLine();
|
138
|
-
task.
|
138
|
+
if (schemaLine > task.getSkipHeaderLines()) {
|
139
|
+
task.setSkipHeaderLines(schemaLine);
|
140
|
+
}
|
139
141
|
|
140
142
|
String header = readHeader(task.getSchemaFile().get().getPath(), schemaLine, task.getCharset());
|
141
143
|
log.debug(header);
|
@@ -6,13 +6,16 @@ import org.embulk.config.ConfigException;
|
|
6
6
|
import org.embulk.config.ConfigLoader;
|
7
7
|
import org.embulk.config.ConfigSource;
|
8
8
|
import org.embulk.config.TaskSource;
|
9
|
+
import org.embulk.spi.Column;
|
9
10
|
import org.embulk.spi.Exec;
|
10
11
|
import org.embulk.spi.FileInput;
|
11
12
|
import org.embulk.spi.ParserPlugin;
|
12
13
|
import org.embulk.spi.Schema;
|
13
14
|
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
15
|
+
import org.embulk.spi.type.Types;
|
14
16
|
import org.embulk.spi.util.InputStreamFileInput;
|
15
|
-
import org.embulk.
|
17
|
+
import org.embulk.spi.util.Pages;
|
18
|
+
//import org.embulk.standards.TestCsvParserPlugin;
|
16
19
|
import org.junit.Before;
|
17
20
|
import org.junit.Rule;
|
18
21
|
import org.junit.Test;
|
@@ -22,13 +25,14 @@ import java.io.File;
|
|
22
25
|
import java.io.FileInputStream;
|
23
26
|
import java.io.IOException;
|
24
27
|
import java.io.InputStream;
|
28
|
+
import java.util.List;
|
25
29
|
|
26
30
|
import static org.embulk.parser.csv_guessable.CsvGuessableParserPlugin.PluginTask;
|
27
31
|
import static org.junit.Assert.assertEquals;
|
28
32
|
import static org.junit.Assert.assertNull;
|
29
33
|
|
30
34
|
public class TestCsvGuessableParserPlugin
|
31
|
-
extends TestCsvParserPlugin
|
35
|
+
// extends TestCsvParserPlugin
|
32
36
|
{
|
33
37
|
@Rule
|
34
38
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
@@ -38,6 +42,7 @@ public class TestCsvGuessableParserPlugin
|
|
38
42
|
|
39
43
|
private CsvGuessableParserPlugin plugin;
|
40
44
|
private MockPageOutput output;
|
45
|
+
private Schema schema;
|
41
46
|
|
42
47
|
@Before
|
43
48
|
public void createResouce()
|
@@ -46,12 +51,6 @@ public class TestCsvGuessableParserPlugin
|
|
46
51
|
output = new MockPageOutput();
|
47
52
|
}
|
48
53
|
|
49
|
-
private ConfigSource getConfigFromYaml(String yaml)
|
50
|
-
{
|
51
|
-
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
52
|
-
return loader.fromYamlString(yaml);
|
53
|
-
}
|
54
|
-
|
55
54
|
@Test(expected = ConfigException.class)
|
56
55
|
public void checkColumnsRequired()
|
57
56
|
{
|
@@ -102,27 +101,175 @@ public class TestCsvGuessableParserPlugin
|
|
102
101
|
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" + // TODO: FIX PATH
|
103
102
|
"schema_line: 1";
|
104
103
|
ConfigSource config = getConfigFromYaml(configYaml);
|
105
|
-
|
106
|
-
|
104
|
+
transaction(config, fileInput("data/test.csv"));
|
105
|
+
List<Object[]> records = Pages.toObjects(schema, output.pages);
|
106
|
+
assertEquals(2, records.size());
|
107
|
+
|
108
|
+
Object[] record;
|
109
|
+
{
|
110
|
+
record = records.get(0);
|
111
|
+
assertEquals("100", record[0]);
|
112
|
+
assertEquals("test-title", record[1]);
|
113
|
+
assertEquals("ok", record[2]);
|
114
|
+
}
|
115
|
+
{
|
116
|
+
record = records.get(1);
|
117
|
+
assertEquals("191", record[0]);
|
118
|
+
assertEquals("title2", record[1]);
|
119
|
+
assertEquals("ng", record[2]);
|
120
|
+
}
|
121
|
+
}
|
122
|
+
|
123
|
+
@Test
|
124
|
+
public void specifyType()
|
125
|
+
throws Exception
|
126
|
+
{
|
127
|
+
String configYaml = "" +
|
128
|
+
"type: csv_guessable\n" +
|
129
|
+
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" +
|
130
|
+
"columns:\n" +
|
131
|
+
" - {name: 'id', type: long}\n" +
|
132
|
+
" - {name: 'title', type: string}\n" +
|
133
|
+
" - {name: 'status', type: string}";
|
134
|
+
|
135
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
136
|
+
transaction(config, fileInput("data/test.csv"));
|
137
|
+
List<Object[]> records = Pages.toObjects(schema, output.pages);
|
107
138
|
|
108
|
-
|
139
|
+
Column column;
|
140
|
+
{
|
141
|
+
column = schema.getColumn(0);
|
142
|
+
assertEquals(Types.LONG, column.getType());
|
143
|
+
}
|
144
|
+
{
|
145
|
+
column = schema.getColumn(1);
|
146
|
+
assertEquals(Types.STRING, column.getType());
|
147
|
+
}
|
148
|
+
{
|
149
|
+
column = schema.getColumn(2);
|
150
|
+
assertEquals(Types.STRING, column.getType());
|
151
|
+
}
|
152
|
+
|
153
|
+
assertEquals(2, records.size());
|
154
|
+
|
155
|
+
Object[] record;
|
156
|
+
{
|
157
|
+
record = records.get(0);
|
158
|
+
assertEquals(100L, record[0]);
|
159
|
+
assertEquals("test-title", record[1]);
|
160
|
+
assertEquals("ok", record[2]);
|
161
|
+
}
|
162
|
+
{
|
163
|
+
record = records.get(1);
|
164
|
+
assertEquals(191L, record[0]);
|
165
|
+
assertEquals("title2", record[1]);
|
166
|
+
assertEquals("ng", record[2]);
|
167
|
+
}
|
109
168
|
}
|
110
169
|
|
111
170
|
@Test
|
112
|
-
public void
|
171
|
+
public void renameColumn()
|
172
|
+
throws Exception
|
173
|
+
{
|
174
|
+
String configYaml = "" +
|
175
|
+
"type: csv_guessable\n" +
|
176
|
+
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" +
|
177
|
+
"columns:\n" +
|
178
|
+
" - {value_name: 'id', name: 'number', type: long}\n" +
|
179
|
+
" - {value_name: 'title', name: 'description', type: string}\n" +
|
180
|
+
" - {value_name: 'status', name: 'ok?', type: string}";
|
181
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
182
|
+
transaction(config, fileInput("data/test.csv"));
|
183
|
+
|
184
|
+
Column column;
|
185
|
+
{
|
186
|
+
column = schema.getColumn(0);
|
187
|
+
assertEquals("number", column.getName());
|
188
|
+
assertEquals(Types.LONG, column.getType());
|
189
|
+
}
|
190
|
+
{
|
191
|
+
column = schema.getColumn(1);
|
192
|
+
assertEquals("description", column.getName());
|
193
|
+
assertEquals(Types.STRING, column.getType());
|
194
|
+
}
|
195
|
+
{
|
196
|
+
column = schema.getColumn(2);
|
197
|
+
assertEquals("ok?", column.getName());
|
198
|
+
assertEquals(Types.STRING, column.getType());
|
199
|
+
}
|
200
|
+
}
|
201
|
+
|
202
|
+
@Test
|
203
|
+
public void renameColumnAndSpecifyType()
|
113
204
|
throws Exception
|
114
205
|
{
|
115
206
|
String configYaml = "" +
|
116
207
|
"type: csv_guessable\n" +
|
117
208
|
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" + // TODO: FIX PATH
|
118
|
-
"schema_line: 1\n" +
|
119
209
|
"columns:\n" +
|
120
|
-
"- {value_name: '
|
121
|
-
"- {value_name: 'title', name: 'description', type: string}\n" +
|
122
|
-
"- {value_name: 'status', name: 'ok?', type: string}";
|
210
|
+
" - {value_name: 'id', name: 'number', type: long}\n" +
|
211
|
+
" - {value_name: 'title', name: 'description', type: string}\n" +
|
212
|
+
" - {value_name: 'status', name: 'ok?', type: string}";
|
123
213
|
ConfigSource config = getConfigFromYaml(configYaml);
|
214
|
+
transaction(config, fileInput("data/test.csv"));
|
124
215
|
|
125
|
-
|
216
|
+
Column column;
|
217
|
+
{
|
218
|
+
column = schema.getColumn(0);
|
219
|
+
assertEquals("number", column.getName());
|
220
|
+
assertEquals(Types.LONG, column.getType());
|
221
|
+
}
|
222
|
+
{
|
223
|
+
column = schema.getColumn(1);
|
224
|
+
assertEquals("description", column.getName());
|
225
|
+
assertEquals(Types.STRING, column.getType());
|
226
|
+
}
|
227
|
+
{
|
228
|
+
column = schema.getColumn(2);
|
229
|
+
assertEquals("ok?", column.getName());
|
230
|
+
assertEquals(Types.STRING, column.getType());
|
231
|
+
}
|
232
|
+
}
|
233
|
+
|
234
|
+
@Test
|
235
|
+
public void skipHeaderLinesIsLargerThanHeaderLine()
|
236
|
+
throws Exception
|
237
|
+
{
|
238
|
+
String configYaml = "" +
|
239
|
+
"type: csv_guessable\n" +
|
240
|
+
"schema_file: src/test/resources/org/embulk/parser/csv_guessable/data/test.csv\n" + // TODO: FIX PATH
|
241
|
+
"skip_header_lines: 2\n" +
|
242
|
+
"columnes:\n" +
|
243
|
+
" - {value_name: 'id', type: long}\n" +
|
244
|
+
" - {value_name: 'title', type: string}\n" +
|
245
|
+
" - {value_name: 'status', type: string}";
|
246
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
247
|
+
transaction(config, fileInput("data/test.csv"));
|
248
|
+
|
249
|
+
List<Object[]> records = Pages.toObjects(schema, output.pages);
|
250
|
+
assertEquals(1, records.size());
|
251
|
+
|
252
|
+
Object[] record;
|
253
|
+
{
|
254
|
+
record = records.get(0);
|
255
|
+
assertEquals("191", record[0]);
|
256
|
+
assertEquals("title2", record[1]);
|
257
|
+
assertEquals("ng", record[2]);
|
258
|
+
}
|
259
|
+
}
|
260
|
+
|
261
|
+
private ConfigSource getConfigFromYaml(String yaml)
|
262
|
+
{
|
263
|
+
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
264
|
+
return loader.fromYamlString(yaml);
|
265
|
+
}
|
266
|
+
|
267
|
+
private FileInput fileInput(String path)
|
268
|
+
throws Exception
|
269
|
+
{
|
270
|
+
File file = new File(this.getClass().getResource(path).getPath());
|
271
|
+
FileInputStream in = new FileInputStream(file);
|
272
|
+
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
|
126
273
|
}
|
127
274
|
|
128
275
|
private void transaction(ConfigSource config, final FileInput input)
|
@@ -132,18 +279,12 @@ public class TestCsvGuessableParserPlugin
|
|
132
279
|
@Override
|
133
280
|
public void run(TaskSource taskSource, Schema schema)
|
134
281
|
{
|
282
|
+
TestCsvGuessableParserPlugin.this.schema = schema;
|
135
283
|
plugin.run(taskSource, schema, input, output);
|
136
284
|
}
|
137
285
|
});
|
138
286
|
}
|
139
287
|
|
140
|
-
private FileInput fileInput(File file)
|
141
|
-
throws Exception
|
142
|
-
{
|
143
|
-
FileInputStream in = new FileInputStream(file);
|
144
|
-
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
|
145
|
-
}
|
146
|
-
|
147
288
|
private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
|
148
289
|
throws IOException
|
149
290
|
{
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-csv_guessable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- koooge
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - ~>
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.0'
|
19
|
-
|
20
|
-
prerelease: false
|
21
|
-
type: :development
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
23
21
|
requirements:
|
24
22
|
- - ~>
|
25
23
|
- !ruby/object:Gem::Version
|
26
24
|
version: '1.0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '10.0'
|
33
|
-
|
34
|
-
prerelease: false
|
35
|
-
type: :development
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
37
35
|
requirements:
|
38
36
|
- - '>='
|
39
37
|
- !ruby/object:Gem::Version
|
40
38
|
version: '10.0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
41
|
description: Parses Guessable Csv files read by other file input plugins.
|
42
42
|
email:
|
43
43
|
- koooooge@gmail.com
|
@@ -45,6 +45,7 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
+
- .circleci/config.yml
|
48
49
|
- .gitignore
|
49
50
|
- LICENSE.txt
|
50
51
|
- README.md
|
@@ -57,6 +58,11 @@ files:
|
|
57
58
|
- gradlew.bat
|
58
59
|
- lib/embulk/guess/csv_guessable.rb
|
59
60
|
- lib/embulk/parser/csv_guessable.rb
|
61
|
+
- samples/sample1/config.yml
|
62
|
+
- samples/sample1/config_set_type.yml
|
63
|
+
- samples/sample1/sample1.csv
|
64
|
+
- samples/sample2/config_rename.yml
|
65
|
+
- samples/sample2/sample2.csv
|
60
66
|
- src/main/java/org/embulk/parser/csv_guessable/CsvGuessableParserPlugin.java
|
61
67
|
- src/main/java/org/embulk/parser/csv_guessable/CsvTokenizer.java
|
62
68
|
- src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java
|
@@ -67,11 +73,11 @@ files:
|
|
67
73
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/original-csv.yml
|
68
74
|
- src/test/resources/org/embulk/parser/csv_guessable/yml/replace_column_name.yml
|
69
75
|
- classpath/commons-lang3-3.5.jar
|
70
|
-
- classpath/embulk-parser-csv_guessable-0.1.4.jar
|
76
|
+
- classpath/embulk-parser-csv_guessable-0.1.5.jar
|
77
|
+
- classpath/embulk-standards-0.8.35.jar
|
71
78
|
- classpath/opencsv-3.9.jar
|
72
79
|
- classpath/commons-beanutils-1.9.3.jar
|
73
80
|
- classpath/commons-compress-1.10.jar
|
74
|
-
- classpath/embulk-standards-0.8.22.jar
|
75
81
|
- classpath/commons-collections-3.2.2.jar
|
76
82
|
- classpath/commons-logging-1.2.jar
|
77
83
|
homepage: https://github.com/koooge/embulk-parser-csv_guessable
|