embulk-parser-jsonl 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +7 -0
- data/README.md +3 -3
- data/build.gradle +79 -0
- data/embulk-parser-jsonl.gemspec +1 -1
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/guess/jsonl.rb +16 -67
- data/lib/embulk/parser/jsonl.rb +3 -67
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java +282 -0
- data/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java +228 -0
- metadata +28 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 973fe894b7704f01da0d8d0ba2b93eaa3804ea7d
|
4
|
+
data.tar.gz: fe6ca73d3100595bd64e95e0e08269490671314e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74ad34c4f29980e75f36ac88b9160fd784a975a83b906d6d5c30738edab617211c0653f22d0a9c01ad533bbcc92394b3ceccad20dcc8805b1627298695b5d6ac
|
7
|
+
data.tar.gz: dd611c8bff080d4ca1c61a8c071a5baeb2d982b89b2812457af67f9ecaacd5c3e17b42c230356087c26ed53ecb51a7a0728a1fde14c67a441974483a53fcd58b
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -10,7 +10,7 @@ TODO: Write short description here and embulk-parser-jsonl.gemspec file.
|
|
10
10
|
## Configuration
|
11
11
|
|
12
12
|
- **type**: specify this parser as jsonl
|
13
|
-
- **
|
13
|
+
- **columns**: specify column name and type (array, required)
|
14
14
|
|
15
15
|
## Example
|
16
16
|
|
@@ -19,7 +19,7 @@ in:
|
|
19
19
|
type: any file input plugin type
|
20
20
|
parser:
|
21
21
|
type: jsonl
|
22
|
-
|
22
|
+
columns:
|
23
23
|
- {name: first_name, type: string}
|
24
24
|
- {name: last_name, type: string}
|
25
25
|
- {name: age, type: long}
|
@@ -35,5 +35,5 @@ $ embulk guess -g jsonl config.yml -o guessed.yml
|
|
35
35
|
## Build
|
36
36
|
|
37
37
|
```
|
38
|
-
$
|
38
|
+
$ ./gradlew gem classpath
|
39
39
|
```
|
data/build.gradle
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
plugins {
|
2
|
+
id "com.jfrog.bintray" version "1.1"
|
3
|
+
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
|
+
id "java"
|
5
|
+
id "jacoco"
|
6
|
+
}
|
7
|
+
import com.github.jrubygradle.JRubyExec
|
8
|
+
repositories {
|
9
|
+
mavenCentral()
|
10
|
+
jcenter()
|
11
|
+
}
|
12
|
+
configurations {
|
13
|
+
provided
|
14
|
+
}
|
15
|
+
|
16
|
+
version = "0.1.0"
|
17
|
+
|
18
|
+
compileJava.options.encoding = 'UTF-8' // source encoding
|
19
|
+
sourceCompatibility = 1.7
|
20
|
+
targetCompatibility = 1.7
|
21
|
+
|
22
|
+
dependencies {
|
23
|
+
compile "org.embulk:embulk-core:0.8.2"
|
24
|
+
provided "org.embulk:embulk-core:0.8.2"
|
25
|
+
|
26
|
+
testCompile "junit:junit:4.+"
|
27
|
+
testCompile "org.embulk:embulk-core:0.8.2:tests"
|
28
|
+
testCompile "org.embulk:embulk-standards:0.8.2"
|
29
|
+
}
|
30
|
+
|
31
|
+
task classpath(type: Copy, dependsOn: ["jar"]) {
|
32
|
+
doFirst { file("classpath").deleteDir() }
|
33
|
+
from (configurations.runtime - configurations.provided + files(jar.archivePath))
|
34
|
+
into "classpath"
|
35
|
+
}
|
36
|
+
clean { delete "classpath" }
|
37
|
+
|
38
|
+
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
39
|
+
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
40
|
+
script "${project.name}.gemspec"
|
41
|
+
doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
|
42
|
+
}
|
43
|
+
|
44
|
+
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
45
|
+
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
|
46
|
+
script "pkg/${project.name}-${project.version}.gem"
|
47
|
+
}
|
48
|
+
|
49
|
+
task "package"(dependsOn: ["gemspec", "classpath"]) << {
|
50
|
+
println "> Build succeeded."
|
51
|
+
println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
|
52
|
+
}
|
53
|
+
|
54
|
+
task gemspec {
|
55
|
+
ext.gemspecFile = file("${project.name}.gemspec")
|
56
|
+
inputs.file "build.gradle"
|
57
|
+
outputs.file gemspecFile
|
58
|
+
doLast { gemspecFile.write($/
|
59
|
+
Gem::Specification.new do |spec|
|
60
|
+
spec.name = "${project.name}"
|
61
|
+
spec.version = "${project.version}"
|
62
|
+
spec.authors = ["Shunsuke Mikami"]
|
63
|
+
spec.summary = "Jsonl parser plugin for Embulk"
|
64
|
+
spec.description = "Parses Jsonl files read by other file input plugins."
|
65
|
+
spec.email = ["shun0102@gmail.com"]
|
66
|
+
spec.licenses = ["MIT"]
|
67
|
+
spec.homepage = "https://github.com/shun0102/embulk-parser-jsonl"
|
68
|
+
|
69
|
+
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
70
|
+
spec.test_files = spec.files.grep(%r{^(test|spec)/})
|
71
|
+
spec.require_paths = ["lib"]
|
72
|
+
|
73
|
+
spec.add_development_dependency 'bundler', ['~> 1.0']
|
74
|
+
spec.add_development_dependency 'rake', ['~> 10.0']
|
75
|
+
end
|
76
|
+
/$)
|
77
|
+
}
|
78
|
+
}
|
79
|
+
clean { delete "${project.name}.gemspec" }
|
data/embulk-parser-jsonl.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-parser-jsonl"
|
4
|
-
spec.version = "0.0
|
4
|
+
spec.version = "0.1.0"
|
5
5
|
spec.authors = ["Shunsuke Mikami"]
|
6
6
|
spec.summary = "Jsonl parser plugin for Embulk"
|
7
7
|
spec.description = "Parses Jsonl files read by other file input plugins."
|
Binary file
|
data/gradlew
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
##############################################################################
|
4
|
+
##
|
5
|
+
## Gradle start up script for UN*X
|
6
|
+
##
|
7
|
+
##############################################################################
|
8
|
+
|
9
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
10
|
+
DEFAULT_JVM_OPTS=""
|
11
|
+
|
12
|
+
APP_NAME="Gradle"
|
13
|
+
APP_BASE_NAME=`basename "$0"`
|
14
|
+
|
15
|
+
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
|
+
MAX_FD="maximum"
|
17
|
+
|
18
|
+
warn ( ) {
|
19
|
+
echo "$*"
|
20
|
+
}
|
21
|
+
|
22
|
+
die ( ) {
|
23
|
+
echo
|
24
|
+
echo "$*"
|
25
|
+
echo
|
26
|
+
exit 1
|
27
|
+
}
|
28
|
+
|
29
|
+
# OS specific support (must be 'true' or 'false').
|
30
|
+
cygwin=false
|
31
|
+
msys=false
|
32
|
+
darwin=false
|
33
|
+
case "`uname`" in
|
34
|
+
CYGWIN* )
|
35
|
+
cygwin=true
|
36
|
+
;;
|
37
|
+
Darwin* )
|
38
|
+
darwin=true
|
39
|
+
;;
|
40
|
+
MINGW* )
|
41
|
+
msys=true
|
42
|
+
;;
|
43
|
+
esac
|
44
|
+
|
45
|
+
# For Cygwin, ensure paths are in UNIX format before anything is touched.
|
46
|
+
if $cygwin ; then
|
47
|
+
[ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
48
|
+
fi
|
49
|
+
|
50
|
+
# Attempt to set APP_HOME
|
51
|
+
# Resolve links: $0 may be a link
|
52
|
+
PRG="$0"
|
53
|
+
# Need this for relative symlinks.
|
54
|
+
while [ -h "$PRG" ] ; do
|
55
|
+
ls=`ls -ld "$PRG"`
|
56
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
57
|
+
if expr "$link" : '/.*' > /dev/null; then
|
58
|
+
PRG="$link"
|
59
|
+
else
|
60
|
+
PRG=`dirname "$PRG"`"/$link"
|
61
|
+
fi
|
62
|
+
done
|
63
|
+
SAVED="`pwd`"
|
64
|
+
cd "`dirname \"$PRG\"`/" >&-
|
65
|
+
APP_HOME="`pwd -P`"
|
66
|
+
cd "$SAVED" >&-
|
67
|
+
|
68
|
+
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
69
|
+
|
70
|
+
# Determine the Java command to use to start the JVM.
|
71
|
+
if [ -n "$JAVA_HOME" ] ; then
|
72
|
+
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
73
|
+
# IBM's JDK on AIX uses strange locations for the executables
|
74
|
+
JAVACMD="$JAVA_HOME/jre/sh/java"
|
75
|
+
else
|
76
|
+
JAVACMD="$JAVA_HOME/bin/java"
|
77
|
+
fi
|
78
|
+
if [ ! -x "$JAVACMD" ] ; then
|
79
|
+
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
80
|
+
|
81
|
+
Please set the JAVA_HOME variable in your environment to match the
|
82
|
+
location of your Java installation."
|
83
|
+
fi
|
84
|
+
else
|
85
|
+
JAVACMD="java"
|
86
|
+
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
87
|
+
|
88
|
+
Please set the JAVA_HOME variable in your environment to match the
|
89
|
+
location of your Java installation."
|
90
|
+
fi
|
91
|
+
|
92
|
+
# Increase the maximum file descriptors if we can.
|
93
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
94
|
+
MAX_FD_LIMIT=`ulimit -H -n`
|
95
|
+
if [ $? -eq 0 ] ; then
|
96
|
+
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
97
|
+
MAX_FD="$MAX_FD_LIMIT"
|
98
|
+
fi
|
99
|
+
ulimit -n $MAX_FD
|
100
|
+
if [ $? -ne 0 ] ; then
|
101
|
+
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
102
|
+
fi
|
103
|
+
else
|
104
|
+
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
105
|
+
fi
|
106
|
+
fi
|
107
|
+
|
108
|
+
# For Darwin, add options to specify how the application appears in the dock
|
109
|
+
if $darwin; then
|
110
|
+
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
111
|
+
fi
|
112
|
+
|
113
|
+
# For Cygwin, switch paths to Windows format before running java
|
114
|
+
if $cygwin ; then
|
115
|
+
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
116
|
+
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
117
|
+
|
118
|
+
# We build the pattern for arguments to be converted via cygpath
|
119
|
+
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
120
|
+
SEP=""
|
121
|
+
for dir in $ROOTDIRSRAW ; do
|
122
|
+
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
123
|
+
SEP="|"
|
124
|
+
done
|
125
|
+
OURCYGPATTERN="(^($ROOTDIRS))"
|
126
|
+
# Add a user-defined pattern to the cygpath arguments
|
127
|
+
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
128
|
+
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
129
|
+
fi
|
130
|
+
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
131
|
+
i=0
|
132
|
+
for arg in "$@" ; do
|
133
|
+
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
134
|
+
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
135
|
+
|
136
|
+
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
137
|
+
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
138
|
+
else
|
139
|
+
eval `echo args$i`="\"$arg\""
|
140
|
+
fi
|
141
|
+
i=$((i+1))
|
142
|
+
done
|
143
|
+
case $i in
|
144
|
+
(0) set -- ;;
|
145
|
+
(1) set -- "$args0" ;;
|
146
|
+
(2) set -- "$args0" "$args1" ;;
|
147
|
+
(3) set -- "$args0" "$args1" "$args2" ;;
|
148
|
+
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
149
|
+
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
150
|
+
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
151
|
+
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
152
|
+
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
153
|
+
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
154
|
+
esac
|
155
|
+
fi
|
156
|
+
|
157
|
+
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
|
158
|
+
function splitJvmOpts() {
|
159
|
+
JVM_OPTS=("$@")
|
160
|
+
}
|
161
|
+
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
162
|
+
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
163
|
+
|
164
|
+
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
data/gradlew.bat
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
@if "%DEBUG%" == "" @echo off
|
2
|
+
@rem ##########################################################################
|
3
|
+
@rem
|
4
|
+
@rem Gradle startup script for Windows
|
5
|
+
@rem
|
6
|
+
@rem ##########################################################################
|
7
|
+
|
8
|
+
@rem Set local scope for the variables with windows NT shell
|
9
|
+
if "%OS%"=="Windows_NT" setlocal
|
10
|
+
|
11
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
+
set DEFAULT_JVM_OPTS=
|
13
|
+
|
14
|
+
set DIRNAME=%~dp0
|
15
|
+
if "%DIRNAME%" == "" set DIRNAME=.
|
16
|
+
set APP_BASE_NAME=%~n0
|
17
|
+
set APP_HOME=%DIRNAME%
|
18
|
+
|
19
|
+
@rem Find java.exe
|
20
|
+
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
+
|
22
|
+
set JAVA_EXE=java.exe
|
23
|
+
%JAVA_EXE% -version >NUL 2>&1
|
24
|
+
if "%ERRORLEVEL%" == "0" goto init
|
25
|
+
|
26
|
+
echo.
|
27
|
+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
+
echo.
|
29
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
+
echo location of your Java installation.
|
31
|
+
|
32
|
+
goto fail
|
33
|
+
|
34
|
+
:findJavaFromJavaHome
|
35
|
+
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
+
|
38
|
+
if exist "%JAVA_EXE%" goto init
|
39
|
+
|
40
|
+
echo.
|
41
|
+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
+
echo.
|
43
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
+
echo location of your Java installation.
|
45
|
+
|
46
|
+
goto fail
|
47
|
+
|
48
|
+
:init
|
49
|
+
@rem Get command-line arguments, handling Windowz variants
|
50
|
+
|
51
|
+
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
+
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
|
+
|
54
|
+
:win9xME_args
|
55
|
+
@rem Slurp the command line arguments.
|
56
|
+
set CMD_LINE_ARGS=
|
57
|
+
set _SKIP=2
|
58
|
+
|
59
|
+
:win9xME_args_slurp
|
60
|
+
if "x%~1" == "x" goto execute
|
61
|
+
|
62
|
+
set CMD_LINE_ARGS=%*
|
63
|
+
goto execute
|
64
|
+
|
65
|
+
:4NT_args
|
66
|
+
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
+
set CMD_LINE_ARGS=%$
|
68
|
+
|
69
|
+
:execute
|
70
|
+
@rem Setup the command line
|
71
|
+
|
72
|
+
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
73
|
+
|
74
|
+
@rem Execute Gradle
|
75
|
+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
76
|
+
|
77
|
+
:end
|
78
|
+
@rem End local scope for the variables with windows NT shell
|
79
|
+
if "%ERRORLEVEL%"=="0" goto mainEnd
|
80
|
+
|
81
|
+
:fail
|
82
|
+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
83
|
+
rem the _cmd.exe /c_ return code!
|
84
|
+
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
85
|
+
exit /b 1
|
86
|
+
|
87
|
+
:mainEnd
|
88
|
+
if "%OS%"=="Windows_NT" endlocal
|
89
|
+
|
90
|
+
:omega
|
data/lib/embulk/guess/jsonl.rb
CHANGED
@@ -1,84 +1,33 @@
|
|
1
1
|
require 'json'
|
2
|
+
require "embulk/parser/jsonl.rb"
|
2
3
|
|
3
4
|
module Embulk
|
4
5
|
module Guess
|
6
|
+
# $ embulk guess -g "jsonl" partial-config.yml
|
5
7
|
|
6
|
-
# TODO
|
7
|
-
# $ embulk guess -g "jsonl" partial-config.yml
|
8
|
-
#
|
9
|
-
# Depending on the file format the plugin uses, you can use choose
|
10
|
-
# one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
|
11
|
-
# or line guess (LineGuessPlugin).
|
12
|
-
|
13
|
-
require "embulk/parser/jsonl.rb"
|
14
|
-
|
15
|
-
#class JsonlParserGuessPlugin < GuessPlugin
|
16
|
-
# Plugin.register_guess("jsonl", self)
|
17
|
-
#
|
18
|
-
# def guess(config, sample_buffer)
|
19
|
-
# if sample_buffer[0,2] == GZIP_HEADER
|
20
|
-
# guessed = {}
|
21
|
-
# guessed["type"] = "jsonl"
|
22
|
-
# guessed["property1"] = "guessed-value"
|
23
|
-
# return {"parser" => guessed}
|
24
|
-
# else
|
25
|
-
# return {}
|
26
|
-
# end
|
27
|
-
# end
|
28
|
-
#end
|
29
|
-
|
30
|
-
#class JsonlParserGuessPlugin < TextGuessPlugin
|
31
|
-
# Plugin.register_guess("jsonl", self)
|
32
|
-
#
|
33
|
-
# def guess_text(config, sample_text)
|
34
|
-
# js = JSON.parse(sample_text) rescue nil
|
35
|
-
# if js && js["mykeyword"] == "keyword"
|
36
|
-
# guessed = {}
|
37
|
-
# guessed["type"] = "jsonl"
|
38
|
-
# guessed["property1"] = "guessed-value"
|
39
|
-
# return {"parser" => guessed}
|
40
|
-
# else
|
41
|
-
# return {}
|
42
|
-
# end
|
43
|
-
# end
|
44
|
-
#end
|
45
|
-
|
46
|
-
class JsonlParserGuessPlugin < LineGuessPlugin
|
8
|
+
class Jsonl < LineGuessPlugin # TODO should use GuessPlugin instead of LineGuessPlugin
|
47
9
|
Plugin.register_guess("jsonl", self)
|
48
10
|
|
49
11
|
def guess_lines(config, sample_lines)
|
12
|
+
#return {} unless config.fetch("parser", {}).fetch("type", "jsonl") == "jsonl"
|
13
|
+
|
14
|
+
rows = []
|
15
|
+
|
50
16
|
columns = {}
|
51
17
|
sample_lines.each do |line|
|
52
|
-
|
53
|
-
hash.each do |k, v|
|
54
|
-
columns[k] = get_embulk_type(v)
|
55
|
-
end
|
18
|
+
rows << JSON.parse(line)
|
56
19
|
end
|
57
|
-
schema = []
|
58
|
-
columns.each do |k,v|
|
59
|
-
schema << {'name' => k, 'type' => v}
|
60
|
-
end
|
61
|
-
guessed = {}
|
62
|
-
guessed["type"] = "jsonl"
|
63
|
-
guessed["schema"] = schema
|
64
|
-
return {"parser" => guessed}
|
65
|
-
end
|
66
20
|
|
67
|
-
|
21
|
+
return {} if rows.size <= 3
|
68
22
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
when FalseClass
|
74
|
-
return "boolean"
|
75
|
-
when Integer
|
76
|
-
return "long"
|
77
|
-
when Float
|
78
|
-
return "double"
|
79
|
-
else
|
80
|
-
return "string"
|
23
|
+
columns = Embulk::Guess::SchemaGuess.from_hash_records(rows).map do |c|
|
24
|
+
column = {name: c.name, type: c.type}
|
25
|
+
column[:format] = c.format if c.format
|
26
|
+
column
|
81
27
|
end
|
28
|
+
parser_guessed = {"type" => "jsonl"}
|
29
|
+
parser_guessed["columns"] = columns
|
30
|
+
return {"parser" => parser_guessed}
|
82
31
|
end
|
83
32
|
end
|
84
33
|
end
|
data/lib/embulk/parser/jsonl.rb
CHANGED
@@ -1,67 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
module Parser
|
5
|
-
|
6
|
-
class JsonlParserPlugin < ParserPlugin
|
7
|
-
Plugin.register_parser("jsonl", self)
|
8
|
-
|
9
|
-
def self.transaction(config, &control)
|
10
|
-
parser_task = config.load_config(Java::LineDecoder::DecoderTask)
|
11
|
-
task = {
|
12
|
-
"decoder_task" => DataSource.from_java(parser_task.dump),
|
13
|
-
"schema" => config.param("schema", :array)
|
14
|
-
}
|
15
|
-
columns = task["schema"].each_with_index.map do |c, i|
|
16
|
-
Column.new(i, c["name"], c["type"].to_sym)
|
17
|
-
end
|
18
|
-
yield(task, columns)
|
19
|
-
end
|
20
|
-
|
21
|
-
def init
|
22
|
-
@decoder_task = task.param("decoder_task", :hash).load_task(Java::LineDecoder::DecoderTask)
|
23
|
-
end
|
24
|
-
|
25
|
-
def run(file_input)
|
26
|
-
decoder = Java::LineDecoder.new(file_input.instance_eval { @java_file_input }, @decoder_task)
|
27
|
-
schema = @task["schema"]
|
28
|
-
|
29
|
-
while decoder.nextFile
|
30
|
-
while line = decoder.poll
|
31
|
-
begin
|
32
|
-
hash = JSON.parse(line)
|
33
|
-
@page_builder.add(make_record(schema, hash))
|
34
|
-
rescue
|
35
|
-
# TODO: logging
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
page_builder.finish
|
40
|
-
end
|
41
|
-
|
42
|
-
private
|
43
|
-
|
44
|
-
def make_record(schema, e)
|
45
|
-
schema.map do |c|
|
46
|
-
val = e[c["name"]]
|
47
|
-
v = val.nil? ? "" : val
|
48
|
-
case c["type"]
|
49
|
-
when "string"
|
50
|
-
v
|
51
|
-
when "long"
|
52
|
-
v.to_i
|
53
|
-
when "double"
|
54
|
-
v.to_f
|
55
|
-
when "boolean"
|
56
|
-
["yes", "true", "1"].include?(v.downcase)
|
57
|
-
when "timestamp"
|
58
|
-
v.empty? ? nil : Time.strptime(v, c["time_format"])
|
59
|
-
else
|
60
|
-
raise "Unsupported type #{c['type']}"
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
end
|
67
|
-
end
|
1
|
+
Embulk::JavaPlugin.register_parser(
|
2
|
+
"jsonl", "org.embulk.parser.jsonl.JsonlParserPlugin",
|
3
|
+
File.expand_path('../../../../classpath', __FILE__))
|
data/settings.gradle
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rootProject.name = 'embulk-parser-jsonl'
|
@@ -0,0 +1,282 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.collect.ImmutableMap;
|
5
|
+
import org.embulk.config.Config;
|
6
|
+
import org.embulk.config.ConfigDefault;
|
7
|
+
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.config.ConfigSource;
|
9
|
+
import org.embulk.config.Task;
|
10
|
+
import org.embulk.config.TaskSource;
|
11
|
+
import org.embulk.spi.Column;
|
12
|
+
import org.embulk.spi.ColumnVisitor;
|
13
|
+
import org.embulk.spi.DataException;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.FileInput;
|
16
|
+
import org.embulk.spi.PageBuilder;
|
17
|
+
import org.embulk.spi.PageOutput;
|
18
|
+
import org.embulk.spi.ParserPlugin;
|
19
|
+
import org.embulk.spi.Schema;
|
20
|
+
import org.embulk.spi.SchemaConfig;
|
21
|
+
import org.embulk.spi.json.JsonParser;
|
22
|
+
import org.embulk.spi.time.TimestampParser;
|
23
|
+
import org.embulk.spi.util.LineDecoder;
|
24
|
+
import org.embulk.spi.util.Timestamps;
|
25
|
+
import org.msgpack.core.MessageTypeException;
|
26
|
+
import org.msgpack.value.BooleanValue;
|
27
|
+
import org.msgpack.value.FloatValue;
|
28
|
+
import org.msgpack.value.IntegerValue;
|
29
|
+
import org.msgpack.value.Value;
|
30
|
+
import org.slf4j.Logger;
|
31
|
+
|
32
|
+
import java.util.Map;
|
33
|
+
|
34
|
+
import static org.msgpack.value.ValueFactory.newString;
|
35
|
+
|
36
|
+
public class JsonlParserPlugin
|
37
|
+
implements ParserPlugin
|
38
|
+
{
|
39
|
+
public interface PluginTask
|
40
|
+
extends Task, LineDecoder.DecoderTask, TimestampParser.Task
|
41
|
+
{
|
42
|
+
@Config("columns")
|
43
|
+
@ConfigDefault("null")
|
44
|
+
Optional<SchemaConfig> getSchemaConfig();
|
45
|
+
|
46
|
+
@Config("schema")
|
47
|
+
@ConfigDefault("null")
|
48
|
+
@Deprecated
|
49
|
+
Optional<SchemaConfig> getOldSchemaConfig();
|
50
|
+
|
51
|
+
@Config("stop_on_invalid_record")
|
52
|
+
@ConfigDefault("false")
|
53
|
+
boolean getStopOnInvalidRecord();
|
54
|
+
}
|
55
|
+
|
56
|
+
private final Logger log;
|
57
|
+
|
58
|
+
private String line = null;
|
59
|
+
private long lineNumber = 0;
|
60
|
+
private Map<String, Value> columnNameValues;
|
61
|
+
|
62
|
+
public JsonlParserPlugin()
|
63
|
+
{
|
64
|
+
this.log = Exec.getLogger(JsonlParserPlugin.class);
|
65
|
+
}
|
66
|
+
|
67
|
+
@Override
|
68
|
+
public void transaction(ConfigSource configSource, Control control)
|
69
|
+
{
|
70
|
+
PluginTask task = configSource.loadConfig(PluginTask.class);
|
71
|
+
control.run(task.dump(), getSchemaConfig(task).toSchema());
|
72
|
+
}
|
73
|
+
|
74
|
+
// this method is to keep the backward compatibility of 'schema' option.
|
75
|
+
private SchemaConfig getSchemaConfig(PluginTask task)
|
76
|
+
{
|
77
|
+
if (task.getOldSchemaConfig().isPresent()) {
|
78
|
+
log.warn("Please use 'columns' option instead of 'schema' because the 'schema' option is deprecated. The next version will stop 'schema' option support.");
|
79
|
+
}
|
80
|
+
|
81
|
+
if (task.getSchemaConfig().isPresent()) {
|
82
|
+
return task.getSchemaConfig().get();
|
83
|
+
}
|
84
|
+
else if (task.getOldSchemaConfig().isPresent()) {
|
85
|
+
return task.getOldSchemaConfig().get();
|
86
|
+
}
|
87
|
+
else {
|
88
|
+
throw new ConfigException("Attribute 'columns' is required but not set");
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
92
|
+
@Override
|
93
|
+
public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutput output)
|
94
|
+
{
|
95
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
96
|
+
|
97
|
+
setColumnNameValues(schema);
|
98
|
+
|
99
|
+
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, getSchemaConfig(task));
|
100
|
+
final LineDecoder decoder = newLineDecoder(input, task);
|
101
|
+
final JsonParser jsonParser = newJsonParser();
|
102
|
+
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
103
|
+
|
104
|
+
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
105
|
+
while (decoder.nextFile()) { // TODO this implementation should be improved with new JsonParser API on Embulk v0.8.3
|
106
|
+
lineNumber = 0;
|
107
|
+
|
108
|
+
while ((line = decoder.poll()) != null) {
|
109
|
+
lineNumber++;
|
110
|
+
|
111
|
+
try {
|
112
|
+
Value value = jsonParser.parse(line);
|
113
|
+
|
114
|
+
if (!value.isMapValue()) {
|
115
|
+
throw new JsonRecordValidateException("Json string is not representing map value.");
|
116
|
+
}
|
117
|
+
|
118
|
+
final Map<Value, Value> record = value.asMapValue().map();
|
119
|
+
|
120
|
+
schema.visitColumns(new ColumnVisitor() {
|
121
|
+
@Override
|
122
|
+
public void booleanColumn(Column column)
|
123
|
+
{
|
124
|
+
Value v = record.get(getColumnNameValue(column));
|
125
|
+
if (isNil(v)) {
|
126
|
+
pageBuilder.setNull(column);
|
127
|
+
}
|
128
|
+
else {
|
129
|
+
try {
|
130
|
+
pageBuilder.setBoolean(column, ((BooleanValue) v).getBoolean());
|
131
|
+
}
|
132
|
+
catch (MessageTypeException e) {
|
133
|
+
throw new JsonRecordValidateException(e);
|
134
|
+
}
|
135
|
+
}
|
136
|
+
}
|
137
|
+
|
138
|
+
@Override
|
139
|
+
public void longColumn(Column column)
|
140
|
+
{
|
141
|
+
Value v = record.get(getColumnNameValue(column));
|
142
|
+
if (isNil(v)) {
|
143
|
+
pageBuilder.setNull(column);
|
144
|
+
}
|
145
|
+
else {
|
146
|
+
try {
|
147
|
+
pageBuilder.setLong(column, ((IntegerValue) v).asLong());
|
148
|
+
}
|
149
|
+
catch (MessageTypeException e) {
|
150
|
+
throw new JsonRecordValidateException(e);
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
|
155
|
+
@Override
|
156
|
+
public void doubleColumn(Column column)
|
157
|
+
{
|
158
|
+
Value v = record.get(getColumnNameValue(column));
|
159
|
+
if (isNil(v)) {
|
160
|
+
pageBuilder.setNull(column);
|
161
|
+
}
|
162
|
+
else {
|
163
|
+
try {
|
164
|
+
pageBuilder.setDouble(column, ((FloatValue) v).toDouble());
|
165
|
+
}
|
166
|
+
catch (MessageTypeException e) {
|
167
|
+
throw new JsonRecordValidateException(e);
|
168
|
+
}
|
169
|
+
}
|
170
|
+
}
|
171
|
+
|
172
|
+
@Override
|
173
|
+
public void stringColumn(Column column)
|
174
|
+
{
|
175
|
+
Value v = record.get(getColumnNameValue(column));
|
176
|
+
if (isNil(v)) {
|
177
|
+
pageBuilder.setNull(column);
|
178
|
+
}
|
179
|
+
else {
|
180
|
+
try {
|
181
|
+
pageBuilder.setString(column, v.toString());
|
182
|
+
}
|
183
|
+
catch (MessageTypeException e) {
|
184
|
+
throw new JsonRecordValidateException(e);
|
185
|
+
}
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
@Override
|
190
|
+
public void timestampColumn(Column column)
|
191
|
+
{
|
192
|
+
Value v = record.get(getColumnNameValue(column));
|
193
|
+
if (isNil(v)) {
|
194
|
+
pageBuilder.setNull(column);
|
195
|
+
}
|
196
|
+
else {
|
197
|
+
try {
|
198
|
+
pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(v.toString()));
|
199
|
+
}
|
200
|
+
catch (MessageTypeException e) {
|
201
|
+
throw new JsonRecordValidateException(e);
|
202
|
+
}
|
203
|
+
}
|
204
|
+
}
|
205
|
+
|
206
|
+
@Override
|
207
|
+
public void jsonColumn(Column column)
|
208
|
+
{
|
209
|
+
Value v = record.get(getColumnNameValue(column));
|
210
|
+
if (isNil(v)) {
|
211
|
+
pageBuilder.setNull(column);
|
212
|
+
}
|
213
|
+
else {
|
214
|
+
try {
|
215
|
+
pageBuilder.setJson(column, v);
|
216
|
+
}
|
217
|
+
catch (MessageTypeException e) {
|
218
|
+
throw new JsonRecordValidateException(e);
|
219
|
+
}
|
220
|
+
}
|
221
|
+
}
|
222
|
+
|
223
|
+
private boolean isNil(Value v)
|
224
|
+
{
|
225
|
+
return v == null || v.isNilValue();
|
226
|
+
}
|
227
|
+
});
|
228
|
+
|
229
|
+
pageBuilder.addRecord();
|
230
|
+
}
|
231
|
+
catch (JsonRecordValidateException e) {
|
232
|
+
if (stopOnInvalidRecord) {
|
233
|
+
throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, line), e);
|
234
|
+
}
|
235
|
+
log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), line));
|
236
|
+
}
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
pageBuilder.finish();
|
241
|
+
}
|
242
|
+
}
|
243
|
+
|
244
|
+
private void setColumnNameValues(Schema schema)
|
245
|
+
{
|
246
|
+
ImmutableMap.Builder<String, Value> builder = ImmutableMap.builder();
|
247
|
+
for (Column column : schema.getColumns()) {
|
248
|
+
String name = column.getName();
|
249
|
+
builder.put(name, newString(name));
|
250
|
+
}
|
251
|
+
columnNameValues = builder.build();
|
252
|
+
}
|
253
|
+
|
254
|
+
private Value getColumnNameValue(Column column)
|
255
|
+
{
|
256
|
+
return columnNameValues.get(column.getName());
|
257
|
+
}
|
258
|
+
|
259
|
+
public LineDecoder newLineDecoder(FileInput input, PluginTask task)
|
260
|
+
{
|
261
|
+
return new LineDecoder(input, task);
|
262
|
+
}
|
263
|
+
|
264
|
+
public JsonParser newJsonParser()
|
265
|
+
{
|
266
|
+
return new JsonParser();
|
267
|
+
}
|
268
|
+
|
269
|
+
static class JsonRecordValidateException
|
270
|
+
extends DataException
|
271
|
+
{
|
272
|
+
JsonRecordValidateException(String message)
|
273
|
+
{
|
274
|
+
super(message);
|
275
|
+
}
|
276
|
+
|
277
|
+
JsonRecordValidateException(Throwable cause)
|
278
|
+
{
|
279
|
+
super(cause);
|
280
|
+
}
|
281
|
+
}
|
282
|
+
}
|
@@ -0,0 +1,228 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.Lists;
|
5
|
+
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.config.ConfigSource;
|
7
|
+
import org.embulk.config.TaskSource;
|
8
|
+
import org.embulk.spi.ColumnConfig;
|
9
|
+
import org.embulk.spi.DataException;
|
10
|
+
import org.embulk.spi.FileInput;
|
11
|
+
import org.embulk.spi.ParserPlugin;
|
12
|
+
import org.embulk.spi.Schema;
|
13
|
+
import org.embulk.spi.SchemaConfig;
|
14
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
15
|
+
import org.embulk.spi.time.Timestamp;
|
16
|
+
import org.embulk.spi.type.Type;
|
17
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
18
|
+
import org.embulk.spi.util.Pages;
|
19
|
+
import org.junit.Before;
|
20
|
+
import org.junit.Rule;
|
21
|
+
import org.junit.Test;
|
22
|
+
|
23
|
+
import java.io.ByteArrayInputStream;
|
24
|
+
import java.io.IOException;
|
25
|
+
import java.io.InputStream;
|
26
|
+
import java.util.List;
|
27
|
+
|
28
|
+
import static org.embulk.spi.type.Types.BOOLEAN;
|
29
|
+
import static org.embulk.spi.type.Types.DOUBLE;
|
30
|
+
import static org.embulk.spi.type.Types.JSON;
|
31
|
+
import static org.embulk.spi.type.Types.LONG;
|
32
|
+
import static org.embulk.spi.type.Types.STRING;
|
33
|
+
import static org.embulk.spi.type.Types.TIMESTAMP;
|
34
|
+
import static org.junit.Assert.assertEquals;
|
35
|
+
import static org.junit.Assert.assertNull;
|
36
|
+
import static org.junit.Assert.assertTrue;
|
37
|
+
import static org.junit.Assert.fail;
|
38
|
+
import static org.msgpack.value.ValueFactory.newArray;
|
39
|
+
import static org.msgpack.value.ValueFactory.newMap;
|
40
|
+
import static org.msgpack.value.ValueFactory.newString;
|
41
|
+
|
42
|
+
public class TestJsonlParserPlugin
|
43
|
+
{
|
44
|
+
@Rule
|
45
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
46
|
+
|
47
|
+
private ConfigSource config;
|
48
|
+
private JsonlParserPlugin plugin;
|
49
|
+
private MockPageOutput output;
|
50
|
+
|
51
|
+
@Before
|
52
|
+
public void createResource()
|
53
|
+
{
|
54
|
+
config = config().set("type", "jsonl");
|
55
|
+
plugin = new JsonlParserPlugin();
|
56
|
+
recreatePageOutput();
|
57
|
+
}
|
58
|
+
|
59
|
+
private void recreatePageOutput()
|
60
|
+
{
|
61
|
+
output = new MockPageOutput();
|
62
|
+
}
|
63
|
+
|
64
|
+
@Test
|
65
|
+
public void skipRecords()
|
66
|
+
throws Exception
|
67
|
+
{
|
68
|
+
SchemaConfig schema = schema(
|
69
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
70
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
71
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
72
|
+
|
73
|
+
transaction(config, fileInput(
|
74
|
+
"[]",
|
75
|
+
"\"embulk\"",
|
76
|
+
"10",
|
77
|
+
"true",
|
78
|
+
"false",
|
79
|
+
"null"
|
80
|
+
));
|
81
|
+
|
82
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
83
|
+
assertEquals(0, records.size());
|
84
|
+
}
|
85
|
+
|
86
|
+
@Test
|
87
|
+
public void throwDataException()
|
88
|
+
throws Exception
|
89
|
+
{
|
90
|
+
SchemaConfig schema = schema(
|
91
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
92
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
93
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).set("stop_on_invalid_record", true);
|
94
|
+
|
95
|
+
try {
|
96
|
+
transaction(config, fileInput(
|
97
|
+
"\"not_map_value\""
|
98
|
+
));
|
99
|
+
fail();
|
100
|
+
}
|
101
|
+
catch (Throwable t) {
|
102
|
+
assertTrue(t instanceof DataException);
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
@Test
|
107
|
+
public void writeNils()
|
108
|
+
throws Exception
|
109
|
+
{
|
110
|
+
SchemaConfig schema = schema(
|
111
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
112
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
113
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
114
|
+
|
115
|
+
transaction(config, fileInput(
|
116
|
+
"{}",
|
117
|
+
"{\"_c0\":null,\"_c1\":null,\"_c2\":null}",
|
118
|
+
"{\"_c3\":null,\"_c4\":null,\"_c5\":null}",
|
119
|
+
"{}"
|
120
|
+
));
|
121
|
+
|
122
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
123
|
+
assertEquals(4, records.size());
|
124
|
+
|
125
|
+
for (Object[] record : records) {
|
126
|
+
for (int i = 0; i < 6; i++) {
|
127
|
+
assertNull(record[i]);
|
128
|
+
}
|
129
|
+
}
|
130
|
+
}
|
131
|
+
|
132
|
+
@Test
|
133
|
+
public void useNormal()
|
134
|
+
throws Exception
|
135
|
+
{
|
136
|
+
SchemaConfig schema = schema(
|
137
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
138
|
+
column("_c3", STRING), column("_c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z")), column("_c5", JSON));
|
139
|
+
List<ConfigSource> configs = Lists.newArrayList(
|
140
|
+
this.config.deepCopy().set("columns", schema),
|
141
|
+
this.config.deepCopy().set("schema", schema)
|
142
|
+
);
|
143
|
+
|
144
|
+
for (ConfigSource config : configs) {
|
145
|
+
transaction(config, fileInput(
|
146
|
+
"{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
|
147
|
+
"[1, 2, 3]",
|
148
|
+
"{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}"
|
149
|
+
));
|
150
|
+
|
151
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
152
|
+
assertEquals(2, records.size());
|
153
|
+
|
154
|
+
Object[] record;
|
155
|
+
{
|
156
|
+
record = records.get(0);
|
157
|
+
assertEquals(true, record[0]);
|
158
|
+
assertEquals(10L, record[1]);
|
159
|
+
assertEquals(0.1, (Double) record[2], 0.0001);
|
160
|
+
assertEquals("embulk", record[3]);
|
161
|
+
assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
|
162
|
+
assertEquals(newMap(newString("k"), newString("v")), record[5]);
|
163
|
+
}
|
164
|
+
{
|
165
|
+
record = records.get(1);
|
166
|
+
assertEquals(false, record[0]);
|
167
|
+
assertEquals(-10L, record[1]);
|
168
|
+
assertEquals(1.0, (Double) record[2], 0.0001);
|
169
|
+
assertEquals("エンバルク", record[3]);
|
170
|
+
assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
|
171
|
+
assertEquals(newArray(newString("e0"), newString("e1")), record[5]);
|
172
|
+
}
|
173
|
+
|
174
|
+
recreatePageOutput();
|
175
|
+
}
|
176
|
+
}
|
177
|
+
|
178
|
+
private ConfigSource config()
|
179
|
+
{
|
180
|
+
return runtime.getExec().newConfigSource();
|
181
|
+
}
|
182
|
+
|
183
|
+
private void transaction(ConfigSource config, final FileInput input)
|
184
|
+
{
|
185
|
+
plugin.transaction(config, new ParserPlugin.Control()
|
186
|
+
{
|
187
|
+
@Override
|
188
|
+
public void run(TaskSource taskSource, Schema schema)
|
189
|
+
{
|
190
|
+
plugin.run(taskSource, schema, input, output);
|
191
|
+
}
|
192
|
+
});
|
193
|
+
}
|
194
|
+
|
195
|
+
private FileInput fileInput(String... lines)
|
196
|
+
throws Exception
|
197
|
+
{
|
198
|
+
StringBuilder sb = new StringBuilder();
|
199
|
+
for (String line : lines) {
|
200
|
+
sb.append(line).append("\n");
|
201
|
+
}
|
202
|
+
|
203
|
+
ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes());
|
204
|
+
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
|
205
|
+
}
|
206
|
+
|
207
|
+
private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
|
208
|
+
throws IOException
|
209
|
+
{
|
210
|
+
return new InputStreamFileInput.IteratorProvider(
|
211
|
+
ImmutableList.copyOf(inputStreams));
|
212
|
+
}
|
213
|
+
|
214
|
+
private SchemaConfig schema(ColumnConfig... columns)
|
215
|
+
{
|
216
|
+
return new SchemaConfig(Lists.newArrayList(columns));
|
217
|
+
}
|
218
|
+
|
219
|
+
private ColumnConfig column(String name, Type type)
|
220
|
+
{
|
221
|
+
return column(name, type, config());
|
222
|
+
}
|
223
|
+
|
224
|
+
private ColumnConfig column(String name, Type type, ConfigSource option)
|
225
|
+
{
|
226
|
+
return new ColumnConfig(name, type, option);
|
227
|
+
}
|
228
|
+
}
|
metadata
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-jsonl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shunsuke Mikami
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
|
-
- -
|
16
|
+
- - ~>
|
18
17
|
- !ruby/object:Gem::Version
|
19
18
|
version: '1.0'
|
20
|
-
|
19
|
+
name: bundler
|
21
20
|
prerelease: false
|
21
|
+
type: :development
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
30
29
|
requirements:
|
31
|
-
- -
|
30
|
+
- - ~>
|
32
31
|
- !ruby/object:Gem::Version
|
33
32
|
version: '10.0'
|
34
|
-
|
33
|
+
name: rake
|
35
34
|
prerelease: false
|
35
|
+
type: :development
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
41
|
description: Parses Jsonl files read by other file input plugins.
|
@@ -45,36 +45,46 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
-
|
48
|
+
- .gitignore
|
49
|
+
- CHANGELOG.md
|
49
50
|
- Gemfile
|
50
51
|
- LICENSE.txt
|
51
52
|
- README.md
|
52
53
|
- Rakefile
|
54
|
+
- build.gradle
|
53
55
|
- embulk-parser-jsonl.gemspec
|
56
|
+
- gradle/wrapper/gradle-wrapper.jar
|
57
|
+
- gradle/wrapper/gradle-wrapper.properties
|
58
|
+
- gradlew
|
59
|
+
- gradlew.bat
|
54
60
|
- lib/embulk/guess/jsonl.rb
|
55
61
|
- lib/embulk/parser/jsonl.rb
|
62
|
+
- settings.gradle
|
63
|
+
- src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java
|
64
|
+
- src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java
|
65
|
+
- classpath/embulk-parser-jsonl-0.1.0.jar
|
56
66
|
homepage: https://github.com/shun0102/embulk-parser-jsonl
|
57
67
|
licenses:
|
58
68
|
- MIT
|
59
69
|
metadata: {}
|
60
|
-
post_install_message:
|
70
|
+
post_install_message:
|
61
71
|
rdoc_options: []
|
62
72
|
require_paths:
|
63
73
|
- lib
|
64
74
|
required_ruby_version: !ruby/object:Gem::Requirement
|
65
75
|
requirements:
|
66
|
-
- -
|
76
|
+
- - '>='
|
67
77
|
- !ruby/object:Gem::Version
|
68
78
|
version: '0'
|
69
79
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
80
|
requirements:
|
71
|
-
- -
|
81
|
+
- - '>='
|
72
82
|
- !ruby/object:Gem::Version
|
73
83
|
version: '0'
|
74
84
|
requirements: []
|
75
|
-
rubyforge_project:
|
76
|
-
rubygems_version: 2.
|
77
|
-
signing_key:
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 2.1.9
|
87
|
+
signing_key:
|
78
88
|
specification_version: 4
|
79
89
|
summary: Jsonl parser plugin for Embulk
|
80
90
|
test_files: []
|