embulk-formatter-fast_jsonl 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e484178b1ad2c59bcf8174c64e2ab5971ea66c4d
4
+ data.tar.gz: 750dafa0093696f5d5dd6dc70adb29ba75a25b43
5
+ SHA512:
6
+ metadata.gz: 7f87f64eefd47578fbf0f5d3c352ddc9a2297769b0579ae70d434ea107d51ac5f182044dfb7ec6c3791e531a94a569f88bdc492ef5c845d533170ac050a03ec7
7
+ data.tar.gz: 89ec108c4f83583bde29bcdd8be5efd4387ea04cabaa6e0a051b1c805740e253cdb7131bab2e8d2c39eaf1bd5738533b04ab79d8e6e56d36daa65c14561efd1a
@@ -0,0 +1,27 @@
1
+ version: 2
2
+ jobs:
3
+ build:
4
+ executorType: docker
5
+ docker:
6
+ - image: hseeberger/scala-sbt
7
+ working_directory: /root/embulk-formatter-fast_jsonl/
8
+ steps:
9
+ - checkout
10
+ - restore_cache:
11
+ name: Restoring Cache
12
+ keys:
13
+ - sbt
14
+ - setup_remote_docker
15
+ - run:
16
+ name: prepare
17
+ command: sbt update exit
18
+ - save_cache:
19
+ name: Saving Cache sbt
20
+ key: sbt
21
+ paths:
22
+ - "/root/.sbt"
23
+ - "/root/.ivy2"
24
+ - run:
25
+ name: compile
26
+ command: |
27
+ sbt compile test scalafmt::test exit
data/.gitignore ADDED
@@ -0,0 +1,80 @@
1
+ /pkg/
2
+ /tmp/
3
+ *.gemspec
4
+ .gradle/
5
+ /classpath/
6
+ build/
7
+ .idea
8
+ /.settings/
9
+ /.metadata/
10
+ .classpath
11
+ .project
12
+
13
+ .settings
14
+ .classpath
15
+ .project
16
+ *.iml
17
+ *.ipr
18
+ *.iws
19
+ dist/
20
+ lib_managed/
21
+ project/boot/
22
+ project/plugins/project/
23
+ target/
24
+
25
+ # use glob syntax.
26
+ syntax: glob
27
+ *.ser
28
+ *.class
29
+ *~
30
+ *.bak
31
+ #*.off
32
+ *.old
33
+
34
+ # eclipse conf file
35
+ .settings
36
+ .classpath
37
+ .project
38
+ .manager
39
+ .scala_dependencies
40
+
41
+ # idea
42
+ .idea
43
+ *.iml
44
+
45
+ # building
46
+ target
47
+ build
48
+ null
49
+ tmp*
50
+ temp*
51
+ !templates/
52
+ dist
53
+ test-output
54
+ build.log
55
+
56
+ # other scm
57
+ .svn
58
+ .CVS
59
+ .hg*
60
+
61
+ # switch to regexp syntax.
62
+ # syntax: regexp
63
+ # ^\.pc/
64
+
65
+ #SHITTY output not in target directory
66
+ build.log
67
+ .DS_Store
68
+ derby.log
69
+
70
+ *.db
71
+
72
+ .lib
73
+ sbt
74
+
75
+ logs
76
+ sandbox/db
77
+
78
+
79
+ .ensime*⏎
80
+ project/project/
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # Fast JSONL formatter plugin for Embulk
2
+
3
+ Formats each record as a single JSON object on its own line (JSON Lines).
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: formatter
8
+
9
+ ## Configuration
10
+
11
+ - **encoding**: output encoding. Must be one of "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE" or "UTF-32LE" (string, default: 'UTF-8')
12
+ - **newline**: newline character. (string default: 'LF')
13
+ - CRLF, LF, CR
14
+ - **default_timezone**: timezone used when formatting timestamp columns, e.g. "JST". (string, default: 'UTC')
15
+ - **default_timestamp_format**: format used when writing timestamp columns. (string, default: '%Y-%m-%d %H:%M:%S.%6N %z')
16
+ - **explode_json_columns**: names of JSON columns whose fields are expanded into top-level fields of the output record. (array, default: [])
17
+ - **json_columns**: names of string columns that contain JSON text; their values are parsed and written as JSON instead of as quoted strings. (array, default: [])
18
+
19
+ ## Example
20
+
21
+ ```yaml
22
+ out:
23
+ type: any file output plugin type
24
+ formatter:
25
+ type: fast_jsonl
26
+ explode_json_columns:
27
+ - JSON_COLUMN_1
28
+ - JSON_COLUMN_2
29
+ ```
30
+
31
+ ## Run Examples
32
+
33
+ ```
34
+ ./gradlew classpath
35
+ embulk run example/config.yml -Ilib
36
+ ```
37
+
38
+ ## Build
39
+
40
+ ```
41
+ $ ./gradlew gem # -t to watch change of files and rebuild continuously
42
+ ```
data/build.gradle ADDED
@@ -0,0 +1,81 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ id "scala"
6
+ }
7
+ import com.github.jrubygradle.JRubyExec
8
+ repositories {
9
+ mavenCentral()
10
+ jcenter()
11
+ }
12
+ configurations {
13
+ provided
14
+ }
15
+
16
+ version = "0.1.0"
17
+
18
+ sourceCompatibility = 1.7
19
+ targetCompatibility = 1.7
20
+
21
+ dependencies {
22
+ compile "org.embulk:embulk-core:0.8.22"
23
+ compile "org.scala-lang:scala-library:2.11.11"
24
+ compile group: 'io.circe', name: 'circe-core_2.11', version: '0.8.0'
25
+ compile group: 'io.circe', name: 'circe-generic_2.11', version: '0.8.0'
26
+ compile group: 'io.circe', name: 'circe-parser_2.11', version: '0.8.0'
27
+ provided "org.embulk:embulk-core:0.8.22"
28
+ }
29
+
30
+ task classpath(type: Copy, dependsOn: ["jar"]) {
31
+ doFirst { file("classpath").deleteDir() }
32
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
33
+ into "classpath"
34
+ }
35
+ clean { delete "classpath" }
36
+
37
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
38
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
39
+ script "${project.name}.gemspec"
40
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
41
+ }
42
+
43
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
44
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
45
+ script "pkg/${project.name}-${project.version}.gem"
46
+ }
47
+
48
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
49
+ doLast {
50
+ println "> Build succeeded."
51
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
52
+ }
53
+ }
54
+
55
+ task gemspec {
56
+ ext.gemspecFile = file("${project.name}.gemspec")
57
+ inputs.file "build.gradle"
58
+ outputs.file gemspecFile
59
+ doLast { gemspecFile.write($/
60
+ Gem::Specification.new do |spec|
61
+ spec.name = "${project.name}"
62
+ spec.version = "${project.version}"
63
+ spec.authors = ["smdmts"]
64
+ spec.summary = %[fast_jsonl]
65
+ spec.description = %[fast_jsonl.]
66
+ spec.email = ["smdmts@gmail.com"]
67
+ spec.licenses = ["MIT"]
68
+ spec.homepage = "https://github.com/smdmts/embulk-formatter-fast_jsonl"
69
+
70
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
71
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
72
+ spec.require_paths = ["lib"]
73
+
74
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
75
+ spec.add_development_dependency 'bundler', ['~> 1.0']
76
+ spec.add_development_dependency 'rake', ['>= 10.0']
77
+ end
78
+ /$)
79
+ }
80
+ }
81
+ clean { delete "${project.name}.gemspec" }
data/build.sbt ADDED
@@ -0,0 +1,32 @@
1
// sbt build definition for the embulk-formatter-fast_jsonl plugin.
//
// NOTE(review): build.gradle declares version "0.1.0" and embulk-core 0.8.22,
// while this file uses "0.1.0-SNAPSHOT" and embulk-core 0.8.28 — confirm which
// build is authoritative before aligning them.

// Enable scalafmt for the project. This only needs to be declared once.
enablePlugins(ScalafmtPlugin)

lazy val root = (project in file(".")).settings(
  // Build-wide coordinates shared by every project in this build.
  inThisBuild(
    List(
      organization := "com.example",
      scalaVersion := "2.11.11",
      version := "0.1.0-SNAPSHOT"
    )),
  name := "embulk-formatter-fast_jsonl",
  // Run scalafmt (and its check for test sources) as part of compilation.
  scalafmtOnCompile in ThisBuild := true,
  scalafmtTestOnCompile in ThisBuild := true
)

resolvers += Resolver.jcenterRepo
resolvers += Resolver.sonatypeRepo("releases")

// Single version shared by all circe modules below.
lazy val circeVersion = "0.8.0"
libraryDependencies ++= Seq(
  "org.jruby" % "jruby-complete" % "1.6.5",
  "org.embulk" % "embulk-core" % "0.8.28",
  "com.chuusai" %% "shapeless" % "2.3.2",
  "io.circe" %% "circe-core" % circeVersion,
  "io.circe" %% "circe-generic" % circeVersion,
  "io.circe" %% "circe-parser" % circeVersion,
  "org.scalacheck" %% "scalacheck" % "1.13.4" % Test,
  "org.scalatest" %% "scalatest" % "3.0.1" % Test,
  "org.scalamock" %% "scalamock-scalatest-support" % "3.6.0" % Test,
  "com.github.alexarchambault" %% "scalacheck-shapeless_1.13" % "1.1.5" % Test
)
@@ -0,0 +1,24 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./example/data.tsv
4
+ parser:
5
+ type: csv
6
+ delimiter: "\t"
7
+ skip_header_lines: 0
8
+ null_string: ""
9
+ columns:
10
+ - { name: id, type: long }
11
+ - { name: description, type: string }
12
+ - { name: name, type: string }
13
+ - { name: payload, type: json}
14
+ stop_on_invalid_record: true
15
+
16
+ out:
17
+ type: file
18
+ path_prefix: /tmp/embulk-formatter-fast_jsonl
19
+ file_ext: aaa
20
+ formatter:
21
+ type: fast_jsonl
22
+ encoding: UTF-8
23
+ newline: LF
24
+ explode_json_columns: []
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./example/data.tsv
4
+ parser:
5
+ type: csv
6
+ delimiter: "\t"
7
+ skip_header_lines: 0
8
+ null_string: ""
9
+ columns:
10
+ - { name: id, type: long }
11
+ - { name: description, type: string }
12
+ - { name: name, type: string }
13
+ - { name: payload, type: json }
14
+ stop_on_invalid_record: true
15
+
16
+ out:
17
+ type: file
18
+ path_prefix: /tmp/embulk-formatter-fast_jsonl
19
+ file_ext: aaa
20
+ formatter:
21
+ type: fast_jsonl
22
+ explode_json_columns: [payload]
@@ -0,0 +1,24 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./example/data.tsv
4
+ parser:
5
+ type: csv
6
+ delimiter: "\t"
7
+ skip_header_lines: 0
8
+ null_string: ""
9
+ columns:
10
+ - { name: id, type: long }
11
+ - { name: description, type: string }
12
+ - { name: name, type: string }
13
+ - { name: payload, type: string }
14
+ stop_on_invalid_record: true
15
+
16
+ out:
17
+ type: file
18
+ path_prefix: /tmp/embulk-formatter-fast_jsonl
19
+ file_ext: aaa
20
+ formatter:
21
+ type: fast_jsonl
22
+ encoding: UTF-8
23
+ newline: LF
24
+ json_columns: [payload]
@@ -0,0 +1,25 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./example/data.tsv
4
+ parser:
5
+ type: csv
6
+ delimiter: "\t"
7
+ skip_header_lines: 0
8
+ null_string: ""
9
+ columns:
10
+ - { name: id, type: long }
11
+ - { name: description, type: string }
12
+ - { name: name, type: string }
13
+ - { name: payload, type: string }
14
+ stop_on_invalid_record: true
15
+
16
+ out:
17
+ type: file
18
+ path_prefix: /tmp/embulk-formatter-fast_jsonl
19
+ file_ext: aaa
20
+ formatter:
21
+ type: fast_jsonl
22
+ encoding: UTF-8
23
+ newline: LF
24
+ json_columns: [payload]
25
+ explode_json_columns: [payload]
data/example/data.tsv ADDED
@@ -0,0 +1,5 @@
1
+ 0 c20ef94602 c212c89f91 {"a":0,"b":"99"}
2
+ 1 330a9fc33a e25b33b616 {"a":1,"b":"a9"}
3
+ 2 707b3b7588 90823c6a1f {"a":2,"b":"96"}
4
+ 3 8d8288e66f {"a":3,"b":"86"}
5
+ 4 c54d8b6481 e56a40571c {"a":4,"b":"d2"}
Binary file
@@ -0,0 +1,6 @@
1
+ #Mon Jun 19 14:41:12 JST 2017
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-all.zip
data/gradlew ADDED
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
26
+
27
+ APP_NAME="Gradle"
28
+ APP_BASE_NAME=`basename "$0"`
29
+
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
33
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
34
+ MAX_FD="maximum"
35
+
36
+ warn ( ) {
37
+ echo "$*"
38
+ }
39
+
40
+ die ( ) {
41
+ echo
42
+ echo "$*"
43
+ echo
44
+ exit 1
45
+ }
46
+
47
+ # OS specific support (must be 'true' or 'false').
48
+ cygwin=false
49
+ msys=false
50
+ darwin=false
51
+ nonstop=false
52
+ case "`uname`" in
53
+ CYGWIN* )
54
+ cygwin=true
55
+ ;;
56
+ Darwin* )
57
+ darwin=true
58
+ ;;
59
+ MINGW* )
60
+ msys=true
61
+ ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
65
+ esac
66
+
67
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
68
+
69
+ # Determine the Java command to use to start the JVM.
70
+ if [ -n "$JAVA_HOME" ] ; then
71
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
72
+ # IBM's JDK on AIX uses strange locations for the executables
73
+ JAVACMD="$JAVA_HOME/jre/sh/java"
74
+ else
75
+ JAVACMD="$JAVA_HOME/bin/java"
76
+ fi
77
+ if [ ! -x "$JAVACMD" ] ; then
78
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
79
+
80
+ Please set the JAVA_HOME variable in your environment to match the
81
+ location of your Java installation."
82
+ fi
83
+ else
84
+ JAVACMD="java"
85
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
86
+
87
+ Please set the JAVA_HOME variable in your environment to match the
88
+ location of your Java installation."
89
+ fi
90
+
91
+ # Increase the maximum file descriptors if we can.
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
93
+ MAX_FD_LIMIT=`ulimit -H -n`
94
+ if [ $? -eq 0 ] ; then
95
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
96
+ MAX_FD="$MAX_FD_LIMIT"
97
+ fi
98
+ ulimit -n $MAX_FD
99
+ if [ $? -ne 0 ] ; then
100
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
101
+ fi
102
+ else
103
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104
+ fi
105
+ fi
106
+
107
+ # For Darwin, add options to specify how the application appears in the dock
108
+ if $darwin; then
109
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110
+ fi
111
+
112
+ # For Cygwin, switch paths to Windows format before running java
113
+ if $cygwin ; then
114
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
165
+ if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
166
+ cd "$(dirname "$0")"
167
+ fi
168
+
169
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,84 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ set DIRNAME=%~dp0
12
+ if "%DIRNAME%" == "" set DIRNAME=.
13
+ set APP_BASE_NAME=%~n0
14
+ set APP_HOME=%DIRNAME%
15
+
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+
53
+ :win9xME_args
54
+ @rem Slurp the command line arguments.
55
+ set CMD_LINE_ARGS=
56
+ set _SKIP=2
57
+
58
+ :win9xME_args_slurp
59
+ if "x%~1" == "x" goto execute
60
+
61
+ set CMD_LINE_ARGS=%*
62
+
63
+ :execute
64
+ @rem Setup the command line
65
+
66
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67
+
68
+ @rem Execute Gradle
69
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70
+
71
+ :end
72
+ @rem End local scope for the variables with windows NT shell
73
+ if "%ERRORLEVEL%"=="0" goto mainEnd
74
+
75
+ :fail
76
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77
+ rem the _cmd.exe /c_ return code!
78
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79
+ exit /b 1
80
+
81
+ :mainEnd
82
+ if "%OS%"=="Windows_NT" endlocal
83
+
84
+ :omega
@@ -0,0 +1,3 @@
1
# Registers the Java/Scala formatter class under the plugin name "fast_jsonl".
# The last argument is the "classpath" directory, resolved relative to this
# file's location.
Embulk::JavaPlugin.register_formatter(
    "fast_jsonl", "org.embulk.formatter.fast_jsonl.FastJsonlFormatterPlugin",
    File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1 @@
1
+ sbt.version=0.13.15
@@ -0,0 +1,3 @@
1
+ addSbtPlugin("com.julianpeeters" % "sbt-avrohugger" % "0.16.0")
2
+ addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.7")
3
+ addSbtPlugin("io.get-coursier" % "sbt-coursier" % "1.0.0-RC6")
data/setting.gradle ADDED
@@ -0,0 +1 @@
1
+ rootProject.name = 'embulk-formatter-fast_jsonl'
@@ -0,0 +1,38 @@
1
+ package org.embulk.formatter.fast_jsonl
2
+
3
+ import java.nio.charset.{Charset, StandardCharsets}
4
+
5
+ import org.embulk.config.{ConfigSource, TaskSource}
6
+ import org.embulk.spi._
7
+
8
/** Formatter plugin entry point (registered under the name "fast_jsonl"). */
class FastJsonlFormatterPlugin extends FormatterPlugin {

  /** Loads the plugin task from `config`, rejects unsupported charsets and
    * passes the dumped task on to `control`.
    */
  override def transaction(config: ConfigSource,
                           schema: Schema,
                           control: FormatterPlugin.Control): Unit = {
    val pluginTask = config.loadConfig(classOf[PluginTask])
    validateCharset(pluginTask)
    val taskSource = pluginTask.dump()
    control.run(taskSource)
  }

  /** Fails with a runtime error unless the task's charset is one of the
    * UTF-8 / UTF-16 / UTF-32 variants listed below.
    */
  def validateCharset(task: PluginTask): Unit = {
    val charset = task.getCharset
    // Short-circuit chain: candidates are compared in this order and the
    // `Charset.forName` lookups only run when the earlier ones did not match.
    val supported =
      charset == StandardCharsets.UTF_8 ||
        charset == StandardCharsets.UTF_16BE ||
        charset == StandardCharsets.UTF_16LE ||
        charset == StandardCharsets.UTF_16 ||
        charset == Charset.forName("UTF-32") ||
        charset == Charset.forName("UTF-32BE") ||
        charset == Charset.forName("UTF-32LE")
    if (!supported) {
      sys.error("unmatch json character set.")
    }
  }

  /** Restores the plugin task from `taskSource` and builds the page output
    * that writes to `output`.
    */
  override def open(taskSource: TaskSource,
                    schema: Schema,
                    output: FileOutput): PageOutput = {
    val pluginTask = taskSource.loadTask(classOf[PluginTask])
    PageOutput(schema, pluginTask, output)
  }

}
@@ -0,0 +1,49 @@
1
+ package org.embulk.formatter.fast_jsonl
2
+
3
+ import com.google.common.base.Optional
4
+ import scala.collection.JavaConverters._
5
+ import org.embulk.formatter.fast_jsonl.json.ColumnVisitor
6
+ import org.embulk.spi.{
7
+ FileOutput,
8
+ Page,
9
+ PageReader,
10
+ Schema,
11
+ PageOutput => EmbulkPageOutput
12
+ }
13
+ import org.embulk.spi.time.TimestampFormatter
14
+ import org.embulk.spi.util.LineEncoder
15
+
16
/** Page output that writes every record of each incoming page as one JSON
  * line through a `LineEncoder`.
  *
  * @param schema schema of the incoming pages
  * @param task   plugin task (encoder, timestamp and column options)
  * @param output file output the encoded lines are written to
  */
case class PageOutput(schema: Schema, task: PluginTask, output: FileOutput)
    extends EmbulkPageOutput {
  val encoder = new LineEncoder(output, task)
  // One reader for the lifetime of this output; it is re-pointed at each page
  // in `add` and closed in `close`.
  val reader: PageReader = new PageReader(schema)
  val explodeColumns: Seq[String] = task.getExplodeJsonColumns().asScala
  val jsonColumns: Seq[String] = task.getJsonColumns().asScala
  // Tracks whether `encoder.nextFile()` has been called yet.
  private var opened: Boolean = false

  val timestampFormatter: TimestampFormatter =
    new TimestampFormatter(task, Optional.absent())

  /** Encodes every record of `page` as one line.
    *
    * The output file is opened lazily on the first page, so no file is
    * started when no page ever arrives.
    */
  override def add(page: Page): Unit = {
    if (!opened) {
      encoder.nextFile()
      opened = true
    }
    // Reuse the shared reader instead of allocating a new, never-closed
    // reader for every page.
    reader.setPage(page)
    while (reader.nextRecord()) {
      // A fresh visitor per record: it accumulates that record's fields.
      val visitor =
        ColumnVisitor(reader, timestampFormatter, explodeColumns, jsonColumns)
      schema.visitColumns(visitor)
      encoder.addLine(visitor.getLine)
    }
    ()
  }

  /** Finishes the encoder's pending output. */
  override def finish(): Unit = encoder.finish()

  /** Releases the reader and closes the encoder.
    *
    * The encoder is closed (not merely finished) here, and it is closed even
    * if closing the reader throws.
    */
  override def close(): Unit =
    try {
      reader.close()
    } finally {
      encoder.close()
    }

}
@@ -0,0 +1,24 @@
1
+ package org.embulk.formatter.fast_jsonl
2
+
3
+ import org.embulk.config.{Config, ConfigDefault, Task}
4
+ import org.embulk.spi.time.TimestampFormatter
5
+ import org.embulk.spi.util.{LineEncoder, Newline}
6
+
7
/** Configuration task of the fast_jsonl formatter.
  *
  * Combines the encoder and timestamp-formatter task interfaces with the
  * plugin's own options declared below.
  *
  * NOTE(review): `getCharset`, which the plugin validates, is not declared
  * here — presumably it is inherited from `LineEncoder.EncoderTask`. Verify
  * that its config key matches the `encoding` option shown in the README.
  */
trait PluginTask
    extends Task
    with LineEncoder.EncoderTask
    with TimestampFormatter.Task {

  // Newline used between output lines; the default declared here is "LF".
  @Config("newline")
  @ConfigDefault("\"LF\"")
  def getNewline: Newline

  // Names of columns whose JSON fields are expanded into top-level fields;
  // defaults to an empty list.
  @Config("explode_json_columns")
  @ConfigDefault("[]")
  def getExplodeJsonColumns(): java.util.List[String]

  // Names of string columns whose content is parsed as JSON; defaults to an
  // empty list.
  @Config("json_columns")
  @ConfigDefault("[]")
  def getJsonColumns(): java.util.List[String]

}
@@ -0,0 +1,86 @@
1
+ package org.embulk.formatter.fast_jsonl.json
2
+
3
+ import io.circe.Json
4
+ import org.embulk.spi.time.TimestampFormatter
5
+ import org.embulk.spi.{
6
+ Column,
7
+ PageReader,
8
+ ColumnVisitor => EmbulkColumnVisitor
9
+ }
10
+
11
/** Column visitor that collects the current record of `reader` as JSON
  * fields and renders them as a single compact JSON line.
  *
  * Null column values are skipped, so they do not appear in the output.
  *
  * @param reader             page reader positioned on the record to visit
  * @param timestampFormatter formatter applied to timestamp columns
  * @param explodeColumns     names of columns whose JSON object fields are
  *                           added individually instead of as one field
  * @param jsonColumns        names of string columns whose text is parsed
  *                           as JSON
  */
case class ColumnVisitor(reader: PageReader,
                         timestampFormatter: TimestampFormatter,
                         explodeColumns: Seq[String],
                         jsonColumns: Seq[String])
    extends EmbulkColumnVisitor {
  import scala.collection.mutable

  // Fields keyed by column name, in visiting order.
  private val recordMap = mutable.LinkedHashMap[String, Json]()
  // Fields that are merged into `recordMap` only when the line is rendered.
  private val explodeRecord = mutable.LinkedHashMap[String, Json]()

  /** Stores the timestamp as a JSON string produced by `timestampFormatter`. */
  override def timestampColumn(column: Column): Unit =
    value(column, reader.getTimestamp).foreach(v =>
      put(column, Json.fromString(timestampFormatter.format(v))))

  /** Stores a string column.
    *
    * Columns listed in `jsonColumns` are parsed as JSON; if the column is
    * also listed in `explodeColumns` its object fields are added one by one.
    * All other columns are stored as JSON strings.
    */
  override def stringColumn(column: Column): Unit =
    value(column, reader.getString).foreach { v =>
      val columnName = column.getName
      if (jsonColumns.contains(columnName)) {
        if (explodeColumns.contains(columnName)) {
          JsonParser(v).foreach {
            case (key, value) =>
              explodeRecord.put(key, value)
          }
        } else {
          // NOTE(review): this goes through `explodeRecord`, so the column is
          // merged in `getLine` after the regular columns, whereas
          // `jsonColumn` uses `put` — confirm whether that is intended.
          explodeRecord.put(columnName, JsonParser.toJson(v))
        }
      } else {
        put(column, Json.fromString(v))
      }
    }

  /** Stores the long value as a JSON number. */
  override def longColumn(column: Column): Unit =
    value(column, reader.getLong).foreach(v => put(column, Json.fromBigInt(v)))

  /** Stores the double value as a JSON number. */
  override def doubleColumn(column: Column): Unit =
    value(column, reader.getDouble).foreach(v =>
      put(column, Json.fromBigDecimal(v)))

  /** Stores the boolean value as a JSON boolean. */
  override def booleanColumn(column: Column): Unit =
    value(column, reader.getBoolean).foreach(v =>
      put(column, Json.fromBoolean(v)))

  /** Stores a JSON column, either exploded into its object fields (when the
    * column is listed in `explodeColumns`) or as one field.
    */
  override def jsonColumn(column: Column): Unit = {
    value(column, reader.getJson).foreach { v =>
      if (explodeColumns.contains(column.getName)) {
        // Serialise with `toJson`, as the non-exploded branch does, so that
        // both branches hand the same JSON text to the parser.
        JsonParser(v.toJson).foreach {
          case (key, value) =>
            explodeRecord.put(key, value)
        }
      } else {
        put(column, JsonParser.toJson(v.toJson))
      }
    }
  }

  /** Reads the column through `method`.
    *
    * @return `None` when the reader reports the column as null, otherwise
    *         the value wrapped in `Some`
    */
  def value[A](column: Column, method: => (Column => A)): Option[A] =
    if (reader.isNull(column)) {
      None
    } else {
      Some(method(column))
    }

  /** Stores `value` under the column's name. */
  def put(column: Column, value: Json): Unit = {
    recordMap.put(column.getName, value)
    ()
  }

  /** Merges the deferred fields into the record and renders it as JSON
    * without whitespace.
    */
  def getLine: String = {
    explodeRecord.foreach {
      case (key, json) =>
        recordMap.put(key, json)
    }
    JsonEncoder(recordMap).noSpaces
  }

}
@@ -0,0 +1,15 @@
1
+ package org.embulk.formatter.fast_jsonl.json
2
+
3
+ import io.circe._
4
+ import io.circe.syntax._
5
+
6
+ import scala.collection.mutable
7
+
8
/** Small helpers for building circe `Json` values. */
object JsonEncoder {

  /** Builds a JSON object from the entries of `value`. */
  def apply(value: mutable.LinkedHashMap[String, Json]): Json = {
    val fields: Iterable[(String, Json)] = value
    Json.fromFields(fields)
  }

  /** Wraps `value` as a JSON string. */
  def apply(value: String): Json =
    Json.fromString(value)
}
@@ -0,0 +1,23 @@
1
+ package org.embulk.formatter.fast_jsonl.json
2
+
3
+ import io.circe.Json
4
+ import io.circe.parser._
5
+
6
/** Parsing helpers that turn JSON text into circe `Json` values. */
object JsonParser {

  /** Parses `value` as a JSON object and returns its fields.
    *
    * The fields are taken from the parsed `JsonObject` rather than decoded
    * into an immutable `Map`, so they keep the object's own field order, and
    * the result is a strict list.
    *
    * @throws RuntimeException if `value` is not valid JSON or is not a JSON
    *                          object
    */
  def apply(value: String): Seq[(String, Json)] =
    parse(value).right.toOption.flatMap(_.asObject) match {
      case Some(jsonObject) =>
        jsonObject.toList
      case None =>
        sys.error(s"could not parse json. $value")
    }

  /** Parses `value` as an arbitrary JSON value.
    *
    * @throws RuntimeException if `value` is not valid JSON
    */
  def toJson(value: String): Json = {
    parse(value) match {
      case Right(v) =>
        v
      case _ =>
        sys.error(s"could not parse json. $value")
    }
  }
}
@@ -0,0 +1,30 @@
1
+ package org.embulk.formatter.fast_jsonl.json
2
+
3
+ import io.circe.Json
4
+ import org.scalatest.{FlatSpec, Matchers}
5
+
6
+ import scala.collection.mutable
7
+
8
/** Checks that parsed fields are re-encoded as one compact JSON object. */
class JsonEncoderSpec extends FlatSpec with Matchers {
  it should "be encode as map" in {
    val testData =
      """
        |{
        | "salutation" : "Hey",
        | "person" : {
        | "name" : "Chris"
        | },
        | "exclamationMarks" : 3
        |}
      """.stripMargin
    // Copy the parsed fields into an insertion-ordered map, then encode it.
    val fields = new mutable.LinkedHashMap[String, Json]()
    for ((key, json) <- JsonParser(testData)) {
      fields.put(key, json)
    }
    val encoded = JsonEncoder(fields).noSpaces
    encoded should be("{\"salutation\":\"Hey\",\"person\":{\"name\":\"Chris\"},\"exclamationMarks\":3}")
  }

}
@@ -0,0 +1,10 @@
1
+ package org.embulk.formatter.fast_jsonl.json
2
+
3
+ import org.scalatest._
4
+
5
/** Checks that `JsonParser` returns the fields of a JSON object. */
class JsonParserSpec extends FlatSpec with Matchers {
  it should "be parse" in {
    val sequence = JsonParser("{\"a\":\"b\", \"c\":\"d\"}")
    sequence.size should be(2)
    // Also pin the keys, their order and the values, not just the count.
    sequence.map(_._1) should be(Seq("a", "c"))
    sequence.map(_._2.asString) should be(Seq(Some("b"), Some("d")))
  }
}
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-formatter-fast_jsonl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - smdmts
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-08-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ~>
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '10.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: fast_jsonl.
42
+ email:
43
+ - smdmts@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .circleci/config.yml
49
+ - .gitignore
50
+ - LICENSE.txt
51
+ - README.md
52
+ - build.gradle
53
+ - build.sbt
54
+ - example/config.yml
55
+ - example/config_explode.yml
56
+ - example/config_json_column.yml
57
+ - example/config_json_column_with_explode.yml
58
+ - example/data.tsv
59
+ - gradle/wrapper/gradle-wrapper.jar
60
+ - gradle/wrapper/gradle-wrapper.properties
61
+ - gradlew
62
+ - gradlew.bat
63
+ - lib/embulk/formatter/fast_jsonl.rb
64
+ - project/build.properties
65
+ - project/plugins.sbt
66
+ - setting.gradle
67
+ - src/main/scala/org/embulk/formatter/fast_jsonl/FastJsonlFormatterPlugin.scala
68
+ - src/main/scala/org/embulk/formatter/fast_jsonl/PageOutput.scala
69
+ - src/main/scala/org/embulk/formatter/fast_jsonl/PluginTask.scala
70
+ - src/main/scala/org/embulk/formatter/fast_jsonl/json/ColumnVisitor.scala
71
+ - src/main/scala/org/embulk/formatter/fast_jsonl/json/JsonEncoder.scala
72
+ - src/main/scala/org/embulk/formatter/fast_jsonl/json/JsonParser.scala
73
+ - src/test/scala/org/embulk/formatter/fast_jsonl/json/JsonEncoderSpec.scala
74
+ - src/test/scala/org/embulk/formatter/fast_jsonl/json/JsonParserSpec.scala
75
+ - classpath/cats-core_2.11-0.9.0.jar
76
+ - classpath/cats-kernel_2.11-0.9.0.jar
77
+ - classpath/cats-macros_2.11-0.9.0.jar
78
+ - classpath/circe-core_2.11-0.8.0.jar
79
+ - classpath/circe-generic_2.11-0.8.0.jar
80
+ - classpath/circe-jawn_2.11-0.8.0.jar
81
+ - classpath/circe-numbers_2.11-0.8.0.jar
82
+ - classpath/circe-parser_2.11-0.8.0.jar
83
+ - classpath/embulk-formatter-fast_jsonl-0.1.0.jar
84
+ - classpath/jawn-parser_2.11-0.10.4.jar
85
+ - classpath/machinist_2.11-0.6.1.jar
86
+ - classpath/macro-compat_2.11-1.1.1.jar
87
+ - classpath/scala-library-2.11.11.jar
88
+ - classpath/scala-reflect-2.11.8.jar
89
+ - classpath/shapeless_2.11-2.3.2.jar
90
+ - classpath/simulacrum_2.11-0.10.0.jar
91
+ homepage: https://github.com/smdmts/embulk-formatter-fast_jsonl
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.1.9
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: fast_jsonl
115
+ test_files: []