embulk-parser-none-bin 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: be377e98bbff18fed793abc5d8ade3a091d89d27
4
+ data.tar.gz: d5697f6e95654352042db185262c16a8affca1e7
5
+ SHA512:
6
+ metadata.gz: f0c2072a4b0905309a84bf50b44cebe6182768b69ea55089e4d748b4260ed204efa638a2685279e5aad859945ac31c0ff82017468355306b62db0d6c6fa6a25b
7
+ data.tar.gz: d511993a276cec88ef4d96f91664031d52ad5e7e1cf66c905808e52dc0b8df882da298ed5b7a5ba4172beec719a232541fd01e4a1c032a138bfe3a521f390fc9
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
9
+ example.txt.out
data/CHANGELOG.md ADDED
@@ -0,0 +1,11 @@
1
+ # 0.2.0 (2016-02-22)
2
+
3
+ Incompatible changes:
4
+
5
+ * Rename message\_key option to column\_name option
6
+ * Also, change the default value from `message` to `payload`
7
+
8
+ # 0.1.0 (2015-10-27)
9
+
10
+ first version
11
+
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,100 @@
1
+ # None parser plugin for Embulk
2
+
3
+ Embulk parser plugin that does not parse its input at all, intended for binary files
4
+
5
+ ## Install
6
+
7
+ ```
8
+ $ embulk gem install embulk-parser-none-bin
9
+ ```
10
+
11
+ ## Overview
12
+
13
+ * **Plugin type**: parser
14
+ * **Guess supported**: no
15
+
16
+ ## Configuration
17
+
18
+ - **column_name**: A column name which this plugin outputs (string, default: "payload")
19
+
20
+ ## Example
21
+
22
+ ```yaml
23
+ in:
24
+ type: file
25
+ path_prefix: example.txt
26
+ parser:
27
+ type: none-bin
28
+ column_name: payload
29
+ ```
30
+
31
+ Assume the input file (example.txt) is as follows:
32
+
33
+ ```
34
+ foo bar baz
35
+ foo bar baz
36
+ ```
37
+
38
+ then this plugin treats it as:
39
+
40
+ ```
41
+ +----------------+
42
+ | payload:string |
43
+ +----------------+
44
+ | foo bar baz |
45
+ | foo bar baz |
46
+ +----------------+
47
+ ```
48
+
49
+ To recover a file, you may use [embulk-formatter-single_value](https://github.com/sonots/embulk-formatter-single_value) as:
50
+
51
+ ```
52
+ out:
53
+ type: file
54
+ path_prefix: example.txt
55
+ sequence_format: ""
56
+ file_ext: .out
57
+ formatter:
58
+ type: single_value
59
+ ```
60
+
61
+ or csv formatter as:
62
+
63
+ ```
64
+ out:
65
+ type: file
66
+ path_prefix: example.txt
67
+ sequence_format: ""
68
+ file_ext: .out
69
+ formatter:
70
+ type: csv
71
+ delimiter: 0
72
+ quote_policy: NONE
73
+ header_line: false
74
+ ```
75
+
76
+ ## ChangeLOG
77
+
78
+ [CHANGELOG.md](CHANGELOG.md)
79
+
80
+ ## Development
81
+
82
+ Run example:
83
+
84
+ ```
85
+ $ embulk gem install embulk-formatter-single_value
86
+ $ ./gradlew classpath
87
+ $ embulk run -I lib example.yml
88
+ ```
89
+
90
+ Run test:
91
+
92
+ ```
93
+ $ ./gradlew test
94
+ ```
95
+
96
+ Release gem:
97
+
98
+ ```
99
+ $ ./gradlew gemPush
100
+ ```
data/build.gradle ADDED
@@ -0,0 +1,85 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ }
6
+ import com.github.jrubygradle.JRubyExec
7
+ repositories {
8
+ mavenCentral()
9
+ jcenter()
10
+ maven {
11
+ url "https://repo.maven.apache.org/maven2/"
12
+ }
13
+ }
14
+ configurations {
15
+ provided
16
+ }
17
+
18
+ version = "0.2.0"
19
+
20
+ dependencies {
21
+ compile "org.embulk:embulk-core:0.6.18"
22
+ compile "commons-codec:commons-codec:1.9"
23
+ provided "org.embulk:embulk-core:0.6.18"
24
+ // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
25
+ testCompile "junit:junit:4.+"
26
+ }
27
+
28
+ task classpath(type: Copy, dependsOn: ["jar"]) {
29
+ doFirst { file("classpath").deleteDir() }
30
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
31
+ into "classpath"
32
+ }
33
+ clean { delete "classpath" }
34
+
35
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
36
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
37
+ script "${project.name}.gemspec"
38
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
39
+ }
40
+
41
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
42
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
43
+ script "pkg/${project.name}-${project.version}.gem"
44
+ }
45
+
46
+ task "package"(dependsOn: ["gemspec", "classpath"]) << {
47
+ println "> Build succeeded."
48
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
49
+ }
50
+
51
+ task gemspec {
52
+ ext.gemspecFile = file("${project.name}.gemspec")
53
+ inputs.file "build.gradle"
54
+ outputs.file gemspecFile
55
+ doLast { gemspecFile.write($/
56
+ Gem::Specification.new do |spec|
57
+ spec.name = "${project.name}"
58
+ spec.version = "${project.version}"
59
+ spec.authors = ["Klaus Woelfel"]
60
+ spec.summary = %[None bin parser plugin for Embulk]
61
+ spec.description = %[Embulk parser plugin not to parse at all.]
62
+ spec.email = ["klaus@nexedi.com"]
63
+ spec.licenses = ["MIT"]
64
+ spec.homepage = "https://lab.nexedi.com/klaus/embulk-parser-none-bin"
65
+
66
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
67
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
68
+ spec.require_paths = ["lib"]
69
+
70
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
71
+ spec.add_development_dependency 'bundler', ['~> 1.0']
72
+ spec.add_development_dependency 'rake', ['>= 10.0']
73
+ end
74
+ /$)
75
+ }
76
+ }
77
+ clean { delete "${project.name}.gemspec" }
78
+
79
+ // for deprecation check
80
+ //
81
+ //allprojects {
82
+ // tasks.withType(JavaCompile) {
83
+ // options.compilerArgs << "-Xlint:deprecation"
84
+ // }
85
+ //}
data/example.txt ADDED
@@ -0,0 +1,3 @@
1
+ foo,bar,baz
2
+
3
+ foo,bar,baz
data/example.yml ADDED
@@ -0,0 +1,13 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example.txt
4
+ parser:
5
+ type: none-bin
6
+ column_name: payload
7
+ out:
8
+ type: file
9
+ path_prefix: example.txt
10
+ sequence_format: ""
11
+ file_ext: .out
12
+ formatter:
13
+ type: single_value
Binary file
@@ -0,0 +1,6 @@
1
+ #Wed Feb 04 13:46:12 PST 2015
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
data/gradlew ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
+ if $cygwin ; then
47
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
+ fi
49
+
50
+ # Attempt to set APP_HOME
51
+ # Resolve links: $0 may be a link
52
+ PRG="$0"
53
+ # Need this for relative symlinks.
54
+ while [ -h "$PRG" ] ; do
55
+ ls=`ls -ld "$PRG"`
56
+ link=`expr "$ls" : '.*-> \(.*\)$'`
57
+ if expr "$link" : '/.*' > /dev/null; then
58
+ PRG="$link"
59
+ else
60
+ PRG=`dirname "$PRG"`"/$link"
61
+ fi
62
+ done
63
+ SAVED="`pwd`"
64
+ cd "`dirname \"$PRG\"`/" >&-
65
+ APP_HOME="`pwd -P`"
66
+ cd "$SAVED" >&-
67
+
68
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
+
70
+ # Determine the Java command to use to start the JVM.
71
+ if [ -n "$JAVA_HOME" ] ; then
72
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
73
+ # IBM's JDK on AIX uses strange locations for the executables
74
+ JAVACMD="$JAVA_HOME/jre/sh/java"
75
+ else
76
+ JAVACMD="$JAVA_HOME/bin/java"
77
+ fi
78
+ if [ ! -x "$JAVACMD" ] ; then
79
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
80
+
81
+ Please set the JAVA_HOME variable in your environment to match the
82
+ location of your Java installation."
83
+ fi
84
+ else
85
+ JAVACMD="java"
86
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
87
+
88
+ Please set the JAVA_HOME variable in your environment to match the
89
+ location of your Java installation."
90
+ fi
91
+
92
+ # Increase the maximum file descriptors if we can.
93
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
94
+ MAX_FD_LIMIT=`ulimit -H -n`
95
+ if [ $? -eq 0 ] ; then
96
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
97
+ MAX_FD="$MAX_FD_LIMIT"
98
+ fi
99
+ ulimit -n $MAX_FD
100
+ if [ $? -ne 0 ] ; then
101
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
102
+ fi
103
+ else
104
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
105
+ fi
106
+ fi
107
+
108
+ # For Darwin, add options to specify how the application appears in the dock
109
+ if $darwin; then
110
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
111
+ fi
112
+
113
+ # For Cygwin, switch paths to Windows format before running java
114
+ if $cygwin ; then
115
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,5 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ end
5
+ end
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_parser(
2
+ "none-bin", "org.embulk.parser.NoneBinParserPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
data/settings.gradle ADDED
@@ -0,0 +1 @@
1
+ rootProject.name = 'embulk-parser-none-bin'
@@ -0,0 +1,110 @@
1
+ package org.embulk.parser;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.ConfigDiff;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.Task;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.spi.ParserPlugin;
10
+ import org.embulk.spi.FileInput;
11
+ import org.embulk.spi.PageOutput;
12
+ import org.embulk.spi.Schema;
13
+ import org.embulk.spi.SchemaConfig;
14
+
15
+ import org.embulk.spi.Exec;
16
+ import org.embulk.spi.PageBuilder;
17
+ import org.embulk.spi.util.FileInputInputStream;
18
+ import org.embulk.spi.ColumnConfig;
19
+ import java.io.IOException;
20
+ import java.util.Arrays;
21
+ import java.util.ArrayList;
22
+ import org.apache.commons.codec.binary.Base64;
23
+
24
+ import static org.embulk.spi.type.Types.STRING;
25
+
26
+ import org.slf4j.Logger;
27
+
28
+
29
+ public class NoneBinParserPlugin
30
+ implements ParserPlugin
31
+ {
32
+ static int MAX_NAME_LENGTH = 255;
33
+
34
+ public interface PluginTask
35
+ extends Task //, LineDecoder.DecoderTask //, TimestampParser.Task
36
+ {
37
+ @Config("column_name")
38
+ @ConfigDefault("\"payload\"")
39
+ public String getColumnName();
40
+ }
41
+
42
+ private final Logger log;
43
+
44
+ public NoneBinParserPlugin()
45
+ {
46
+ this.log = Exec.getLogger(NoneBinParserPlugin.class);
47
+ }
48
+
49
+ @Override
50
+ public void transaction(ConfigSource config, ParserPlugin.Control control)
51
+ {
52
+ PluginTask task = config.loadConfig(PluginTask.class);
53
+ ArrayList<ColumnConfig> columns = new ArrayList<ColumnConfig>();
54
+ final String columnName = task.getColumnName();
55
+
56
+ columns.add(new ColumnConfig(columnName, STRING, config));
57
+ columns.add(new ColumnConfig("tag", STRING, config));
58
+
59
+ Schema schema = new SchemaConfig(columns).toSchema();
60
+ control.run(task.dump(), schema);
61
+ }
62
+
63
+ @Override
64
+ public void run(TaskSource taskSource, Schema schema,
65
+ FileInput input, PageOutput output)
66
+ {
67
+ PluginTask task = taskSource.loadTask(PluginTask.class);
68
+ FileInputInputStream dataIn = new FileInputInputStream(input);
69
+ PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output);
70
+ int chunksize = 1024 * 1024 * 10;
71
+
72
+ while( input.nextFile() ){
73
+ byte[] pathBytesArray = new byte[MAX_NAME_LENGTH];
74
+ int i = 0;
75
+ int c;
76
+ for (; i < MAX_NAME_LENGTH; i++) {
77
+ c = dataIn.read();
78
+ if ( c == -1) {
79
+ break;
80
+ } else if ( c == 0 ) {
81
+ // read empty bytes until MAX_NAME_LENGTH;
82
+ for (int j = i + 1; j < MAX_NAME_LENGTH; j++) {
83
+ dataIn.read();
84
+ }
85
+ break;
86
+ }
87
+ pathBytesArray[i] = (byte)c;
88
+ }
89
+ String path = new String(Arrays.copyOfRange(pathBytesArray, 0, i));
90
+ int bytes_read = 0;
91
+ while( bytes_read != -1 ) {
92
+ byte bytesArray[] = new byte[chunksize];
93
+ int offset = 0;
94
+ while ( offset < chunksize ) {
95
+ bytes_read = dataIn.read(bytesArray, offset, chunksize);
96
+ if( bytes_read == -1 ) {
97
+ break;
98
+ }
99
+ offset += bytes_read;
100
+ }
101
+
102
+ log.info(offset + path);
103
+ pageBuilder.setString(0, Base64.encodeBase64String(Arrays.copyOfRange(bytesArray, 0, offset)));
104
+ pageBuilder.setString(1, path);
105
+ pageBuilder.addRecord();
106
+ }
107
+ }
108
+ pageBuilder.finish();
109
+ }
110
+ }
@@ -0,0 +1,5 @@
1
+ package org.embulk.parser;
2
+
3
+ public class TestNoneBinParserPlugin
4
+ {
5
+ }
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-none-bin
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Klaus Woelfel
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-03-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
+ description: Embulk parser plugin not to parse at all.
42
+ email:
43
+ - klaus@nexedi.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - CHANGELOG.md
50
+ - LICENSE.txt
51
+ - README.md
52
+ - build.gradle
53
+ - example.txt
54
+ - example.yml
55
+ - gradle/wrapper/gradle-wrapper.jar
56
+ - gradle/wrapper/gradle-wrapper.properties
57
+ - gradlew
58
+ - gradlew.bat
59
+ - lib/embulk/guess/none.rb
60
+ - lib/embulk/parser/none-bin.rb
61
+ - settings.gradle
62
+ - src/main/java/org/embulk/parser/NoneBinParserPlugin.java
63
+ - src/test/java/org/embulk/parser/TestNoneBinParserPlugin.java
64
+ - classpath/commons-codec-1.9.jar
65
+ - classpath/embulk-parser-none-bin-0.2.0.jar
66
+ homepage: https://lab.nexedi.com/klaus/embulk-parser-none-bin
67
+ licenses:
68
+ - MIT
69
+ metadata: {}
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubyforge_project:
86
+ rubygems_version: 2.1.9
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: None bin parser plugin for Embulk
90
+ test_files: []