embulk-filter-row 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/CHANGELOG.md +3 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.md +79 -0
  6. data/build.gradle +73 -0
  7. data/classpath/embulk-filter-row-0.1.0.jar +0 -0
  8. data/example.yml +37 -0
  9. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  10. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  11. data/gradlew +164 -0
  12. data/gradlew.bat +90 -0
  13. data/lib/embulk/filter/row.rb +3 -0
  14. data/src/main/java/org/embulk/filter/RowFilterPlugin.java +231 -0
  15. data/src/main/java/org/embulk/filter/row/BooleanCondition.java +43 -0
  16. data/src/main/java/org/embulk/filter/row/Condition.java +5 -0
  17. data/src/main/java/org/embulk/filter/row/ConditionConfig.java +32 -0
  18. data/src/main/java/org/embulk/filter/row/ConditionFactory.java +164 -0
  19. data/src/main/java/org/embulk/filter/row/DoubleCondition.java +63 -0
  20. data/src/main/java/org/embulk/filter/row/LongCondition.java +63 -0
  21. data/src/main/java/org/embulk/filter/row/StringCondition.java +58 -0
  22. data/src/main/java/org/embulk/filter/row/TimestampCondition.java +64 -0
  23. data/src/test/java/org/embulk/filter/TestRowFilterPlugin.java +5 -0
  24. data/src/test/java/org/embulk/filter/row/TestBooleanCondition.java +64 -0
  25. data/src/test/java/org/embulk/filter/row/TestConditionFactory.java +250 -0
  26. data/src/test/java/org/embulk/filter/row/TestDoubleCondition.java +81 -0
  27. data/src/test/java/org/embulk/filter/row/TestLongCondition.java +81 -0
  28. data/src/test/java/org/embulk/filter/row/TestStringCondition.java +73 -0
  29. data/src/test/java/org/embulk/filter/row/TestTimestampCondition.java +83 -0
  30. metadata +100 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f66bb07654c8eddf0af81a5cb4997c5f072ee195
4
+ data.tar.gz: 135e7b573d9523b9247317d93f3efe7ed80b0e5b
5
+ SHA512:
6
+ metadata.gz: 8ffb18672c9f526930ce39afa369ba284977f2ce2fea8a3de4a838ae583926535d9baea13862c6c3fb9f53178b4e9545cb9c2c135e59138ef9d4632517c4ac75
7
+ data.tar.gz: ebe24a6c5852a53e6c6b1c3606f1c8f88585ad33461bd585d35b987377a35f8c5f60812c8200de64d6eca08137d6d0c57febc0b12b23cfdb568f8a05a4f0db35
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
9
+ *.csv
10
+ .tags
11
+ .ruby-version
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ # 0.1.0
2
+
3
+ first version
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ MIT License
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # Row filter plugin for Embulk
2
+
3
+ A filter plugin for Embulk to filter out rows
4
+
5
+ ## Configuration
6
+
7
+ * **conditions**: select only rows which match the conditions. (only **AND** conditions are supported)
8
+ * **column**: column name (string, required)
9
+ * **operator**: operator (string, optional, default: ==)
10
+ * boolean operator
11
+ * ==
12
+ * !=
13
+ * numeric operator
14
+ * ==
15
+ * !=
16
+ * >
17
+ * >=
18
+ * <=
19
+ * <
20
+ * string operator
21
+ * ==
22
+ * !=
23
+ * start_with
24
+ * end_with
25
+ * include
26
+ * unary operator
27
+ * "IS NULL"
28
+ * "IS NOT NULL"
29
+ * **argument**: argument for the operation (string, required for non-unary operators)
30
+ * **not**: not (boolean, optional, default: false)
31
+ * **format**: special option for timestamp column. (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
32
+ * **timezone**: special option for timestamp column. (string, default is `UTC`)
33
+
34
+ NOTE: column type is automatically retrieved from input data (inputSchema)
35
+
36
+ ## Example
37
+
38
+ ```yaml
39
+ filters:
40
+ - type: row
41
+ conditions:
42
+ - {column: foo, operator: "IS NOT NULL"}
43
+ - {column: id, operator: ">=", argument: 10}
44
+ - {column: id, operator: "<", argument: 20}
45
+ - {column: name, operator: "==", argument: foo, not: true}
46
+ - {column: time, operator: "==", argument: "2015-07-13", format: "%Y-%m-%d"}
47
+ ```
48
+
49
+ NOTE: column type is automatically retrieved from input data (inputSchema)
50
+
51
+ ## ToDo
52
+
53
+ * Support OR condition
54
+ * It would be better to consider using a query engine like [Apache Drill](https://drill.apache.org/) or [Presto](https://prestodb.io/)
55
+
56
+ ## ChangeLog
57
+
58
+ [CHANGELOG.md](./CHANGELOG.md)
59
+
60
+ ## Development
61
+
62
+ Run example:
63
+
64
+ ```
65
+ $ ./gradlew classpath
66
+ $ embulk run -I lib example.yml
67
+ ```
68
+
69
+ Run test:
70
+
71
+ ```
72
+ $ ./gradlew test
73
+ ```
74
+
75
+ Release gem:
76
+
77
+ ```
78
+ $ ./gradlew gemPush
79
+ ```
data/build.gradle ADDED
@@ -0,0 +1,73 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ }
6
+ import com.github.jrubygradle.JRubyExec
7
+ repositories {
8
+ mavenCentral()
9
+ jcenter()
10
+ }
11
+ configurations {
12
+ provided
13
+ }
14
+
15
+ version = "0.1.0"
16
+
17
+ dependencies {
18
+ compile "org.embulk:embulk-core:0.6.16"
19
+ provided "org.embulk:embulk-core:0.6.16"
20
+ // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
21
+ testCompile "junit:junit:4.+"
22
+ }
23
+
24
+ task classpath(type: Copy, dependsOn: ["jar"]) {
25
+ doFirst { file("classpath").deleteDir() }
26
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
27
+ into "classpath"
28
+ }
29
+ clean { delete "classpath" }
30
+
31
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
32
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
33
+ script "${project.name}.gemspec"
34
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
35
+ }
36
+
37
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
38
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
39
+ script "pkg/${project.name}-${project.version}.gem"
40
+ }
41
+
42
+ task "package"(dependsOn: ["gemspec", "classpath"]) << {
43
+ println "> Build succeeded."
44
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
45
+ }
46
+
47
+ task gemspec {
48
+ ext.gemspecFile = file("${project.name}.gemspec")
49
+ inputs.file "build.gradle"
50
+ outputs.file gemspecFile
51
+ doLast { gemspecFile.write($/
52
+ Gem::Specification.new do |spec|
53
+ spec.name = "${project.name}"
54
+ spec.version = "${project.version}"
55
+ spec.authors = ["Naotoshi Seo"]
56
+ spec.summary = %[A filter plugin for Embulk to filter out rows]
57
+ spec.description = %[A filter plugin for Embulk to filter out rows.]
58
+ spec.email = ["sonots@gmail.com"]
59
+ spec.licenses = ["MIT"]
60
+ spec.homepage = "https://github.com/sonots/embulk-filter-row"
61
+
62
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
63
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
64
+ spec.require_paths = ["lib"]
65
+
66
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
67
+ spec.add_development_dependency 'bundler', ['~> 1.0']
68
+ spec.add_development_dependency 'rake', ['>= 10.0']
69
+ end
70
+ /$)
71
+ }
72
+ }
73
+ clean { delete "${project.name}.gemspec" }
data/example.yml ADDED
@@ -0,0 +1,37 @@
1
+ # in:
2
+ # type: random
3
+ # rows: 100
4
+ # schema:
5
+ # id: primary_key
6
+ # name: string
7
+ # score: integer
8
+ in:
9
+ type: file
10
+ path_prefix: example.csv
11
+ parser:
12
+ type: csv
13
+ charset: UTF-8
14
+ newline: CRLF
15
+ null_string: 'NULL'
16
+ skip_header_lines: 1
17
+ comment_line_marker: '#'
18
+ columns:
19
+ - {name: time, type: timestamp, format: "%Y-%m-%d"}
20
+ - {name: foo, type: string}
21
+ - {name: bar, type: string}
22
+ - {name: flag, type: boolean}
23
+ - {name: id, type: long}
24
+ - {name: name, type: string}
25
+ - {name: score, type: double}
26
+ filters:
27
+ - type: row
28
+ conditions:
29
+ - {column: flag, operator: ==}
30
+ - {column: foo, operator: "IS NULL"}
31
+ - {column: id, operator: ==, argument: 97}
32
+ - {column: name, operator: ==, argument: "xxxx", not: true}
33
+ - {column: score, operator: ">", argument: 2000}
34
+ - {column: score, operator: "<", argument: 6000}
35
+ - {column: time, operator: ==, argument: "2015-07-13", format: "%Y-%m-%d"}
36
+ out:
37
+ type: stdout
Binary file
@@ -0,0 +1,6 @@
1
+ #Wed Feb 04 13:46:12 PST 2015
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
data/gradlew ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
+ if $cygwin ; then
47
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
+ fi
49
+
50
+ # Attempt to set APP_HOME
51
+ # Resolve links: $0 may be a link
52
+ PRG="$0"
53
+ # Need this for relative symlinks.
54
+ while [ -h "$PRG" ] ; do
55
+ ls=`ls -ld "$PRG"`
56
+ link=`expr "$ls" : '.*-> \(.*\)$'`
57
+ if expr "$link" : '/.*' > /dev/null; then
58
+ PRG="$link"
59
+ else
60
+ PRG=`dirname "$PRG"`"/$link"
61
+ fi
62
+ done
63
+ SAVED="`pwd`"
64
+ cd "`dirname \"$PRG\"`/" >&-
65
+ APP_HOME="`pwd -P`"
66
+ cd "$SAVED" >&-
67
+
68
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
+
70
+ # Determine the Java command to use to start the JVM.
71
+ if [ -n "$JAVA_HOME" ] ; then
72
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
73
+ # IBM's JDK on AIX uses strange locations for the executables
74
+ JAVACMD="$JAVA_HOME/jre/sh/java"
75
+ else
76
+ JAVACMD="$JAVA_HOME/bin/java"
77
+ fi
78
+ if [ ! -x "$JAVACMD" ] ; then
79
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
80
+
81
+ Please set the JAVA_HOME variable in your environment to match the
82
+ location of your Java installation."
83
+ fi
84
+ else
85
+ JAVACMD="java"
86
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
87
+
88
+ Please set the JAVA_HOME variable in your environment to match the
89
+ location of your Java installation."
90
+ fi
91
+
92
+ # Increase the maximum file descriptors if we can.
93
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
94
+ MAX_FD_LIMIT=`ulimit -H -n`
95
+ if [ $? -eq 0 ] ; then
96
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
97
+ MAX_FD="$MAX_FD_LIMIT"
98
+ fi
99
+ ulimit -n $MAX_FD
100
+ if [ $? -ne 0 ] ; then
101
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
102
+ fi
103
+ else
104
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
105
+ fi
106
+ fi
107
+
108
+ # For Darwin, add options to specify how the application appears in the dock
109
+ if $darwin; then
110
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
111
+ fi
112
+
113
+ # For Cygwin, switch paths to Windows format before running java
114
+ if $cygwin ; then
115
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_filter(
2
+ "row", "org.embulk.filter.RowFilterPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))