embulk-parser-firebase_avro 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +27 -0
  3. data/.gitignore +80 -0
  4. data/.scalafmt.conf +2 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +52 -0
  7. data/build.gradle +81 -0
  8. data/build.sbt +29 -0
  9. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  10. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  11. data/gradlew +169 -0
  12. data/gradlew.bat +84 -0
  13. data/lib/embulk/parser/firebase_avro.rb +3 -0
  14. data/project/build.properties +1 -0
  15. data/project/plugins.sbt +3 -0
  16. data/src/main/scala/org/embulk/parser/firebase_avro/FirebaseAvroParserPlugin.scala +66 -0
  17. data/src/main/scala/org/embulk/parser/firebase_avro/LoanPattern.scala +19 -0
  18. data/src/main/scala/org/embulk/parser/firebase_avro/Parser.scala +111 -0
  19. data/src/main/scala/org/embulk/parser/firebase_avro/PluginTask.scala +5 -0
  20. data/src/main/scala/org/embulk/parser/firebase_avro/ValueHolder.scala +5 -0
  21. data/src/main/scala/org/embulk/parser/firebase_avro/column/Column.scala +61 -0
  22. data/src/main/scala/org/embulk/parser/firebase_avro/column/Columns.scala +47 -0
  23. data/src/main/scala/org/embulk/parser/firebase_avro/column/EventDimension.scala +19 -0
  24. data/src/main/scala/org/embulk/parser/firebase_avro/column/UserDimension.scala +28 -0
  25. data/src/main/scala/org/embulk/parser/firebase_avro/define/Root.scala +5 -0
  26. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/Event_Dim.scala +8 -0
  27. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/User_Dim.scala +11 -0
  28. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/event_dim/Params.scala +3 -0
  29. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/event_dim/params/Value.scala +6 -0
  30. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/App_Info.scala +7 -0
  31. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/Bundle_Info.scala +3 -0
  32. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/Device_Info.scala +13 -0
  33. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/Geo_Info.scala +3 -0
  34. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/Ltv_Info.scala +3 -0
  35. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/Traffic_Source.scala +5 -0
  36. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/User_Properties.scala +3 -0
  37. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/user_properties/Value.scala +5 -0
  38. data/src/main/scala/org/embulk/parser/firebase_avro/define/root/user_dim/user_properties/value/Value.scala +6 -0
  39. data/src/main/scala/org/embulk/parser/firebase_avro/json/CustomEncoder.scala +21 -0
  40. data/src/main/scala/org/embulk/parser/firebase_avro/json/event_dim/EventParmsJsonSerializer.scala +29 -0
  41. data/src/main/scala/org/embulk/parser/firebase_avro/json/user_dim/UserPropertiesJsonSerializer.scala +34 -0
  42. data/src/test/scala/org/embulk/parser/firebase_avro/Implicitly.scala +9 -0
  43. data/src/test/scala/org/embulk/parser/firebase_avro/ParserTest.scala +22 -0
  44. data/src/test/scala/org/embulk/parser/firebase_avro/column/ColumnsTest.scala +18 -0
  45. data/src/test/scala/org/embulk/parser/firebase_avro/json/event_dim/EventParmsJsonSerializerTest.scala +19 -0
  46. metadata +138 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bc37d39c561c3baffe826d4863039ec08b50c8c6
4
+ data.tar.gz: 777a25aa85bfde53d3e1cc1700af2ebda49dfb2a
5
+ SHA512:
6
+ metadata.gz: 08eab50559504dc4a69c1f283328d9dafda3fac44b8c1541c428414bc84b32031dcd626c91f49498c92271612fdd2b4376bce50beef955df68409b5e0b59776d
7
+ data.tar.gz: f358e043d34b744f373671a614e0c9a1735a18420126c99844acb5dcc537e8a31be055bb73307708204acdc0e3be22509b70b9395c210d8c76e6ec360eff9870
@@ -0,0 +1,27 @@
1
+ version: 2
2
+ jobs:
3
+ build:
4
+ executorType: docker
5
+ docker:
6
+ - image: hseeberger/scala-sbt
7
+ working_directory: /root/embulk-parser-firebase_avro/
8
+ steps:
9
+ - checkout
10
+ - restore_cache:
11
+ name: Restoring Cache
12
+ keys:
13
+ - sbt
14
+ - setup_remote_docker
15
+ - run:
16
+ name: prepare
17
+ command: sbt update exit
18
+ - save_cache:
19
+ name: Saving Cache sbt
20
+ key: sbt
21
+ paths:
22
+ - "/root/.sbt"
23
+ - "/root/.ivy2"
24
+ - run:
25
+ name: compile
26
+ command: |
27
+ sbt compile test scalafmt::test exit
data/.gitignore ADDED
@@ -0,0 +1,80 @@
1
+ /pkg/
2
+ /tmp/
3
+ *.gemspec
4
+ .gradle/
5
+ /classpath/
6
+ build/
7
+ .idea
8
+ /.settings/
9
+ /.metadata/
10
+ .classpath
11
+ .project
12
+
13
+ .settings
14
+ .classpath
15
+ .project
16
+ *.iml
17
+ *.ipr
18
+ *.iws
19
+ dist/
20
+ lib_managed/
21
+ project/boot/
22
+ project/plugins/project/
23
+ target/
24
+
25
+ # use glob syntax.
26
+ syntax: glob
27
+ *.ser
28
+ *.class
29
+ *~
30
+ *.bak
31
+ #*.off
32
+ *.old
33
+
34
+ # eclipse conf file
35
+ .settings
36
+ .classpath
37
+ .project
38
+ .manager
39
+ .scala_dependencies
40
+
41
+ # idea
42
+ .idea
43
+ *.iml
44
+
45
+ # building
46
+ target
47
+ build
48
+ null
49
+ tmp*
50
+ temp*
51
+ !templates/
52
+ dist
53
+ test-output
54
+ build.log
55
+
56
+ # other scm
57
+ .svn
58
+ .CVS
59
+ .hg*
60
+
61
+ # switch to regexp syntax.
62
+ # syntax: regexp
63
+ # ^\.pc/
64
+
65
+ #SHITTY output not in target directory
66
+ build.log
67
+ .DS_Store
68
+ derby.log
69
+
70
+ *.db
71
+
72
+ .lib
73
+ sbt
74
+
75
+ logs
76
+ sandbox/db
77
+
78
+
79
+ .ensime*⏎
80
+ project/project/
data/.scalafmt.conf ADDED
@@ -0,0 +1,2 @@
1
+ style = defaultWithAlign
2
+ maxColumn = 120
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # Firebase Avro parser plugin for Embulk
2
+
3
+ This parser plug-in supports Firebase data files exported in *AVRO* format from Google BigQuery.
4
+ Currently (2017/6), Firebase data can only be extracted to GCS (Google Cloud Storage), in JSON or AVRO format.
5
+ If you want to export Firebase's data, you need to set up a data pipeline using the steps below.
6
+
7
+ 1. Link BigQuery with Firebase.
8
+ - https://support.google.com/firebase/answer/6318765
9
+ 1. Dump Firebase Data to Google Cloud Storage.
10
+ - Use a job scheduler to run a data pipeline that extracts the data from BigQuery.
11
+ - https://support.google.com/firebase/answer/7029846
12
+
13
+ 1. Use input-gcs plugin with this parser-plugin.
14
+ - https://github.com/embulk/embulk-input-gcs
15
+
16
+
17
+ ## Overview
18
+
19
+ * **Plugin type**: parser
20
+ * **Guess supported**: no
21
+
22
+ ## Configuration
23
+ - No configuration
24
+
25
+ ## Example
26
+
27
+ ```yaml
28
+ in:
29
+ type: any file input plugin type
30
+ parser:
31
+ type: firebase_avro
32
+ ```
33
+
34
+
35
+ ```
36
+ $ embulk gem install embulk-parser-firebase_avro
37
+ ```
38
+
39
+ ## Build
40
+
41
+ ```
42
+ $ ./gradlew gem # -t to watch change of files and rebuild continuously
43
+ ```
44
+
45
+ ## Developing or Testing
46
+
47
+ This plug-in is written in Scala. You can use sbt.
48
+
49
+ ```
50
+ $ ./sbt
51
+ $ ./sbt test
52
+ ```
data/build.gradle ADDED
@@ -0,0 +1,81 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ id "scala"
6
+ }
7
+ import com.github.jrubygradle.JRubyExec
8
+ repositories {
9
+ mavenCentral()
10
+ jcenter()
11
+ }
12
+ configurations {
13
+ provided
14
+ }
15
+
16
+ version = "0.1.0"
17
+
18
+ sourceCompatibility = 1.7
19
+ targetCompatibility = 1.7
20
+
21
+ dependencies {
22
+ compile "org.embulk:embulk-core:0.8.22"
23
+ compile "org.scala-lang:scala-library:2.11.11"
24
+ compile group: 'com.sksamuel.avro4s', name: 'avro4s-core_2.11', version: '1.6.4'
25
+ compile group: 'io.circe', name: 'circe-core_2.11', version: '0.8.0'
26
+ compile group: 'io.circe', name: 'circe-generic_2.11', version: '0.8.0'
27
+ provided "org.embulk:embulk-core:0.8.22"
28
+ }
29
+
30
+ task classpath(type: Copy, dependsOn: ["jar"]) {
31
+ doFirst { file("classpath").deleteDir() }
32
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
33
+ into "classpath"
34
+ }
35
+ clean { delete "classpath" }
36
+
37
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
38
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
39
+ script "${project.name}.gemspec"
40
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
41
+ }
42
+
43
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
44
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
45
+ script "pkg/${project.name}-${project.version}.gem"
46
+ }
47
+
48
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
49
+ doLast {
50
+ println "> Build succeeded."
51
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
52
+ }
53
+ }
54
+
55
+ task gemspec {
56
+ ext.gemspecFile = file("${project.name}.gemspec")
57
+ inputs.file "build.gradle"
58
+ outputs.file gemspecFile
59
+ doLast { gemspecFile.write($/
60
+ Gem::Specification.new do |spec|
61
+ spec.name = "${project.name}"
62
+ spec.version = "${project.version}"
63
+ spec.authors = ["smdmts"]
64
+ spec.summary = %[Firebase Avro parser plugin for Embulk]
65
+ spec.description = %[Parses Firebase Avro files read by other file input plugins.]
66
+ spec.email = ["smdmts@gmail.com"]
67
+ spec.licenses = ["MIT"]
68
+ spec.homepage = "https://github.com/smdmts/embulk-parser-firebase_avro"
69
+
70
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
71
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
72
+ spec.require_paths = ["lib"]
73
+
74
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
75
+ spec.add_development_dependency 'bundler', ['~> 1.0']
76
+ spec.add_development_dependency 'rake', ['>= 10.0']
77
+ end
78
+ /$)
79
+ }
80
+ }
81
+ clean { delete "${project.name}.gemspec" }
data/build.sbt ADDED
@@ -0,0 +1,29 @@
1
+ lazy val root = (project in file(".")).
2
+ settings(
3
+ inThisBuild(List(
4
+ organization := "com.example",
5
+ scalaVersion := "2.11.11",
6
+ version := "0.1.0-SNAPSHOT"
7
+ )),
8
+ name := "embulk-parser-firebase_avro"
9
+ )
10
+
11
+ enablePlugins(ScalafmtPlugin)
12
+
13
+ resolvers += Resolver.jcenterRepo
14
+ resolvers += Resolver.sonatypeRepo("releases")
15
+
16
+
17
+ lazy val circeVersion = "0.8.0"
18
+ libraryDependencies ++= Seq(
19
+ "com.sksamuel.avro4s" %% "avro4s-core" % "1.6.4",
20
+ "org.jruby" % "jruby-complete" % "1.6.5",
21
+ "org.embulk" % "embulk-core" % "0.8.22",
22
+ "com.chuusai" %% "shapeless" % "2.3.2",
23
+ "io.circe" %% "circe-core" % circeVersion,
24
+ "io.circe" %% "circe-generic" % circeVersion,
25
+ "org.scalacheck" %% "scalacheck" % "1.13.4" % Test,
26
+ "org.scalatest" %% "scalatest" % "3.0.1" % Test,
27
+ "org.scalamock" %% "scalamock-scalatest-support" % "3.6.0" % Test,
28
+ "com.github.alexarchambault" %% "scalacheck-shapeless_1.13" % "1.1.5" % Test
29
+ )
Binary file
@@ -0,0 +1,6 @@
1
+ #Fri Jun 09 11:40:40 JST 2017
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-all.zip
data/gradlew ADDED
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
26
+
27
+ APP_NAME="Gradle"
28
+ APP_BASE_NAME=`basename "$0"`
29
+
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
33
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
34
+ MAX_FD="maximum"
35
+
36
+ warn ( ) {
37
+ echo "$*"
38
+ }
39
+
40
+ die ( ) {
41
+ echo
42
+ echo "$*"
43
+ echo
44
+ exit 1
45
+ }
46
+
47
+ # OS specific support (must be 'true' or 'false').
48
+ cygwin=false
49
+ msys=false
50
+ darwin=false
51
+ nonstop=false
52
+ case "`uname`" in
53
+ CYGWIN* )
54
+ cygwin=true
55
+ ;;
56
+ Darwin* )
57
+ darwin=true
58
+ ;;
59
+ MINGW* )
60
+ msys=true
61
+ ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
65
+ esac
66
+
67
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
68
+
69
+ # Determine the Java command to use to start the JVM.
70
+ if [ -n "$JAVA_HOME" ] ; then
71
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
72
+ # IBM's JDK on AIX uses strange locations for the executables
73
+ JAVACMD="$JAVA_HOME/jre/sh/java"
74
+ else
75
+ JAVACMD="$JAVA_HOME/bin/java"
76
+ fi
77
+ if [ ! -x "$JAVACMD" ] ; then
78
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
79
+
80
+ Please set the JAVA_HOME variable in your environment to match the
81
+ location of your Java installation."
82
+ fi
83
+ else
84
+ JAVACMD="java"
85
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
86
+
87
+ Please set the JAVA_HOME variable in your environment to match the
88
+ location of your Java installation."
89
+ fi
90
+
91
+ # Increase the maximum file descriptors if we can.
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
93
+ MAX_FD_LIMIT=`ulimit -H -n`
94
+ if [ $? -eq 0 ] ; then
95
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
96
+ MAX_FD="$MAX_FD_LIMIT"
97
+ fi
98
+ ulimit -n $MAX_FD
99
+ if [ $? -ne 0 ] ; then
100
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
101
+ fi
102
+ else
103
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104
+ fi
105
+ fi
106
+
107
+ # For Darwin, add options to specify how the application appears in the dock
108
+ if $darwin; then
109
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110
+ fi
111
+
112
+ # For Cygwin, switch paths to Windows format before running java
113
+ if $cygwin ; then
114
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
165
+ if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
166
+ cd "$(dirname "$0")"
167
+ fi
168
+
169
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"