embulk-parser-twitter_ads_stats 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.scalafmt.conf +7 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +79 -0
  6. data/build.gradle +79 -0
  7. data/build.sbt +15 -0
  8. data/classpath/embulk-parser-twitter_ads_stats-0.1.0.jar +0 -0
  9. data/classpath/scala-library-2.12.3.jar +0 -0
  10. data/classpath/spray-json_2.12-1.3.3.jar +0 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +5 -0
  13. data/gradlew +172 -0
  14. data/gradlew.bat +84 -0
  15. data/lib/embulk/guess/twitter_ads_stats.rb +61 -0
  16. data/lib/embulk/parser/twitter_ads_stats.rb +3 -0
  17. data/project/Dependencies.scala +11 -0
  18. data/project/build.properties +1 -0
  19. data/project/plugins.sbt +2 -0
  20. data/src/main/scala/org/embulk/parser/twitter_ads_stats/Column.scala +50 -0
  21. data/src/main/scala/org/embulk/parser/twitter_ads_stats/LoanPattern.scala +19 -0
  22. data/src/main/scala/org/embulk/parser/twitter_ads_stats/MetricElementNames.scala +47 -0
  23. data/src/main/scala/org/embulk/parser/twitter_ads_stats/MetricsGroupJson.scala +19 -0
  24. data/src/main/scala/org/embulk/parser/twitter_ads_stats/ParseException.scala +27 -0
  25. data/src/main/scala/org/embulk/parser/twitter_ads_stats/PluginTask.scala +9 -0
  26. data/src/main/scala/org/embulk/parser/twitter_ads_stats/TwitterAdsStatsParserPlugin.scala +98 -0
  27. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Data.scala +19 -0
  28. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/FieldNameUtil.scala +9 -0
  29. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/IDData.scala +36 -0
  30. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Metrics.scala +23 -0
  31. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Request.scala +42 -0
  32. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Root.scala +21 -0
  33. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/RootJson.scala +107 -0
  34. data/src/main/scala/org/embulk/parser/twitter_ads_stats/package.scala +192 -0
  35. data/src/test/resources/test.json +759 -0
  36. data/src/test/scala/org/embulk/parser/twitter_ads_stats/ColumnSpec.scala +33 -0
  37. data/src/test/scala/org/embulk/parser/twitter_ads_stats/MetricElementNamesSpec.scala +14 -0
  38. data/src/test/scala/org/embulk/parser/twitter_ads_stats/MetricsGroupJsonSpec.scala +20 -0
  39. data/src/test/scala/org/embulk/parser/twitter_ads_stats/UnitSpec.scala +5 -0
  40. data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/MetricsJsonSpec.scala +57 -0
  41. data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/ParamsSpec.scala +26 -0
  42. data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/RootJsonSpec.scala +20 -0
  43. data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/RootSpec.scala +356 -0
  44. metadata +114 -0
@@ -0,0 +1,61 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "twitter_ads_stats" partial-config.yml
6
+ #
7
+ # Depending on the file format the plugin uses, you can choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ # class TwitterAdsStats < GuessPlugin
12
+ # Plugin.register_guess("twitter_ads_stats", self)
13
+ #
14
+ # def guess(config, sample_buffer)
15
+ # if sample_buffer[0,2] == GZIP_HEADER
16
+ # guessed = {}
17
+ # guessed["type"] = "twitter_ads_stats"
18
+ # guessed["property1"] = "guessed-value"
19
+ # return {"parser" => guessed}
20
+ # else
21
+ # return {}
22
+ # end
23
+ # end
24
+ # end
25
+
26
+ # class TwitterAdsStats < TextGuessPlugin
27
+ # Plugin.register_guess("twitter_ads_stats", self)
28
+ #
29
+ # def guess_text(config, sample_text)
30
+ # js = JSON.parse(sample_text) rescue nil
31
+ # if js && js["mykeyword"] == "keyword"
32
+ # guessed = {}
33
+ # guessed["type"] = "twitter_ads_stats"
34
+ # guessed["property1"] = "guessed-value"
35
+ # return {"parser" => guessed}
36
+ # else
37
+ # return {}
38
+ # end
39
+ # end
40
+ # end
41
+
42
+ # class TwitterAdsStats < LineGuessPlugin
43
+ # Plugin.register_guess("twitter_ads_stats", self)
44
+ #
45
+ # def guess_lines(config, sample_lines)
46
+ # all_line_matched = sample_lines.all? do |line|
47
+ # line =~ /mypattern/
48
+ # end
49
+ # if all_line_matched
50
+ # guessed = {}
51
+ # guessed["type"] = "twitter_ads_stats"
52
+ # guessed["property1"] = "guessed-value"
53
+ # return {"parser" => guessed}
54
+ # else
55
+ # return {}
56
+ # end
57
+ # end
58
+ # end
59
+
60
+ end
61
+ end
@@ -0,0 +1,3 @@
1
# Register the JVM parser implementation under the "twitter_ads_stats" name,
# pointing Embulk at the bundled jars in the gem's classpath directory.
classpath_dir = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_parser(
  "twitter_ads_stats",
  "org.embulk.parser.twitter_ads_stats.TwitterAdsStatsParserPlugin",
  classpath_dir
)
@@ -0,0 +1,11 @@
1
+ import sbt._
2
+
3
/** Library dependencies shared by the sbt build definition. */
object Dependencies {

  // Both embulk-core artifacts (main jar and "tests" classifier) use the same version.
  private val embulkVersion = "0.8.35"

  private val testing: Seq[ModuleID] = Seq(
    "org.scalatest" %% "scalatest" % "3.0.1" % Test
  )

  private val embulk: Seq[ModuleID] = Seq(
    "org.embulk" % "embulk-core" % embulkVersion,
    "org.embulk" % "embulk-core" % embulkVersion classifier "tests"
  )

  private val json: Seq[ModuleID] = Seq(
    "io.spray" %% "spray-json" % "1.3.3"
  )

  /** Every module the project depends on, in the original declaration order. */
  val value = testing ++ embulk ++ json
}
@@ -0,0 +1 @@
1
+ sbt.version=0.13.16
@@ -0,0 +1,2 @@
1
+ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.8.2")
2
+ addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.12")
@@ -0,0 +1,50 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import java.time.LocalDate
4
+
5
+ import org.embulk.spi.`type`.Types
6
+ import org.embulk.spi.{Column => EmbulkColumn}
7
+
8
object Column {

  /**
    * Builds the Embulk schema columns.
    *
    * The four fixed string columns (`id`, `date`, `segment`, `placement`)
    * occupy indexes 0-3. They are followed by one JSON column per metrics
    * group name, in sorted order, starting at index 4.
    *
    * @param metricElementNames source of the metrics group names
    * @return the columns in schema order
    */
  def createEmbulkColumns(metricElementNames: MetricElementNames): Seq[EmbulkColumn] = {
    val fixedColumns = List("id", "date", "segment", "placement").zipWithIndex.map {
      case (name, index) => new EmbulkColumn(index, name, Types.STRING)
    }

    // Metrics group columns continue numbering right after the fixed ones.
    val groupColumns = metricElementNames.getSortedMetricsGroupNames.zipWithIndex.map {
      case (groupName, offset) => new EmbulkColumn(fixedColumns.size + offset, groupName, Types.JSON)
    }

    fixedColumns ++ groupColumns
  }
}

/**
  * One output record.
  *
  * @param id           value written to the `id` column
  * @param date         value written (via `toString`) to the `date` column
  * @param segment      value for the `segment` column; `None` is written as null
  * @param placement    value written to the `placement` column
  * @param metricsGroup metrics keyed by metrics group name; each entry fills
  *                     the JSON column of the same name
  */
case class Column(
    id: String,
    date: LocalDate,
    segment: Option[String],
    placement: String,
    metricsGroup: Map[String, MetricsGroup]
)
@@ -0,0 +1,19 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import scala.util.control.Exception.ignoring
4
+
5
/**
  * Loan pattern: lends a closable resource to a function and always closes
  * the resource afterwards.
  */
object LoanPattern {

  // `Closable` is a structural type, so calling `close()` goes through reflection.
  import scala.language.reflectiveCalls
  import scala.util.control.NonFatal

  /** Anything that exposes a `close(): Unit` method. */
  type Closable = { def close(): Unit }

  /**
    * Runs `f` with `resource` and closes the resource when `f` finishes,
    * whether it returned normally or threw.
    *
    * Non-fatal exceptions thrown by `close()` are deliberately ignored so that
    * a failing close never masks the result (or the exception) of `f`. Fatal
    * errors (for example `VirtualMachineError` or `LinkageError`) are not
    * swallowed and propagate to the caller.
    *
    * @param resource the resource to lend
    * @param f        the function using the resource
    * @tparam R the resource type
    * @tparam A the result type of `f`
    * @return whatever `f` returns
    */
  def apply[R <: Closable, A](resource: R)(f: R => A): A = {
    try {
      f(resource)
    } finally {
      try {
        resource.close()
      } catch {
        // Best-effort close: only ordinary failures are ignored.
        case NonFatal(_) => ()
      }
    }
  }

}
@@ -0,0 +1,47 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import org.embulk.parser.twitter_ads_stats.define.Metrics
4
+ import spray.json.{JsObject, JsValue}
5
+
6
/**
  * Element names of the metrics, grouped by metrics group.
  *
  * @param names key: metrics group name, value: the metric element names of that group
  *              (example)
  *              Map(
  *                "engagement"     -> Seq("engagements", "impressions"),
  *                "web_conversion" -> Seq("conversion_purchases.assisted", "conversion_sign_ups.assisted")
  *              )
  */
case class MetricElementNames(names: Map[String, Seq[String]]) {

  import MetricElementNames._

  /** Metrics group names (the keys of `names`) in ascending order. */
  def getSortedMetricsGroupNames: List[String] = names.keysIterator.toList.sorted

  /**
    * Resolves every configured metric element against `json`.
    *
    * Each element name is split into its path segments. The segments are
    * handed to `resolveMetricTimeSeries` together with `Some(json)`, and the
    * result is stored under the segments joined with the separator.
    *
    * @param resolveMetricTimeSeries looks up the time series for a key path
    * @param json                    the JSON object to resolve against
    * @return the resolved metrics keyed by the joined element name
    */
  private[twitter_ads_stats] def resolveMetrics(resolveMetricTimeSeries: (List[String], Option[JsValue]) => MetricTimeSeries, json: JsObject): Metrics = {
    val resolved = for {
      elementNames <- names.values
      elementName  <- elementNames
    } yield {
      val path = splitSeparator(elementName)
      path.mkString(separator) -> resolveMetricTimeSeries(path, Some(json))
    }
    Metrics(resolved.toMap)
  }
}

object MetricElementNames {

  private val separator = "_"

  // NOTE(review): this splits on both '.' and '+', whereas replaceSeparator
  // only rewrites '.'; confirm the difference is intended.
  private def splitSeparator(name: String): List[String] =
    name.split("[.+]").toList

  // @todo keep this closed within the scope of MetricElementNames
  // @todo exception handling
  /** Replaces every '.' in `name` with the separator. */
  def replaceSeparator(name: String): String =
    name.replaceAll("[.]", separator)
}
@@ -0,0 +1,19 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import spray.json.DefaultJsonProtocol._
4
+ import spray.json._
5
+
6
object MetricsGroupJson {

  /** Serializes a `MetricsGroup` as a JSON object with one field per entry. */
  implicit object MetricsGroupJsonWriter extends RootJsonWriter[MetricsGroup] {

    // A present value becomes its JSON form; a missing one becomes JSON null.
    private def toJValue(element: Option[Long]): JsValue =
      element.fold[JsValue](JsNull)(_.toJson)

    /**
      * @param obj the metrics group to serialize
      * @return a JSON object keyed by the entry names of `obj`
      */
    override def write(obj: MetricsGroup): JsValue = {
      val fields = obj.map { case (name, value) => name -> toJValue(value) }
      JsObject(fields)
    }
  }
}
@@ -0,0 +1,27 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
/**
  * Base type of every error this parser reports for invalid input.
  *
  * The message and cause are forwarded to `Throwable`, so `getMessage`,
  * `getCause`, `toString` and printed stack traces all carry them.
  *
  * @param message human-readable description of the failure
  * @param cause   the underlying exception
  */
sealed abstract class ParseException(
    message: String,
    cause: Throwable
) extends Throwable(message, cause)

/**
  * Raised when a metric time series cannot be read at the requested index.
  *
  * @param message human-readable description of the failure
  * @param cause   the underlying exception
  */
case class InvalidMetricTimeSeriesException(
    message: String,
    cause: Throwable
) extends ParseException(message, cause) {

  /** Builds the exception with the standard message for a missing `index`. */
  def this(cause: Throwable, index: Int) = {
    this(s"Not Found index: $index", cause)
  }
}

/**
  * Raised when an input file cannot be parsed.
  *
  * @param message human-readable description of the failure
  * @param cause   the underlying exception
  */
case class InvalidInputFileException(
    message: String,
    cause: Throwable
) extends ParseException(message, cause) {

  /** Builds the exception with the standard "can't parse" message. */
  def this(cause: Throwable) = {
    this("Input file can't parse", cause)
  }
}
@@ -0,0 +1,9 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import org.embulk.config.{Config, ConfigDefault, Task}
4
+
5
/** Configuration of the twitter_ads_stats parser plugin. */
trait PluginTask extends Task {

  /**
    * Value of the `stop_on_invalid_record` option (default `false`).
    *
    * When `true` the plugin throws on an input it cannot parse; when `false`
    * it logs a warning and skips that input.
    */
  @Config("stop_on_invalid_record") @ConfigDefault("false")
  def getStopOnInvalidRecord: Boolean
}
@@ -0,0 +1,98 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import org.embulk.config._
4
+ import org.embulk.spi._
5
+ import org.embulk.spi.json.JsonParser
6
+ import org.embulk.spi.util.FileInputInputStream
7
+ import org.slf4j.Logger
8
+ import spray.json.{pimpAny, pimpString}
9
+
10
+ import scala.util.control.NonFatal
11
+ import scala.collection.JavaConverters._
12
+ import MetricsGroupJson._
13
+ import org.embulk.parser.twitter_ads_stats.define.{Root, RootJson}
14
+
15
/**
  * Embulk parser plugin that turns Twitter Ads stats JSON files into records.
  *
  * Each input file is parsed as one JSON document and resolved into
  * [[Column]] values, each of which is written as one record.
  */
class TwitterAdsStatsParserPlugin extends ParserPlugin {

  import TwitterAdsStatsParserPlugin._

  /** Loads the task configuration and declares the output schema. */
  override def transaction(config: ConfigSource, control: ParserPlugin.Control): Unit = {
    val task = config.loadConfig(classOf[PluginTask])
    control.run(
      task.dump(),
      new Schema(Column.createEmbulkColumns(metricElementNames).asJava)
    )
  }

  /**
    * Parses every file of `input` and writes the resulting records to `output`.
    *
    * A file that cannot be parsed or resolved is either skipped with a warning
    * or, when `stop_on_invalid_record` is enabled, aborts the run.
    *
    * @throws DataException if a file is invalid and `stop_on_invalid_record` is true
    */
  override def run(taskSource: TaskSource, schema: Schema, input: FileInput, output: PageOutput): Unit = {

    val task                = taskSource.loadTask(classOf[PluginTask])
    val stopOnInvalidRecord = task.getStopOnInvalidRecord

    LoanPattern(new PageBuilder(Exec.getBufferAllocator, schema, output)) { pb =>
      while (input.nextFile()) {
        val outcome = for {
          root    <- createRootFrom(input)
          columns <- root.resolveColumns(metricElementNames)
        } yield addRecord(pb, columns, root)

        outcome match {
          case Right(_) =>
          case Left(e) =>
            if (stopOnInvalidRecord) {
              throw new DataException(e.getMessage, e)
            } else {
              logger.warn(s"Skipped invalid record $e")
            }
        }
      }
      pb.finish()
    }
  }

  /**
    * Writes one record per element of `columns`.
    *
    * The fixed columns are filled from the matching [[Column]] field. Every
    * other schema column is treated as a metrics group name and filled with
    * that group's JSON, or with null when the record has no such group.
    */
  private def addRecord(pb: PageBuilder, columns: Seq[Column], root: Root): Unit = {
    // The schema columns do not depend on the record, so build them once.
    val embulkColumns = Column.createEmbulkColumns(metricElementNames)

    columns.foreach { column =>
      embulkColumns.foreach { embulkColumn =>
        embulkColumn.getName match {
          case "id" =>
            pb.setString(embulkColumn, column.id)
          case "date" =>
            pb.setString(embulkColumn, column.date.toString)
          case "segment" =>
            column.segment match {
              case Some(segment) => pb.setString(embulkColumn, segment)
              case None          => pb.setNull(embulkColumn)
            }
          case "placement" =>
            pb.setString(embulkColumn, column.placement)
          case key =>
            column.metricsGroup.get(key) match {
              case Some(m) =>
                pb.setJson(
                  embulkColumn,
                  jsonParser.parse(m.toJson.compactPrint)
                )
              case None =>
                pb.setNull(embulkColumn)
            }
        }
      }
      pb.addRecord()
    }
  }

  /**
    * Reads the current file of `input` as a JSON document and converts it to a [[Root]].
    *
    * @return the parsed root, or an [[InvalidInputFileException]] wrapping any
    *         non-fatal failure raised while reading or parsing
    */
  private def createRootFrom(input: FileInput): Either[ParseException, Root] = {
    // NOTE(review): the stream is intentionally left open; presumably closing it
    // would also close the FileInput that run() is still iterating. Confirm.
    val stream = new FileInputInputStream(input)
    try {
      // JSON is decoded as UTF-8 explicitly rather than with the platform default charset.
      val jsValue = scala.io.Source.fromInputStream(stream, "UTF-8").mkString.parseJson
      val root    = new RootJson(metricElementNames).RootReader.read(jsValue)
      Right(root)
    } catch {
      case NonFatal(e) => Left(new InvalidInputFileException(e))
    }
  }
}

object TwitterAdsStatsParserPlugin {

  /** Logger used for skipped-record warnings. */
  val logger: Logger = Exec.getLogger(classOf[TwitterAdsStatsParserPlugin])

  /** Embulk JSON parser used to build the values of the JSON columns. */
  val jsonParser = new JsonParser
}
@@ -0,0 +1,19 @@
1
+ package org.embulk.parser.twitter_ads_stats.define
2
+
3
+ import org.embulk.parser.twitter_ads_stats.{Column, MetricElementNames, ParseException}
4
+
5
/**
  * One entity of the stats response.
  *
  * @param id      identifier passed on to every column resolved from `id_data`
  * @param id_data the entries to resolve into columns
  */
case class Data(id: String, id_data: Seq[IDData]) {

  /**
    * Resolves every entry of `id_data` and concatenates the resulting columns
    * in entry order.
    *
    * All entries are resolved first. If any of them failed, the error of the
    * first failing entry is returned.
    */
  private[define] def resolveColumns(metricElementNames: MetricElementNames, request: Request): Either[ParseException, Seq[Column]] = {
    val perEntry = id_data.map(_.resolveColumns(id, metricElementNames, request))

    perEntry.collectFirst { case Left(error) => error } match {
      case Some(error) => Left(error)
      case None        => Right(perEntry.collect { case Right(columns) => columns }.flatten.toList)
    }
  }
}

object Data {

  /** Names of the fields declared by [[Data]], obtained by reflection. */
  val fieldNames: Array[String] = FieldNameUtil.fieldList[Data]
}
@@ -0,0 +1,9 @@
1
+ package org.embulk.parser.twitter_ads_stats.define
2
+
3
+ import scala.reflect.ClassTag
4
+
5
object FieldNameUtil {

  /**
    * Lists the names of the fields declared by the runtime class of `T`.
    *
    * Do not use this with inner classes.
    *
    * @tparam T the class to inspect
    * @return the declared field names, in the order reflection reports them
    */
  def fieldList[T](implicit tag: ClassTag[T]): Array[String] = {
    val declared = tag.runtimeClass.getDeclaredFields
    for (field <- declared) yield field.getName
  }
}
@@ -0,0 +1,36 @@
1
+ package org.embulk.parser.twitter_ads_stats.define
2
+
3
+ import java.time.LocalDate
4
+
5
+ import org.embulk.parser.twitter_ads_stats._
6
+
7
/**
  * Metrics of one entity, optionally restricted to a segment.
  *
  * @param metrics the metric time series
  * @param segment the segment these metrics belong to, if any
  */
case class IDData(metrics: Metrics, segment: Option[String]) {

  /**
    * Builds the column for a single date.
    *
    * @param date the date paired with its index into the metric time series
    * @return the column, or the error of the first metrics group that failed
    */
  private def resolveColumn(
      id: String,
      metricElementNames: MetricElementNames,
      date: (LocalDate, Int),
      placement: String
  ): Either[ParseException, Column] = {
    val (day, index) = date

    // Every group is looked up before the results are combined.
    val lookups = metricElementNames.names.map {
      case (groupName, elementNames) => groupName -> metrics.findMetricsGroup(index, elementNames)
    }

    val noGroups: Either[ParseException, Map[String, MetricsGroup]] = Right(Map.empty)
    val combined = lookups.foldRight(noGroups) { (lookup, acc) =>
      lookup match {
        case (_, Left(error))          => Left(error)
        case (groupName, Right(group)) => acc.map(_ + (groupName -> group))
      }
    }

    combined.map(groups => Column(id, day, segment, placement, groups))
  }

  /**
    * Builds one column per target date of `request`, in date order.
    *
    * All dates are resolved first. If any of them failed, the error of the
    * first failing date is returned.
    */
  private[define] def resolveColumns(id: String, metricElementNames: MetricElementNames, request: Request): Either[ParseException, Seq[Column]] = {
    val perDate = request.params.targetDates.zipWithIndex.map { datedIndex =>
      resolveColumn(id, metricElementNames, datedIndex, request.params.placement)
    }

    perDate.collectFirst { case Left(error) => error } match {
      case Some(error) => Left(error)
      case None        => Right(perDate.collect { case Right(column) => column }.toList)
    }
  }
}

object IDData {

  /** Names of the fields declared by [[IDData]], obtained by reflection. */
  val fieldNames: Array[String] = FieldNameUtil.fieldList[IDData]
}
@@ -0,0 +1,23 @@
1
+ package org.embulk.parser.twitter_ads_stats.define
2
+
3
+ import org.embulk.parser.twitter_ads_stats._
4
+
5
+ import scala.util.control.NonFatal
6
+
7
/**
  * Metric time series keyed by metric name.
  *
  * @param map key: metric name, value: the time series of that metric
  */
case class Metrics(map: Map[String, MetricTimeSeries]) {

  /**
    * Collects the value at `index` of every requested metric.
    *
    * Each name is normalised with `MetricElementNames.replaceSeparator`
    * before the lookup. A metric that is not in `map`, or whose series is
    * empty, yields `None`.
    *
    * @param index       position to read in each time series
    * @param metricNames the metrics to collect
    * @return the values keyed by normalised metric name, or an
    *         [[InvalidMetricTimeSeriesException]] wrapping any non-fatal
    *         exception raised while reading (presumably an out-of-range index)
    */
  private[define] def findMetricsGroup(index: Int, metricNames: Seq[String]): Either[ParseException, MetricsGroup] =
    try {
      val valuesAtIndex = metricNames.map { rawName =>
        val key    = MetricElementNames.replaceSeparator(rawName)
        val series = map.get(key).flatten
        key -> series.map(_(index))
      }
      val group = valuesAtIndex.toMap
      Right(group)
    } catch {
      case NonFatal(cause) => Left(new InvalidMetricTimeSeriesException(cause, index))
    }
}