embulk-parser-twitter_ads_stats 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.scalafmt.conf +7 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +79 -0
  6. data/build.gradle +79 -0
  7. data/build.sbt +15 -0
  8. data/classpath/embulk-parser-twitter_ads_stats-0.1.0.jar +0 -0
  9. data/classpath/scala-library-2.12.3.jar +0 -0
  10. data/classpath/spray-json_2.12-1.3.3.jar +0 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +5 -0
  13. data/gradlew +172 -0
  14. data/gradlew.bat +84 -0
  15. data/lib/embulk/guess/twitter_ads_stats.rb +61 -0
  16. data/lib/embulk/parser/twitter_ads_stats.rb +3 -0
  17. data/project/Dependencies.scala +11 -0
  18. data/project/build.properties +1 -0
  19. data/project/plugins.sbt +2 -0
  20. data/src/main/scala/org/embulk/parser/twitter_ads_stats/Column.scala +50 -0
  21. data/src/main/scala/org/embulk/parser/twitter_ads_stats/LoanPattern.scala +19 -0
  22. data/src/main/scala/org/embulk/parser/twitter_ads_stats/MetricElementNames.scala +47 -0
  23. data/src/main/scala/org/embulk/parser/twitter_ads_stats/MetricsGroupJson.scala +19 -0
  24. data/src/main/scala/org/embulk/parser/twitter_ads_stats/ParseException.scala +27 -0
  25. data/src/main/scala/org/embulk/parser/twitter_ads_stats/PluginTask.scala +9 -0
  26. data/src/main/scala/org/embulk/parser/twitter_ads_stats/TwitterAdsStatsParserPlugin.scala +98 -0
  27. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Data.scala +19 -0
  28. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/FieldNameUtil.scala +9 -0
  29. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/IDData.scala +36 -0
  30. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Metrics.scala +23 -0
  31. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Request.scala +42 -0
  32. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Root.scala +21 -0
  33. data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/RootJson.scala +107 -0
  34. data/src/main/scala/org/embulk/parser/twitter_ads_stats/package.scala +192 -0
  35. data/src/test/resources/test.json +759 -0
  36. data/src/test/scala/org/embulk/parser/twitter_ads_stats/ColumnSpec.scala +33 -0
  37. data/src/test/scala/org/embulk/parser/twitter_ads_stats/MetricElementNamesSpec.scala +14 -0
  38. data/src/test/scala/org/embulk/parser/twitter_ads_stats/MetricsGroupJsonSpec.scala +20 -0
  39. data/src/test/scala/org/embulk/parser/twitter_ads_stats/UnitSpec.scala +5 -0
  40. data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/MetricsJsonSpec.scala +57 -0
  41. data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/ParamsSpec.scala +26 -0
  42. data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/RootJsonSpec.scala +20 -0
  43. data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/RootSpec.scala +356 -0
  44. metadata +114 -0
@@ -0,0 +1,61 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "twitter_ads_stats" partial-config.yml
6
+ #
7
+ # Depending on the file format the plugin uses, you can choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ # class TwitterAdsStats < GuessPlugin
12
+ # Plugin.register_guess("twitter_ads_stats", self)
13
+ #
14
+ # def guess(config, sample_buffer)
15
+ # if sample_buffer[0,2] == GZIP_HEADER
16
+ # guessed = {}
17
+ # guessed["type"] = "twitter_ads_stats"
18
+ # guessed["property1"] = "guessed-value"
19
+ # return {"parser" => guessed}
20
+ # else
21
+ # return {}
22
+ # end
23
+ # end
24
+ # end
25
+
26
+ # class TwitterAdsStats < TextGuessPlugin
27
+ # Plugin.register_guess("twitter_ads_stats", self)
28
+ #
29
+ # def guess_text(config, sample_text)
30
+ # js = JSON.parse(sample_text) rescue nil
31
+ # if js && js["mykeyword"] == "keyword"
32
+ # guessed = {}
33
+ # guessed["type"] = "twitter_ads_stats"
34
+ # guessed["property1"] = "guessed-value"
35
+ # return {"parser" => guessed}
36
+ # else
37
+ # return {}
38
+ # end
39
+ # end
40
+ # end
41
+
42
+ # class TwitterAdsStats < LineGuessPlugin
43
+ # Plugin.register_guess("twitter_ads_stats", self)
44
+ #
45
+ # def guess_lines(config, sample_lines)
46
+ # all_line_matched = sample_lines.all? do |line|
47
+ # line =~ /mypattern/
48
+ # end
49
+ # if all_line_matched
50
+ # guessed = {}
51
+ # guessed["type"] = "twitter_ads_stats"
52
+ # guessed["property1"] = "guessed-value"
53
+ # return {"parser" => guessed}
54
+ # else
55
+ # return {}
56
+ # end
57
+ # end
58
+ # end
59
+
60
+ end
61
+ end
@@ -0,0 +1,3 @@
1
# Registers the JVM-implemented parser class with Embulk under the plugin
# name "twitter_ads_stats". The last argument is the `classpath` directory,
# resolved relative to this file.
plugin_classpath = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_parser(
  "twitter_ads_stats",
  "org.embulk.parser.twitter_ads_stats.TwitterAdsStatsParserPlugin",
  plugin_classpath
)
@@ -0,0 +1,11 @@
1
+ import sbt._
2
+
3
/** Library dependencies declared for the build. */
object Dependencies {

  // Both embulk-core artifacts (the main jar and the "tests" classifier)
  // are pinned to the same version.
  private val embulkVersion = "0.8.35"

  private val embulkCore = "org.embulk" % "embulk-core" % embulkVersion

  val value = Seq(
    "org.scalatest" %% "scalatest" % "3.0.1" % Test,
    embulkCore,
    embulkCore classifier "tests",
    "io.spray" %% "spray-json" % "1.3.3"
  )
}
@@ -0,0 +1 @@
1
+ sbt.version=0.13.16
@@ -0,0 +1,2 @@
1
+ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.8.2")
2
+ addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.12")
@@ -0,0 +1,50 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import java.time.LocalDate
4
+
5
+ import org.embulk.spi.`type`.Types
6
+ import org.embulk.spi.{Column => EmbulkColumn}
7
+
8
object Column {

  /**
    * Builds the Embulk schema columns.
    *
    * The first four columns are the fixed STRING columns `id`, `date`,
    * `segment` and `placement` (indices 0 to 3). They are followed by one
    * JSON column per metrics group, in sorted group-name order, with
    * indices continuing from 4.
    *
    * @param metricElementNames source of the metrics group names
    * @return fixed columns followed by the metrics group columns
    */
  def createEmbulkColumns(metricElementNames: MetricElementNames): Seq[EmbulkColumn] = {
    val fixedColumns = Seq("id", "date", "segment", "placement").zipWithIndex.map {
      case (name, index) => new EmbulkColumn(index, name, Types.STRING)
    }

    // Metrics columns are numbered right after the fixed ones.
    val firstMetricsIndex = fixedColumns.size
    val metricsColumns = metricElementNames.getSortedMetricsGroupNames.zipWithIndex.map {
      case (groupName, offset) =>
        new EmbulkColumn(firstMetricsIndex + offset, groupName, Types.JSON)
    }

    fixedColumns ++ metricsColumns
  }
}

/**
  * Values for a single output record.
  *
  * @param id           value of the `id` column
  * @param date         value of the `date` column
  * @param segment      value of the `segment` column, if any
  * @param placement    value of the `placement` column
  * @param metricsGroup metrics group values keyed by metrics group name
  */
case class Column(
    id: String,
    date: LocalDate,
    segment: Option[String],
    placement: String,
    metricsGroup: Map[String, MetricsGroup]
)
@@ -0,0 +1,19 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import scala.util.control.Exception.ignoring
4
+
5
/**
  * Loan pattern: runs a function against a resource and always closes the
  * resource afterwards.
  */
object LoanPattern {

  // `Closable` is a structural type, so calling `close()` on it is a
  // reflective call and needs this language feature enabled.
  import scala.language.reflectiveCalls
  import scala.util.control.NonFatal

  /** Anything that exposes a `close(): Unit` method. */
  type Closable = { def close(): Unit }

  /**
    * Applies `f` to `resource` and closes the resource when `f` completes,
    * whether it returned normally or threw.
    *
    * Closing is best-effort: a non-fatal exception thrown by `close()` is
    * discarded so it cannot replace the result (or the exception) of `f`.
    * Fatal errors (for example `VirtualMachineError` or `LinkageError`)
    * raised by `close()` are not swallowed and propagate to the caller.
    *
    * @param resource the resource to lend to `f`
    * @param f        the computation that uses the resource
    * @tparam R resource type; must provide `close(): Unit`
    * @tparam A result type of `f`
    * @return the value produced by `f`
    */
  def apply[R <: Closable, A](resource: R)(f: R => A): A =
    try {
      f(resource)
    } finally {
      try {
        resource.close()
      } catch {
        case NonFatal(_) => () // best-effort close: ignore ordinary failures
      }
    }
}
@@ -0,0 +1,47 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import org.embulk.parser.twitter_ads_stats.define.Metrics
4
+ import spray.json.{JsObject, JsValue}
5
+
6
/**
  * Names of the metric elements, grouped by metrics group.
  *
  * @param names key: metrics group name, value: metric element names of that group
  *              (example)
  *              Map(
  *                "engagement" -> Seq("engagements", "impressions"),
  *                "web_conversion" -> Seq("conversion_purchases.assisted", "conversion_sign_ups.assisted")
  *              )
  */
case class MetricElementNames(names: Map[String, Seq[String]]) {

  import MetricElementNames._

  /** Metrics group names (the keys of `names`) in ascending order. */
  def getSortedMetricsGroupNames: List[String] = names.keys.toList.sorted

  /**
    * Resolves a time series for every metric element name of every group.
    *
    * Each element name is split into path segments, the resolver is called
    * with those segments and `Some(json)`, and the result is stored under
    * the segments joined with `_`.
    *
    * @param resolveMetricTimeSeries resolver invoked with (path segments, Some(json))
    * @param json                    JSON object handed to the resolver
    */
  private[twitter_ads_stats] def resolveMetrics(
      resolveMetricTimeSeries: (List[String], Option[JsValue]) => MetricTimeSeries,
      json: JsObject
  ): Metrics = {
    val seriesByName: Map[String, MetricTimeSeries] =
      for {
        group       <- names
        elementName <- group._2
      } yield {
        val path = splitSeparator(elementName)
        path.mkString(separator) -> resolveMetricTimeSeries(path, Some(json))
      }
    Metrics(seriesByName)
  }
}

object MetricElementNames {

  private val separator = "_"

  // NOTE(review): the character class `[.+]` splits on '.' and on a literal
  // '+', whereas replaceSeparator below only rewrites '.'. Confirm whether
  // '+' is meant to be a separator as well.
  private def splitSeparator(name: String): List[String] =
    name.split("[.+]").toList

  // @todo keep this closed within the scope of MetricElementNames
  // @todo exception handling
  /** Replaces every '.' in `name` with `_`. */
  def replaceSeparator(name: String): String =
    name.replaceAll("[.]", separator)
}
@@ -0,0 +1,19 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import spray.json.DefaultJsonProtocol._
4
+ import spray.json._
5
+
6
/** spray-json serialization support for `MetricsGroup`. */
object MetricsGroupJson {

  /** Writes a `MetricsGroup` as a JSON object with one field per entry. */
  implicit object MetricsGroupJsonWriter extends RootJsonWriter[MetricsGroup] {

    // A missing value is written as JSON null, a present one as a number.
    private def toJValue(element: Option[Long]): JsValue =
      element.fold[JsValue](JsNull)(_.toJson)

    override def write(obj: MetricsGroup): JsValue = {
      val jsFields = obj.map { case (name, value) => (name, toJValue(value)) }
      JsObject(jsFields)
    }
  }
}
@@ -0,0 +1,27 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
/**
  * Base type for the errors raised while parsing an input file.
  *
  * The message and cause are forwarded to `Throwable`, so `getMessage`,
  * `getCause` and the default `toString` report them.
  *
  * @param message description of the failure
  * @param cause   underlying error
  */
sealed abstract class ParseException(
    message: String,
    cause: Throwable
) extends Throwable(message, cause)

/**
  * A metric time series could not be read.
  *
  * @param message description of the failure
  * @param cause   underlying error
  */
case class InvalidMetricTimeSeriesException(
    message: String,
    cause: Throwable
) extends ParseException(message, cause) {

  /** Builds the exception for a time-series index that could not be read. */
  def this(cause: Throwable, index: Int) = {
    this(s"Not Found index: $index", cause)
  }
}

/**
  * The input file could not be parsed.
  *
  * @param message description of the failure
  * @param cause   underlying error
  */
case class InvalidInputFileException(
    message: String,
    cause: Throwable
) extends ParseException(message, cause) {

  /** Builds the exception with the standard "can't parse" message. */
  def this(cause: Throwable) = {
    this("Input file can't parse", cause)
  }
}
@@ -0,0 +1,9 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import org.embulk.config.{Config, ConfigDefault, Task}
4
+
5
/** Embulk task configuration for the twitter_ads_stats parser. */
trait PluginTask extends Task {

  /**
    * Value of the `stop_on_invalid_record` option; defaults to `false`.
    */
  @Config("stop_on_invalid_record")
  @ConfigDefault("false")
  def getStopOnInvalidRecord: Boolean
}
@@ -0,0 +1,98 @@
1
+ package org.embulk.parser.twitter_ads_stats
2
+
3
+ import org.embulk.config._
4
+ import org.embulk.spi._
5
+ import org.embulk.spi.json.JsonParser
6
+ import org.embulk.spi.util.FileInputInputStream
7
+ import org.slf4j.Logger
8
+ import spray.json.{pimpAny, pimpString}
9
+
10
+ import scala.util.control.NonFatal
11
+ import scala.collection.JavaConverters._
12
+ import MetricsGroupJson._
13
+ import org.embulk.parser.twitter_ads_stats.define.{Root, RootJson}
14
+
15
/**
  * Embulk parser plugin that reads each input file as one JSON document and
  * emits one record per resolved [[Column]].
  */
class TwitterAdsStatsParserPlugin extends ParserPlugin {

  import TwitterAdsStatsParserPlugin._

  /** Loads the task configuration and declares the output schema. */
  override def transaction(config: ConfigSource, control: ParserPlugin.Control): Unit = {
    val task = config.loadConfig(classOf[PluginTask])
    control.run(
      task.dump(),
      new Schema(Column.createEmbulkColumns(metricElementNames).asJava)
    )
  }

  /**
    * Parses every file of `input` and writes the resulting records to `output`.
    *
    * A file that cannot be parsed or resolved is logged and skipped, unless
    * `stop_on_invalid_record` is enabled, in which case a `DataException`
    * wrapping the failure is thrown.
    */
  override def run(taskSource: TaskSource, schema: Schema, input: FileInput, output: PageOutput): Unit = {

    val task                = taskSource.loadTask(classOf[PluginTask])
    val stopOnInvalidRecord = task.getStopOnInvalidRecord

    LoanPattern(new PageBuilder(Exec.getBufferAllocator, schema, output)) { pb =>
      while (input.nextFile()) {
        val parsed = for {
          root    <- createRootFrom(input)
          columns <- root.resolveColumns(metricElementNames)
        } yield addRecord(pb, columns, root)

        parsed match {
          case Right(_) =>
          case Left(e) if stopOnInvalidRecord =>
            throw new DataException(e.getMessage, e)
          case Left(e) =>
            logger.warn(s"Skipped invalid record $e")
        }
      }
      pb.finish()
    }
  }

  /**
    * Writes one record per element of `columns`.
    *
    * The fixed columns are filled from the matching [[Column]] field; any
    * other schema column is treated as a metrics group name and is written
    * as JSON, or as null when the record has no such group.
    */
  private def addRecord(pb: PageBuilder, columns: Seq[Column], root: Root): Unit = {
    // The schema columns do not depend on the record, so build them once
    // instead of once per record.
    val embulkColumns = Column.createEmbulkColumns(metricElementNames)

    columns.foreach { column =>
      embulkColumns.foreach { embulkColumn =>
        embulkColumn.getName match {
          case "id" =>
            pb.setString(embulkColumn, column.id)
          case "date" =>
            pb.setString(embulkColumn, column.date.toString)
          case "segment" =>
            column.segment match {
              case Some(segment) => pb.setString(embulkColumn, segment)
              case None          => pb.setNull(embulkColumn)
            }
          case "placement" =>
            pb.setString(embulkColumn, column.placement)
          case metricsGroupName =>
            column.metricsGroup.get(metricsGroupName) match {
              case Some(m) =>
                pb.setJson(
                  embulkColumn,
                  jsonParser.parse(m.toJson.compactPrint)
                )
              case None =>
                pb.setNull(embulkColumn)
            }
        }
      }
      pb.addRecord()
    }
  }

  /**
    * Reads the current file of `input` as a JSON document and converts it to
    * a [[Root]].
    *
    * @return the parsed root, or an [[InvalidInputFileException]] wrapping
    *         any non-fatal failure
    */
  private def createRootFrom(input: FileInput): Either[ParseException, Root] = {
    // NOTE(review): the stream is not closed here; presumably closing it
    // would also close the FileInput that run() is still iterating - confirm.
    val stream = new FileInputInputStream(input)
    try {
      // Decode explicitly as UTF-8 rather than with the platform default
      // charset, so non-ASCII text is read the same way on every platform.
      val jsValue = scala.io.Source.fromInputStream(stream, "UTF-8").mkString.parseJson
      val root    = new RootJson(metricElementNames).RootReader.read(jsValue)
      Right(root)
    } catch {
      case NonFatal(e) => Left(new InvalidInputFileException(e))
    }
  }
}
94
+
95
/** Values shared by every [[TwitterAdsStatsParserPlugin]] instance. */
object TwitterAdsStatsParserPlugin {

  /** Logger obtained from Embulk for the plugin class. */
  val logger: Logger = Exec.getLogger(classOf[TwitterAdsStatsParserPlugin])

  /** Embulk JSON parser used to build the values of the JSON columns. */
  val jsonParser: JsonParser = new JsonParser
}
@@ -0,0 +1,19 @@
1
+ package org.embulk.parser.twitter_ads_stats.define
2
+
3
+ import org.embulk.parser.twitter_ads_stats.{Column, MetricElementNames, ParseException}
4
+
5
/**
  * One `data` entry: an id together with its `id_data` elements.
  */
case class Data(id: String, id_data: Seq[IDData]) {

  /**
    * Resolves the columns of every `id_data` element and concatenates them
    * in order.
    *
    * @return all columns, or the error of the first element (in order) that
    *         failed to resolve
    */
  private[define] def resolveColumns(metricElementNames: MetricElementNames, request: Request): Either[ParseException, Seq[Column]] = {
    val perIdData = id_data.map(_.resolveColumns(id, metricElementNames, request))

    perIdData.collectFirst { case Left(error) => error } match {
      case Some(error) => Left(error)
      case None        => Right(perIdData.toList.collect { case Right(columns) => columns }.flatten)
    }
  }
}

object Data {

  /** Names of the fields declared on [[Data]], obtained by reflection. */
  val fieldNames: Array[String] = FieldNameUtil.fieldList[Data]
}
@@ -0,0 +1,9 @@
1
+ package org.embulk.parser.twitter_ads_stats.define
2
+
3
+ import scala.reflect.ClassTag
4
+
5
/** Reflection helper for listing the declared field names of a class. */
object FieldNameUtil {

  // Do not use this with inner classes.
  /**
    * Returns the names of the fields declared directly on the runtime class
    * of `T`.
    *
    * @param tag class tag that supplies the runtime class of `T`
    */
  def fieldList[T](implicit tag: ClassTag[T]): Array[String] =
    for (field <- tag.runtimeClass.getDeclaredFields) yield field.getName
}
@@ -0,0 +1,36 @@
1
+ package org.embulk.parser.twitter_ads_stats.define
2
+
3
+ import java.time.LocalDate
4
+
5
+ import org.embulk.parser.twitter_ads_stats._
6
+
7
/**
  * One `id_data` element: the metrics plus an optional segment.
  */
case class IDData(metrics: Metrics, segment: Option[String]) {

  /**
    * Builds the column for a single date.
    *
    * @param date the date paired with its position in the target dates;
    *             the position is used as the index into each time series
    * @return the column, or the error of the first metrics group (in
    *         iteration order) that failed to resolve
    */
  private def resolveColumn(
      id: String,
      metricElementNames: MetricElementNames,
      date: (LocalDate, Int),
      placement: String
  ): Either[ParseException, Column] = {
    val (day, dayIndex) = date

    val groups = metricElementNames.names.map {
      case (groupName, elementNames) =>
        (groupName, metrics.findMetricsGroup(dayIndex, elementNames))
    }

    groups.collectFirst { case (_, Left(error)) => error } match {
      case Some(error) => Left(error)
      case None =>
        val resolved = groups.collect { case (groupName, Right(group)) => groupName -> group }
        Right(Column(id, day, segment, placement, resolved))
    }
  }

  /**
    * Builds one column per target date of the request, in date order.
    *
    * @return all columns, or the error of the first date that failed
    */
  private[define] def resolveColumns(id: String, metricElementNames: MetricElementNames, request: Request): Either[ParseException, Seq[Column]] = {
    val perDate = request.params.targetDates.zipWithIndex.map { dateWithIndex =>
      resolveColumn(id, metricElementNames, dateWithIndex, request.params.placement)
    }

    perDate.collectFirst { case Left(error) => error } match {
      case Some(error) => Left(error)
      case None        => Right(perDate.toList.collect { case Right(column) => column })
    }
  }
}

object IDData {

  /** Names of the fields declared on [[IDData]], obtained by reflection. */
  val fieldNames: Array[String] = FieldNameUtil.fieldList[IDData]
}
@@ -0,0 +1,23 @@
1
+ package org.embulk.parser.twitter_ads_stats.define
2
+
3
+ import org.embulk.parser.twitter_ads_stats._
4
+
5
+ import scala.util.control.NonFatal
6
+
7
/**
  * Metric time series keyed by metric name.
  */
case class Metrics(map: Map[String, MetricTimeSeries]) {

  /**
    * Collects, for each requested metric, the value at position `index` of
    * its time series.
    *
    * Metric names have '.' replaced by `_` before lookup. A metric with no
    * time series yields `None`.
    *
    * @param index       position to read from each time series
    * @param metricNames metric element names to look up
    * @return the metrics group, or an [[InvalidMetricTimeSeriesException]]
    *         wrapping any non-fatal failure raised while reading
    */
  private[define] def findMetricsGroup(index: Int, metricNames: Seq[String]): Either[ParseException, MetricsGroup] = {
    // Try captures exactly the non-fatal exceptions; fatal ones propagate.
    val attempt = scala.util.Try {
      metricNames.map { name =>
        val key          = MetricElementNames.replaceSeparator(name)
        val valueAtIndex = map.get(key).flatten.map(values => values(index))
        key -> valueAtIndex
      }.toMap
    }

    attempt match {
      case scala.util.Success(metricsGroup) => Right(metricsGroup)
      case scala.util.Failure(e)            => Left(new InvalidMetricTimeSeriesException(e, index))
    }
  }
}