embulk-parser-twitter_ads_stats 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.scalafmt.conf +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +79 -0
- data/build.gradle +79 -0
- data/build.sbt +15 -0
- data/classpath/embulk-parser-twitter_ads_stats-0.1.0.jar +0 -0
- data/classpath/scala-library-2.12.3.jar +0 -0
- data/classpath/spray-json_2.12-1.3.3.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/guess/twitter_ads_stats.rb +61 -0
- data/lib/embulk/parser/twitter_ads_stats.rb +3 -0
- data/project/Dependencies.scala +11 -0
- data/project/build.properties +1 -0
- data/project/plugins.sbt +2 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/Column.scala +50 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/LoanPattern.scala +19 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/MetricElementNames.scala +47 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/MetricsGroupJson.scala +19 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/ParseException.scala +27 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/PluginTask.scala +9 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/TwitterAdsStatsParserPlugin.scala +98 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Data.scala +19 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/FieldNameUtil.scala +9 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/IDData.scala +36 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Metrics.scala +23 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Request.scala +42 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/Root.scala +21 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/define/RootJson.scala +107 -0
- data/src/main/scala/org/embulk/parser/twitter_ads_stats/package.scala +192 -0
- data/src/test/resources/test.json +759 -0
- data/src/test/scala/org/embulk/parser/twitter_ads_stats/ColumnSpec.scala +33 -0
- data/src/test/scala/org/embulk/parser/twitter_ads_stats/MetricElementNamesSpec.scala +14 -0
- data/src/test/scala/org/embulk/parser/twitter_ads_stats/MetricsGroupJsonSpec.scala +20 -0
- data/src/test/scala/org/embulk/parser/twitter_ads_stats/UnitSpec.scala +5 -0
- data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/MetricsJsonSpec.scala +57 -0
- data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/ParamsSpec.scala +26 -0
- data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/RootJsonSpec.scala +20 -0
- data/src/test/scala/org/embulk/parser/twitter_ads_stats/define/RootSpec.scala +356 -0
- metadata +114 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Guess
|
3
|
+
|
4
|
+
# TODO implement guess plugin to make this command work:
|
5
|
+
# $ embulk guess -g "twitter_ads_stats" partial-config.yml
|
6
|
+
#
|
7
|
+
# Depending on the file format the plugin uses, you can choose
|
8
|
+
# one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
|
9
|
+
# or line guess (LineGuessPlugin).
|
10
|
+
|
11
|
+
# class TwitterAdsStats < GuessPlugin
|
12
|
+
# Plugin.register_guess("twitter_ads_stats", self)
|
13
|
+
#
|
14
|
+
# def guess(config, sample_buffer)
|
15
|
+
# if sample_buffer[0,2] == GZIP_HEADER
|
16
|
+
# guessed = {}
|
17
|
+
# guessed["type"] = "twitter_ads_stats"
|
18
|
+
# guessed["property1"] = "guessed-value"
|
19
|
+
# return {"parser" => guessed}
|
20
|
+
# else
|
21
|
+
# return {}
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
|
26
|
+
# class TwitterAdsStats < TextGuessPlugin
|
27
|
+
# Plugin.register_guess("twitter_ads_stats", self)
|
28
|
+
#
|
29
|
+
# def guess_text(config, sample_text)
|
30
|
+
# js = JSON.parse(sample_text) rescue nil
|
31
|
+
# if js && js["mykeyword"] == "keyword"
|
32
|
+
# guessed = {}
|
33
|
+
# guessed["type"] = "twitter_ads_stats"
|
34
|
+
# guessed["property1"] = "guessed-value"
|
35
|
+
# return {"parser" => guessed}
|
36
|
+
# else
|
37
|
+
# return {}
|
38
|
+
# end
|
39
|
+
# end
|
40
|
+
# end
|
41
|
+
|
42
|
+
# class TwitterAdsStats < LineGuessPlugin
|
43
|
+
# Plugin.register_guess("twitter_ads_stats", self)
|
44
|
+
#
|
45
|
+
# def guess_lines(config, sample_lines)
|
46
|
+
# all_line_matched = sample_lines.all? do |line|
|
47
|
+
# line =~ /mypattern/
|
48
|
+
# end
|
49
|
+
# if all_line_matched
|
50
|
+
# guessed = {}
|
51
|
+
# guessed["type"] = "twitter_ads_stats"
|
52
|
+
# guessed["property1"] = "guessed-value"
|
53
|
+
# return {"parser" => guessed}
|
54
|
+
# else
|
55
|
+
# return {}
|
56
|
+
# end
|
57
|
+
# end
|
58
|
+
# end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
import sbt._
|
2
|
+
|
3
|
+
/** Library dependencies referenced by the sbt build definition. */
object Dependencies {

  // embulk-core is listed twice below: once as the plain artifact and once
  // with the "tests" classifier, so the coordinates are written only once.
  private val embulkCore = "org.embulk" % "embulk-core" % "0.8.35"

  val value = Seq(
    "org.scalatest" %% "scalatest" % "3.0.1" % Test,
    embulkCore,
    embulkCore classifier "tests",
    "io.spray" %% "spray-json" % "1.3.3"
  )
}
|
@@ -0,0 +1 @@
|
|
1
|
+
sbt.version=0.13.16
|
data/project/plugins.sbt
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats
|
2
|
+
|
3
|
+
import java.time.LocalDate
|
4
|
+
|
5
|
+
import org.embulk.spi.`type`.Types
|
6
|
+
import org.embulk.spi.{Column => EmbulkColumn}
|
7
|
+
|
8
|
+
/** Factory for the Embulk column definitions produced by this parser. */
object Column {

  /**
    * Builds the Embulk columns: the fixed STRING columns `id`, `date`, `segment`
    * and `placement` (indices 0-3), followed by one JSON column per metrics
    * group, ordered by sorted group name and indexed from 4 upwards.
    *
    * @param metricElementNames supplies the sorted metrics group names
    * @return fixed columns followed by the metrics group columns
    */
  def createEmbulkColumns(metricElementNames: MetricElementNames): Seq[EmbulkColumn] = {
    val fixedColumns = Seq("id", "date", "segment", "placement").zipWithIndex.map {
      case (columnName, position) => new EmbulkColumn(position, columnName, Types.STRING)
    }

    // Metrics group columns continue the numbering right after the fixed ones.
    val groupColumns = metricElementNames.getSortedMetricsGroupNames.zipWithIndex.map {
      case (groupName, offset) =>
        new EmbulkColumn(fixedColumns.size + offset, groupName, Types.JSON)
    }

    fixedColumns ++ groupColumns
  }
}

/**
  * Values for one record: the four fixed fields plus the metrics groups.
  *
  * @param id           value written to the `id` column
  * @param date         value written (via `toString`) to the `date` column
  * @param segment      value for the `segment` column; `None` is written as null
  * @param placement    value written to the `placement` column
  * @param metricsGroup metrics groups keyed by group name
  */
case class Column(
    id: String,
    date: LocalDate,
    segment: Option[String],
    placement: String,
    metricsGroup: Map[String, MetricsGroup]
)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats
|
2
|
+
|
3
|
+
import scala.util.control.Exception.ignoring
|
4
|
+
|
5
|
+
/**
  * Loan pattern: lends a resource to a function and always closes it afterwards.
  */
object LoanPattern {

  import scala.language.reflectiveCalls
  import scala.util.control.NonFatal

  /** Anything exposing a `close(): Unit` method (structural type). */
  type Closable = { def close(): Unit }

  /**
    * Applies `f` to `resource` and closes the resource afterwards, whether or
    * not `f` throws.
    *
    * Non-fatal exceptions raised by `close()` are deliberately ignored
    * (best-effort cleanup). Fatal errors such as `InterruptedException` or
    * `VirtualMachineError` are no longer swallowed and propagate to the caller.
    *
    * @param resource the resource to lend and then close
    * @param f        the function run with the resource
    * @tparam R       the resource type
    * @tparam A       the result type of `f`
    * @return the value produced by `f`
    */
  def apply[R <: Closable, A](resource: R)(f: R => A): A = {
    try {
      f(resource)
    } finally {
      try {
        resource.close()
      } catch {
        // Best-effort close: only recoverable failures are suppressed.
        case NonFatal(_) => ()
      }
    }
  }

}
|
@@ -0,0 +1,47 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats
|
2
|
+
|
3
|
+
import org.embulk.parser.twitter_ads_stats.define.Metrics
|
4
|
+
import spray.json.{JsObject, JsValue}
|
5
|
+
|
6
|
+
/**
  * Element names of the metrics, grouped by metrics group.
  *
  * @param names key: metrics group name, value: the metric element names of that group
  *              (example)
  *              Map(
  *                "engagement" -> Seq("engagements", "impressions"),
  *                "web_conversion" -> Seq("conversion_purchases.assisted", "conversion_sign_ups.assisted")
  *              )
  */
case class MetricElementNames(names: Map[String, Seq[String]]) {

  import MetricElementNames._

  /** The metrics group names (keys of `names`) as a sorted list. */
  def getSortedMetricsGroupNames: List[String] = names.keySet.toList.sorted

  /**
    * Resolves one time series for every element name of every group.
    *
    * Each element name is split into path segments; the segments joined with
    * the separator form the key of the resulting entry, and the segments plus
    * the JSON object are handed to `resolveMetricTimeSeries` to get the value.
    *
    * @param resolveMetricTimeSeries resolver called with the path segments and `Some(json)`
    * @param json                    the JSON object passed to the resolver
    * @return the resolved entries wrapped in a [[Metrics]]
    */
  private[twitter_ads_stats] def resolveMetrics(
      resolveMetricTimeSeries: (List[String], Option[JsValue]) => MetricTimeSeries,
      json: JsObject
  ): Metrics = {
    val source: Option[JsValue] = Some(json)
    val resolved = for {
      elementNames <- names.values
      elementName  <- elementNames
    } yield {
      val path = splitSeparator(elementName)
      path.mkString(separator) -> resolveMetricTimeSeries(path, source)
    }
    Metrics(resolved.toMap)
  }
}

object MetricElementNames {

  private val separator = "_"

  /** Splits an element name on every `.` or `+` character. */
  private def splitSeparator(name: String): List[String] =
    name.split("[.+]").toList

  // @todo keep this closed within the scope of MetricElementNames
  // @todo exception handling
  /** Replaces every `.` in `name` with the separator. */
  def replaceSeparator(name: String): String =
    name.replaceAll("[.]", separator)
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats
|
2
|
+
|
3
|
+
import spray.json.DefaultJsonProtocol._
|
4
|
+
import spray.json._
|
5
|
+
|
6
|
+
/** spray-json serialization support for `MetricsGroup`. */
object MetricsGroupJson {

  /** Writes a `MetricsGroup` as a JSON object with one field per entry. */
  implicit object MetricsGroupJsonWriter extends RootJsonWriter[MetricsGroup] {

    // A missing value becomes JSON null, a present one its JSON encoding.
    private def toJValue(element: Option[Long]): JsValue =
      element.fold[JsValue](JsNull)(_.toJson)

    override def write(obj: MetricsGroup): JsValue = {
      val fields = obj.map { case (fieldName, fieldValue) => fieldName -> toJValue(fieldValue) }
      JsObject(fields)
    }
  }
}
|
@@ -0,0 +1,27 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats
|
2
|
+
|
3
|
+
/**
  * Base type of the errors this parser reports.
  *
  * The message and cause are forwarded to `Throwable`, so `getMessage`,
  * `getCause`, `toString` and stack traces carry them. Previously they were
  * dropped and `getMessage` / `getCause` always returned null.
  *
  * @param message human-readable description of the failure
  * @param cause   the underlying error
  */
sealed abstract class ParseException(
    message: String,
    cause: Throwable
) extends Throwable(message, cause)

/**
  * Raised when a metric time series cannot be read at a given index.
  *
  * @param message human-readable description of the failure
  * @param cause   the underlying error
  */
case class InvalidMetricTimeSeriesException(
    message: String,
    cause: Throwable
) extends ParseException(message, cause) {

  /** Builds the exception with the message "Not Found index: <index>". */
  def this(cause: Throwable, index: Int) = {
    this(s"Not Found index: $index", cause)
  }
}

/**
  * Raised when an input file cannot be parsed.
  *
  * @param message human-readable description of the failure
  * @param cause   the underlying error
  */
case class InvalidInputFileException(
    message: String,
    cause: Throwable
) extends ParseException(message, cause) {

  /** Builds the exception with the default "Input file can't parse" message. */
  def this(cause: Throwable) = {
    this("Input file can't parse", cause)
  }
}
|
@@ -0,0 +1,98 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats
|
2
|
+
|
3
|
+
import org.embulk.config._
|
4
|
+
import org.embulk.spi._
|
5
|
+
import org.embulk.spi.json.JsonParser
|
6
|
+
import org.embulk.spi.util.FileInputInputStream
|
7
|
+
import org.slf4j.Logger
|
8
|
+
import spray.json.{pimpAny, pimpString}
|
9
|
+
|
10
|
+
import scala.util.control.NonFatal
|
11
|
+
import scala.collection.JavaConverters._
|
12
|
+
import MetricsGroupJson._
|
13
|
+
import org.embulk.parser.twitter_ads_stats.define.{Root, RootJson}
|
14
|
+
|
15
|
+
/**
  * Embulk parser plugin that reads one JSON document per input file and
  * emits records with the fixed columns plus one JSON column per metrics group.
  */
class TwitterAdsStatsParserPlugin extends ParserPlugin {

  import TwitterAdsStatsParserPlugin._

  /** Loads the task configuration and hands the task plus the output schema to `control`. */
  override def transaction(config: ConfigSource, control: ParserPlugin.Control): Unit = {
    val task = config.loadConfig(classOf[PluginTask])
    control.run(
      task.dump(),
      new Schema(Column.createEmbulkColumns(metricElementNames).asJava)
    )
  }

  /**
    * Parses every file of `input` and writes the resulting records to `output`.
    *
    * A file that cannot be parsed or resolved either aborts the run with a
    * `DataException` (when `stop_on_invalid_record` is set) or is skipped
    * with a warning.
    */
  override def run(taskSource: TaskSource, schema: Schema, input: FileInput, output: PageOutput): Unit = {

    val task                = taskSource.loadTask(classOf[PluginTask])
    val stopOnInvalidRecord = task.getStopOnInvalidRecord

    LoanPattern(new PageBuilder(Exec.getBufferAllocator, schema, output)) { pb =>
      while (input.nextFile()) {
        val outcome = for {
          root    <- createRootFrom(input)
          columns <- root.resolveColumns(metricElementNames)
        } yield addRecord(pb, columns, root)

        outcome match {
          case Right(_) => ()
          case Left(e) if stopOnInvalidRecord =>
            throw new DataException(e.getMessage, e)
          case Left(e) =>
            logger.warn(s"Skipped invalid record $e")
        }
      }
      pb.finish()
    }
  }

  /**
    * Writes one record per [[Column]] into the page builder.
    *
    * Each Embulk column is filled by name: the four fixed names map to the
    * corresponding `Column` fields, any other name is looked up as a metrics
    * group and written as JSON (or null when the group is absent).
    */
  private def addRecord(pb: PageBuilder, columns: Seq[Column], root: Root): Unit = {
    // The column definitions do not depend on the row, so build them once
    // instead of once per record.
    val embulkColumns = Column.createEmbulkColumns(metricElementNames)

    columns.foreach { column =>
      embulkColumns.foreach { embulkColumn =>
        embulkColumn.getName match {
          case "id" =>
            pb.setString(embulkColumn, column.id)
          case "date" =>
            pb.setString(embulkColumn, column.date.toString)
          case "segment" =>
            column.segment match {
              case Some(segment) => pb.setString(embulkColumn, segment)
              case None          => pb.setNull(embulkColumn)
            }
          case "placement" =>
            pb.setString(embulkColumn, column.placement)
          case groupName =>
            column.metricsGroup.get(groupName) match {
              case Some(group) =>
                pb.setJson(
                  embulkColumn,
                  jsonParser.parse(group.toJson.compactPrint)
                )
              case None =>
                pb.setNull(embulkColumn)
            }
        }
      }
      pb.addRecord()
    }
  }

  /**
    * Reads the current file of `input` as a JSON document and converts it to a [[Root]].
    *
    * @return the parsed root, or an `InvalidInputFileException` wrapping any non-fatal failure
    */
  private def createRootFrom(input: FileInput): Either[ParseException, Root] = {
    // NOTE(review): the stream is left open, as in the original code; closing it
    // would presumably close the underlying FileInput as well - confirm.
    val stream = new FileInputInputStream(input)
    try {
      // JSON text is UTF-8; do not depend on the platform default charset.
      val jsValue = scala.io.Source.fromInputStream(stream, "UTF-8").mkString.parseJson
      val root    = new RootJson(metricElementNames).RootReader.read(jsValue)
      Right(root)
    } catch {
      case NonFatal(e) => Left(new InvalidInputFileException(e))
    }
  }
}

/** Shared, stateless helpers of the plugin. */
object TwitterAdsStatsParserPlugin {
  val logger: Logger = Exec.getLogger(classOf[TwitterAdsStatsParserPlugin])
  val jsonParser     = new JsonParser
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats.define
|
2
|
+
|
3
|
+
import org.embulk.parser.twitter_ads_stats.{Column, MetricElementNames, ParseException}
|
4
|
+
|
5
|
+
/**
  * One `data` entry: an id together with its `id_data` entries.
  *
  * @param id      identifier passed on to every resolved column
  * @param id_data the entries that are resolved into columns
  */
case class Data(id: String, id_data: Seq[IDData]) {

  /**
    * Resolves the columns of every `id_data` entry and concatenates them in order.
    *
    * @return all columns, or the error of the first entry that failed to resolve
    */
  private[define] def resolveColumns(
      metricElementNames: MetricElementNames,
      request: Request
  ): Either[ParseException, Seq[Column]] = {
    // Every entry is resolved eagerly; the first failure (in order) wins.
    val perEntry     = id_data.map(_.resolveColumns(id, metricElementNames, request))
    val firstFailure = perEntry.collectFirst { case Left(error) => error }
    firstFailure.toLeft(perEntry.collect { case Right(columns) => columns }.flatten)
  }
}

object Data {
  /** Field names of [[Data]] as produced by `FieldNameUtil.fieldList`. */
  val fieldNames: Array[String] = FieldNameUtil.fieldList[Data]
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats.define
|
2
|
+
|
3
|
+
import java.time.LocalDate
|
4
|
+
|
5
|
+
import org.embulk.parser.twitter_ads_stats._
|
6
|
+
|
7
|
+
/**
  * One `id_data` entry: the metrics plus an optional segment.
  *
  * @param metrics the metrics from which the metrics groups are looked up
  * @param segment optional segment copied into every resolved column
  */
case class IDData(metrics: Metrics, segment: Option[String]) {

  /**
    * Builds the [[Column]] for a single date.
    *
    * @param date the date paired with its index into the metric time series
    * @return the column, or the error of the first metrics group that failed
    */
  private def resolveColumn(
      id: String,
      metricElementNames: MetricElementNames,
      date: (LocalDate, Int),
      placement: String
  ): Either[ParseException, Column] = {
    val (targetDate, seriesIndex) = date

    // Look up every metrics group at this date's index.
    val lookups = metricElementNames.names.map {
      case (groupName, elementNames) =>
        groupName -> metrics.findMetricsGroup(seriesIndex, elementNames)
    }

    val firstFailure = lookups.collectFirst { case (_, Left(error)) => error }
    val groups       = lookups.collect { case (groupName, Right(group)) => groupName -> group }

    firstFailure
      .toLeft(groups)
      .map(Column(id, targetDate, segment, placement, _))
  }

  /**
    * Resolves one column per target date of the request, in date order.
    *
    * @return the columns, or the error of the first date that failed to resolve
    */
  private[define] def resolveColumns(
      id: String,
      metricElementNames: MetricElementNames,
      request: Request
  ): Either[ParseException, Seq[Column]] = {
    val placement = request.params.placement
    val perDate = request.params.targetDates.zipWithIndex.map { indexedDate =>
      resolveColumn(id, metricElementNames, indexedDate, placement)
    }
    val firstFailure = perDate.collectFirst { case Left(error) => error }
    firstFailure.toLeft(perDate.collect { case Right(column) => column })
  }
}

object IDData {
  /** Field names of [[IDData]] as produced by `FieldNameUtil.fieldList`. */
  val fieldNames: Array[String] = FieldNameUtil.fieldList[IDData]
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package org.embulk.parser.twitter_ads_stats.define
|
2
|
+
|
3
|
+
import org.embulk.parser.twitter_ads_stats._
|
4
|
+
|
5
|
+
import scala.util.control.NonFatal
|
6
|
+
|
7
|
+
/**
  * Metric time series keyed by (separator-replaced) metric name.
  *
  * @param map metric name to its time series
  */
case class Metrics(map: Map[String, MetricTimeSeries]) {

  /**
    * Picks the value at `index` from the time series of each requested metric.
    *
    * Metrics that are not present in `map`, or whose series is empty, yield `None`.
    *
    * @param index       position in each time series
    * @param metricNames metric names; `.` is replaced by the separator before lookup
    * @return the values keyed by the replaced name, or an
    *         `InvalidMetricTimeSeriesException` if any lookup throws (for
    *         example when `index` is outside a series)
    */
  private[define] def findMetricsGroup(index: Int, metricNames: Seq[String]): Either[ParseException, MetricsGroup] = {
    // Value of a single metric at `index`; may throw when the index is out of range.
    def valueAt(metricName: String) = {
      val key = MetricElementNames.replaceSeparator(metricName)
      key -> map.get(key).flatten.map(_.apply(index))
    }

    try Right(metricNames.map(valueAt).toMap)
    catch {
      case NonFatal(cause) => Left(new InvalidMetricTimeSeriesException(cause, index))
    }
  }
}
|