embulk-output-fluentd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +27 -0
  3. data/.gitignore +80 -0
  4. data/.scalafmt.conf +2 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +59 -0
  7. data/build.gradle +83 -0
  8. data/build.sbt +32 -0
  9. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  10. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  11. data/gradlew +169 -0
  12. data/gradlew.bat +84 -0
  13. data/lib/embulk/output/fluentd.rb +3 -0
  14. data/project/build.properties +1 -0
  15. data/project/plugins.sbt +2 -0
  16. data/settings.gradle +1 -0
  17. data/src/main/resources/application.conf +8 -0
  18. data/src/main/scala/org/embulk/output/fluentd/ColumnVisitor.scala +43 -0
  19. data/src/main/scala/org/embulk/output/fluentd/FluentdOutputPlugin.scala +55 -0
  20. data/src/main/scala/org/embulk/output/fluentd/FluentdTransactionalPageOutput.scala +52 -0
  21. data/src/main/scala/org/embulk/output/fluentd/PluginTask.scala +36 -0
  22. data/src/main/scala/org/embulk/output/fluentd/sender/ActorManager.scala +29 -0
  23. data/src/main/scala/org/embulk/output/fluentd/sender/Sender.scala +131 -0
  24. data/src/main/scala/org/embulk/output/fluentd/sender/SenderBuilder.scala +40 -0
  25. data/src/main/scala/org/embulk/output/fluentd/sender/SenderFlow.scala +36 -0
  26. data/src/main/scala/org/embulk/output/fluentd/sender/SuperVisor.scala +46 -0
  27. data/src/test/scala/org/embulk/output/fluentd/TestActorManager.scala +42 -0
  28. data/src/test/scala/org/embulk/output/fluentd/sender/SenderFlowImplTest.scala +87 -0
  29. data/src/test/scala/org/embulk/output/fluentd/sender/SenderImplTest.scala +119 -0
  30. metadata +118 -0
@@ -0,0 +1,84 @@
1
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem
@rem Flow: locate java.exe (JAVA_HOME first, PATH otherwise), collect the
@rem command-line arguments, then run org.gradle.wrapper.GradleWrapperMain from
@rem gradle\wrapper\gradle-wrapper.jar next to this script. Exits with code 1
@rem when Java cannot be found or the wrapper returns a non-zero ERRORLEVEL.

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem %~dp0 is the drive and path of this script, %~n0 its base name.
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem No JAVA_HOME: probe for a java.exe on PATH by running it once.
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any double quotes from JAVA_HOME before building the java.exe path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

@rem NOTE: the label below is reached either way (by this goto or by fall-through).
if not "%OS%" == "Windows_NT" goto win9xME_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
@rem NOTE: _SKIP is assigned here but is not read anywhere else in this script.
set _SKIP=2

:win9xME_args_slurp
@rem With no first argument CMD_LINE_ARGS stays empty; otherwise all arguments are passed through.
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
@rem A non-zero ERRORLEVEL falls through into :fail below.
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
@@ -0,0 +1,3 @@
1
# Register the Java-implemented "fluentd" output plugin with Embulk, pointing it
# at the classpath directory located four levels above this file.
plugin_classpath = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_output(
  "fluentd",
  "org.embulk.output.fluentd.FluentdOutputPlugin",
  plugin_classpath
)
@@ -0,0 +1 @@
1
+ sbt.version=0.13.15
@@ -0,0 +1,2 @@
1
+ addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.7")
2
+ addSbtPlugin("io.get-coursier" % "sbt-coursier" % "1.0.0-RC6")
@@ -0,0 +1 @@
1
+ rootProject.name = 'embulk-output-fluentd'
@@ -0,0 +1,8 @@
1
+ blocking-dispatcher {
2
+ type = Dispatcher
3
+ executor = "thread-pool-executor"
4
+ thread-pool-executor {
5
+ fixed-pool-size = 4
6
+ }
7
+ throughput = 100
8
+ }
@@ -0,0 +1,43 @@
1
+ package org.embulk.output.fluentd
2
+
3
+ import org.embulk.spi.time.TimestampFormatter
4
+ import org.embulk.spi.{Column, PageReader, ColumnVisitor => EmbulkColumnVisitor}
5
+
6
/**
  * Embulk column visitor that collects the current record of `reader` into a
  * name -> value map.
  *
  * Columns whose value is null are left out of the map entirely. Timestamps are
  * rendered with `timestampFormatter`, JSON values with `toJson`, and primitive
  * values are boxed so that every entry is an `AnyRef`.
  *
  * @param reader             page reader positioned on the record to read
  * @param timestampFormatter formatter applied to timestamp columns
  */
case class ColumnVisitor(reader: PageReader, timestampFormatter: TimestampFormatter) extends EmbulkColumnVisitor {
  import scala.collection.mutable

  // Accumulates the visited columns; exposed read-only through getRecord.
  private val record = mutable.Map[String, AnyRef]()

  /** Snapshot of the columns collected so far as an immutable map. */
  def getRecord: Map[String, AnyRef] = record.toMap

  /** Reads `column` with `method`, or returns `None` when the column is null. */
  def value[A](column: Column, method: => (Column => A)): Option[A] =
    if (!reader.isNull(column)) Some(method(column)) else None

  /** Stores `value` under the column's name, replacing any previous entry. */
  def put[A <: AnyRef](column: Column, value: A): Unit =
    record.update(column.getName, value)

  override def booleanColumn(column: Column): Unit =
    for (flag <- value(column, reader.getBoolean)) put(column, Boolean.box(flag))

  override def longColumn(column: Column): Unit =
    for (number <- value(column, reader.getLong)) put(column, Long.box(number))

  override def doubleColumn(column: Column): Unit =
    for (number <- value(column, reader.getDouble)) put(column, Double.box(number))

  override def stringColumn(column: Column): Unit =
    for (text <- value(column, reader.getString)) put(column, text)

  override def timestampColumn(column: Column): Unit =
    for (timestamp <- value(column, reader.getTimestamp)) put(column, timestampFormatter.format(timestamp))

  override def jsonColumn(column: Column): Unit =
    for (json <- value(column, reader.getJson)) put(column, json.toJson)

}
@@ -0,0 +1,55 @@
1
+ package org.embulk.output.fluentd
2
+
3
+ import java.util
4
+
5
+ import org.embulk.config._
6
+ import org.embulk.output.fluentd.sender._
7
+ import org.embulk.spi._
8
+ import wvlet.log._
9
+
10
/**
  * Embulk output plugin that forwards records to fluentd.
  *
  * One [[org.embulk.output.fluentd.sender.Sender]] is shared by every task
  * opened in this JVM. It is created by the first `open` call and kept in the
  * companion object until `transaction` closes it.
  */
class FluentdOutputPlugin extends OutputPlugin {

  /**
    * Loads the plugin task, records the task count, runs the output tasks via
    * `control`, and closes the shared sender afterwards.
    *
    * The sender is closed in a `finally` block so that it is released even when
    * `control.run` throws.
    */
  override def transaction(config: ConfigSource,
                           schema: Schema,
                           taskCount: Int,
                           control: OutputPlugin.Control): ConfigDiff = {
    Logger.setDefaultLogLevel(LogLevel.OFF)
    val task = config.loadConfig(classOf[PluginTask])
    FluentdOutputPlugin.taskCountOpt = Some(taskCount)
    try {
      control.run(task.dump())
    } finally {
      FluentdOutputPlugin.closeSender()
    }
    Exec.newConfigDiff
  }

  /** Resuming is not supported; always throws `UnsupportedOperationException`. */
  override def resume(taskSource: TaskSource,
                      schema: Schema,
                      taskCount: Int,
                      control: OutputPlugin.Control): ConfigDiff =
    throw new UnsupportedOperationException("fluentd output plugin does not support resuming")

  /** Nothing to clean up. */
  override def cleanup(taskSource: TaskSource,
                       schema: Schema,
                       taskCount: Int,
                       successTaskReports: util.List[TaskReport]): Unit = {}

  /**
    * Returns a page output bound to the shared sender, building the sender on
    * first use.
    *
    * The lock is taken on the companion object, a stable monitor. Locking on the
    * `sender` field itself would lock on whatever `Option` instance the field
    * currently holds (initially the global `None` singleton).
    */
  override def open(taskSource: TaskSource, schema: Schema, taskIndex: Int): TransactionalPageOutput = {
    FluentdOutputPlugin.synchronized {
      FluentdOutputPlugin.sender match {
        case Some(sender) =>
          FluentdTransactionalPageOutput(taskSource, schema, taskIndex, FluentdOutputPlugin.taskCountOpt, sender)
        case None =>
          val task = taskSource.loadTask(classOf[PluginTask])
          SenderBuilder(task).withSession { session =>
            val sender = session.build[Sender]
            FluentdOutputPlugin.sender = Option(sender)
            FluentdTransactionalPageOutput(taskSource, schema, taskIndex, FluentdOutputPlugin.taskCountOpt, sender)
          }
      }
    }
  }
}

/** JVM-wide state shared by all plugin instances and tasks. */
object FluentdOutputPlugin {
  // Both fields may be read from task threads; @volatile makes writes visible
  // to readers that do not hold the companion lock.
  @volatile var sender: Option[Sender] = None
  @volatile var taskCountOpt: Option[Int] = None

  /**
    * Closes the shared sender, if any, and forgets it so that a later
    * transaction in the same JVM builds a fresh one instead of reusing a closed
    * instance.
    */
  private[fluentd] def closeSender(): Unit = synchronized {
    try {
      sender.foreach(_.close())
    } finally {
      sender = None
    }
  }
}
@@ -0,0 +1,52 @@
1
+ package org.embulk.output.fluentd
2
+
3
+ import com.google.common.base.Optional
4
+ import org.embulk.config.{TaskReport, TaskSource}
5
+ import org.embulk.output.fluentd.sender.Sender
6
+ import org.embulk.spi._
7
+ import org.embulk.spi.time.TimestampFormatter
8
+
9
/**
  * Page output that converts every record of a page into a name -> value map
  * and hands the batch to the shared `sender`.
  *
  * @param taskSource   serialized plugin task
  * @param schema       schema of the incoming pages
  * @param taskIndex    index of this task
  * @param taskCountOpt task count recorded by the plugin's `transaction`; empty
  *                     when `transaction` did not run in this JVM
  * @param sender       sender that receives the converted records
  */
case class FluentdTransactionalPageOutput(taskSource: TaskSource,
                                          schema: Schema,
                                          taskIndex: Int,
                                          taskCountOpt: Option[Int],
                                          sender: Sender)
    extends TransactionalPageOutput {

  val task: PluginTask = taskSource.loadTask(classOf[PluginTask])
  val logger = Exec.getLogger(classOf[FluentdTransactionalPageOutput])

  /** Builds a new timestamp formatter from the task settings. */
  def timestampFormatter(): TimestampFormatter =
    new TimestampFormatter(task, Optional.absent())

  /** Converts the page and offers all of its records to the sender. */
  override def add(page: Page): Unit = {
    // Materialise eagerly: the records are read from `page` through a reader
    // that is closed at the end of iteration, so no lazy view should escape.
    sender(asIterator(page).toVector)
    ()
  }

  /**
    * Iterates over the records of `page`, one map per record.
    *
    * End of page is detected from `reader.nextRecord()` itself rather than from
    * an empty-map sentinel: a record whose columns are all null converts to an
    * empty map and must not terminate the iteration. The reader is closed once
    * `nextRecord()` reports that no record is left.
    */
  def asIterator(page: Page): Iterator[Map[String, AnyRef]] = {
    val reader: PageReader = new PageReader(schema)
    reader.setPage(page)
    // One formatter per page is enough; it does not depend on the record.
    val formatter = timestampFormatter()
    Iterator
      .continually(reader.nextRecord())
      .takeWhile { hasRecord =>
        if (!hasRecord) reader.close()
        hasRecord
      }
      .map { _ =>
        val visitor = ColumnVisitor(reader, formatter)
        schema.visitColumns(visitor)
        visitor.getRecord
      }
  }

  override def commit(): TaskReport = Exec.newTaskReport
  override def abort(): Unit = ()

  /** Closes the sender right away when no task count is known (map/reduce executor). */
  override def finish(): Unit = {
    logger.debug(s"finished at $this")
    // for map/reduce executor.
    if (taskCountOpt.isEmpty) {
      // close immediately.
      sender.close()
    }
  }
  override def close(): Unit = ()
}
@@ -0,0 +1,36 @@
1
+ package org.embulk.output.fluentd
2
+
3
+ import com.google.common.base.Optional
4
+ import org.embulk.config.{Config, ConfigDefault, Task}
5
+ import org.embulk.spi.time.TimestampFormatter
6
+
7
/**
  * Configuration of the fluentd output plugin.
  *
  * Each accessor is bound to the YAML key named in its `@Config` annotation;
  * `@ConfigDefault` gives the value used when the key is omitted. Timestamp
  * formatting options come from `TimestampFormatter.Task`.
  */
trait PluginTask extends Task with TimestampFormatter.Task {

  // --- destination -------------------------------------------------------

  /** `host`; defaults to 127.0.0.1. */
  @Config("host") @ConfigDefault("\"127.0.0.1\"")
  def getHost: String

  /** `port`; defaults to 24224. */
  @Config("port") @ConfigDefault("24224")
  def getPort: Int

  /** `tag`; required (no default). */
  @Config("tag")
  def getTag: String

  /** `time_key`; optional, absent by default. */
  @Config("time_key") @ConfigDefault("null")
  def getTimeKey: Optional[String]

  // --- throughput --------------------------------------------------------
  // NOTE(review): how these values are applied is decided by the sender
  // builder, which is not part of this file - confirm before documenting more.

  /** `async_size`; defaults to 1. */
  @Config("async_size") @ConfigDefault("1")
  def getAsyncSize: Int

  /** `request_per_seconds`; defaults to 0. */
  @Config("request_per_seconds") @ConfigDefault("0")
  def getRequestPerSeconds: Int

  /** `request_grouping_size`; defaults to 100. */
  @Config("request_grouping_size") @ConfigDefault("100")
  def getRequestGroupingSize: Int

}
@@ -0,0 +1,29 @@
1
+ package org.embulk.output.fluentd.sender
2
+
3
+ import akka.actor.{ActorRef, ActorSystem, Props, Terminated}
4
+ import akka.stream.{ActorMaterializer, ActorMaterializerSettings, Supervision}
5
+
6
+ import scala.concurrent.{ExecutionContext, Future}
7
+
8
/**
  * Default [[ActorManager]]: creates the supervisor actor and a stream
  * materializer on top of the given actor system.
  *
  * Both the materializer and the exposed execution context use the dispatcher
  * named "blocking-dispatcher".
  */
case class ActorManagerImpl(implicit val system: ActorSystem) extends ActorManager {
  val supervisor: ActorRef = system.actorOf(Props[SuperVisor])

  // Stream supervision: resume on any Exception, stop on every other Throwable.
  val decider: Supervision.Decider = {
    case _: Exception => Supervision.Resume
    case _            => Supervision.Stop
  }

  implicit val materializer: ActorMaterializer = {
    val settings = ActorMaterializerSettings(system)
      .withSupervisionStrategy(decider)
      .withDispatcher("blocking-dispatcher")
    ActorMaterializer(settings)
  }

  implicit val dispatcher: ExecutionContext =
    system.dispatchers.lookup("blocking-dispatcher")
}
22
+
23
/**
  * Bundle of the Akka objects the sender needs: the actor system, a stream
  * materializer, an execution context and the supervisor actor.
  */
trait ActorManager {
  // Implicits made available to code that imports the members of an instance.
  implicit val system: ActorSystem
  implicit val materializer: ActorMaterializer
  implicit val dispatcher: ExecutionContext

  /** Actor that receives the sender's status messages. */
  val supervisor: ActorRef

  /** Terminates the actor system; the future completes once it has terminated. */
  def terminate(): Future[Terminated] = system.terminate()
}
@@ -0,0 +1,131 @@
1
+ package org.embulk.output.fluentd.sender
2
+
3
+ import akka._
4
+ import akka.pattern.ask
5
+ import akka.stream._
6
+ import akka.stream.scaladsl._
7
+ import akka.util.{ByteString, Timeout}
8
+ import org.slf4j.Logger
9
+
10
+ import scala.concurrent._
11
+ import scala.concurrent.duration._
12
+ import scala.util._
13
+
14
/** Sends batches of records (name -> value maps) to fluentd through a stream queue. */
trait Sender {

  /** Queue feeding the underlying stream. */
  val instance: SourceQueueWithComplete[Seq[Map[String, AnyRef]]]

  /** Offers a batch of records to the queue. */
  def apply(value: Seq[Map[String, AnyRef]]): Future[QueueOfferResult]

  /** Sends one encoded payload that carries `size` records. */
  def tcpHandling(size: Int, byteString: ByteString): Future[Done]

  /** Blocks until the sender reports a final result. */
  def waitForComplete(): Result

  /** Shuts the sender down. */
  def close(): Unit
}
21
+
22
/**
  * [[Sender]] backed by an Akka Streams queue.
  *
  * Offered batches are regrouped into chunks of `groupedSize`, encoded by
  * `senderFlow.msgPackFlow`, optionally throttled, and written to
  * `host`:`port` with up to `asyncSize` sends in flight. Progress is reported
  * to `actorManager.supervisor` via `Record`, `Complete`, `Retried` and
  * `Failed` messages, and a `LogStatus` message is scheduled every 30 seconds.
  *
  * @param host                      fluentd host
  * @param port                      fluentd port
  * @param groupedSize               number of offered batches grouped per request
  * @param asyncSize                 parallelism of the send stage (also the throttle element count)
  * @param senderFlow                provides the encoding and TCP flows
  * @param actorManager              actor system, materializer, dispatcher and supervisor
  * @param asyncSizeRequestPerSecond throttle period in seconds; throttling is off when not positive
  * @param retryCount                number of retries after a failed send
  * @param retryDelayIntervalSecond  delay before each retry, in seconds
  */
case class SenderImpl private[sender] (host: String,
                                       port: Int,
                                       groupedSize: Int,
                                       asyncSize: Int,
                                       senderFlow: SenderFlow,
                                       actorManager: ActorManager,
                                       asyncSizeRequestPerSecond: Int = 0,
                                       retryCount: Int = 0,
                                       retryDelayIntervalSecond: Int = 10)(implicit logger: Logger)
    extends Sender {
  import actorManager._
  system.scheduler.schedule(0.seconds, 30.seconds, supervisor, LogStatus(logger))

  val retryDelayIntervalSecondDuration: FiniteDuration = retryDelayIntervalSecond.seconds

  /** Reports the batch size to the supervisor, then offers the batch to the queue. */
  def apply(value: Seq[Map[String, AnyRef]]): Future[QueueOfferResult] = {
    actorManager.supervisor ! Record(value.size)
    instance.offer(value)
  }

  /**
    * Asks the supervisor to close. Unless it reports that it was already
    * closed, completes the queue, waits for the final counters, terminates the
    * actor system and logs the summary.
    */
  def close(): Unit = {
    implicit val timeout: Timeout = Timeout(5.seconds)
    val closing: Future[ClosedStatus] = (actorManager.supervisor ? Close).mapTo[ClosedStatus]
    val closedStatus                  = Await.result(closing, Duration.Inf)
    if (!closedStatus.alreadyClosed) {
      logger.debug("wait for closing.")
      // wait for akka-stream termination.
      instance.complete()
      val summary = waitForComplete()
      Await.result(actorManager.terminate(), Duration.Inf)
      // NOTE(review): looks redundant after terminate() above; kept because an
      // ActorManager implementation may override terminate() - confirm.
      actorManager.system.terminate()
      logger.info(
        s"Completed RecordCount:${summary.record} completedCount:${summary.complete} retriedRecordCount:${summary.retried}")
    }
  }

  /**
    * Polls the supervisor once per second until every recorded record is
    * either completed or failed, or until the supervisor answers with `Stop`.
    *
    * Each status reply is awaited on the calling thread, so no state is shared
    * with callback threads, and a failed or unexpected reply raises an error in
    * the caller instead of being thrown inside a callback where nothing
    * observes it.
    *
    * @throws RuntimeException when the status request fails or the reply is unexpected
    */
  def waitForComplete(): Result = {
    logger.debug("wait for complete.")
    implicit val timeout: Timeout = Timeout(5.seconds)

    @scala.annotation.tailrec
    def poll(): Result =
      // The ask future itself fails after `timeout`, so the await is bounded.
      Try(Await.result(actorManager.supervisor ? GetStatus, Duration.Inf)) match {
        case Success(current @ Result(recordCount, complete, failed, _)) =>
          logger.debug(s"current status $current")
          if (recordCount == (complete + failed)) {
            current
          } else {
            Thread.sleep(1000)
            poll()
          }
        case Success(Stop(recordCount, complete, failed, retried)) =>
          Result(recordCount, complete, failed, retried)
        case Success(_) =>
          sys.error("fail of wait complete.")
        case Failure(cause) =>
          throw new RuntimeException("fail of wait complete.", cause)
      }

    poll()
  }

  // Queue -> group -> msgpack encode -> (optional throttle) -> async TCP send.
  val instance: SourceQueueWithComplete[Seq[Map[String, AnyRef]]] = {
    val base = Source
      .queue(Int.MaxValue, OverflowStrategy.backpressure)
      .grouped(groupedSize)
      .via(senderFlow.msgPackFlow)
    val withThrottle = if (asyncSizeRequestPerSecond > 0) {
      base.throttle(asyncSize, asyncSizeRequestPerSecond.seconds, 0, ThrottleMode.Shaping)
    } else base
    withThrottle
      .mapAsync(asyncSize) {
        case (size, byteString) =>
          tcpHandling(size, byteString)
      }
      .to(Sink.ignore)
      .run()
  }

  /** Sends one payload over a TCP connection flow to `host`:`port`. */
  def sendCommand(byteString: ByteString): Future[Done] =
    Source
      .single(byteString)
      .via(senderFlow.tcpConnectionFlow(host, port))
      .runWith(Sink.ignore)

  /**
    * Sends `byteString` and reports the outcome for its `size` records to the
    * supervisor.
    *
    * A failed attempt schedules the next one after the retry delay while
    * retries remain. The returned future belongs to the first attempt only, and
    * an `Exception` from it is recovered to `Done`, so the stream keeps going
    * while retries run in the background.
    */
  def tcpHandling(size: Int, byteString: ByteString): Future[Done] = {
    def attempt(remainingRetries: Int): Future[Done] = {
      val futureCommand = sendCommand(byteString)
      futureCommand.onComplete {
        case Success(_) =>
          actorManager.supervisor ! Complete(size)
        case Failure(e) if remainingRetries > 0 =>
          logger.info(
            s"Sending fluentd ${size.toString} records was failed. - will retry ${remainingRetries - 1} more times ${retryDelayIntervalSecondDuration.toSeconds} seconds later.",
            e)
          actorManager.supervisor ! Retried(size)
          akka.pattern.after(retryDelayIntervalSecondDuration, actorManager.system.scheduler)(
            attempt(remainingRetries - 1))
        case Failure(e) =>
          actorManager.supervisor ! Failed(size)
          logger.error(
            "Sending fluentd retry count is over and will be terminate soon. Please check your fluentd environment.",
            e)
          // NOTE(review): this throws inside the completion callback, so it is
          // not seen by any caller. An `instance.complete()` call used to follow
          // this line but could never run; it was removed rather than enabled,
          // because completing the queue here would change runtime behaviour.
          sys.error("Sending fluentd was terminated cause of retry count over.")
      }
      futureCommand
    }
    attempt(retryCount).recoverWith {
      case _: Exception =>
        Future.successful(Done)
    }
  }

}