embulk-filter-key_in_redis 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8c07a54c906730f2de5c0a2e32a1792cc0170877
4
- data.tar.gz: 3cf0b84f743814a7ee32a35cb9a084eca9db287d
3
+ metadata.gz: 37e4700ee70663be6a24b53e20ea86dbeea87336
4
+ data.tar.gz: 845fc7bc8b2d19f964483a43f9f860a9674eb752
5
5
  SHA512:
6
- metadata.gz: 5b51785e0dfe0a5007cea67fd60c96aaa9c805ec605b074db141310532418847cd07f4d38d5d6745ff9a5067f7f5500f0fda004212e9f88a22d9d98018d00b00
7
- data.tar.gz: d73e095906c90aef7216fcba69a5f514a7871acde3d1b94ce07918f16cdfecb5ffa05b04bfe8485baf0f3ef62c408a6a11004864fd9a24e6facb87ddc7ef5508
6
+ metadata.gz: 04400de1c5bc2041f28e29b4a3e2a27c407c4171e49408d70f3e02d75e2109efaf14fda42ba49cb9f2a9e94ece3184248b018fe98f43989f35cca82f234ee73c
7
+ data.tar.gz: 8bef1f124c5fdd3f93f7fac9e9b7a9ad52f071050e1013d07f0be320a015f89b05e05565855f7baa5ead073b60495afe27f9946c7b1f7d53af5722adc5ef65de
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.2"
16
+ version = "0.1.3"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -0,0 +1,26 @@
1
+ akka.actor.deployment {
2
+ /filtering_actor {
3
+ dispatcher = embulk-filter_key_in_redis-dispatcher
4
+ }
5
+ }
6
+
7
+ embulk-filter_key_in_redis-dispatcher {
8
+ # Dispatcher is the name of the event-based dispatcher
9
+ type = Dispatcher
10
+ # What kind of ExecutionService to use
11
+ executor = "fork-join-executor"
12
+ # Configuration for the fork join pool
13
+ fork-join-executor {
14
+ # Min number of threads to cap factor-based parallelism number to
15
+ parallelism-min = 2
16
+ # Parallelism (threads) ... ceil(available processors * factor)
17
+ parallelism-factor = 2.0
18
+ # Max number of threads to cap factor-based parallelism number to
19
+ parallelism-max = 10
20
+ }
21
+
22
+ # Throughput defines the maximum number of messages to be
23
+ # processed per actor before the thread jumps to the next actor.
24
+ # Set to 1 for as fair as possible.
25
+ throughput = 100
26
+ }
@@ -1,9 +1,14 @@
1
1
  package org.embulk.filter.key_in_redis
2
2
 
3
+ import java.util.concurrent.TimeUnit
4
+
3
5
  import com.google.common.base.Optional
4
- import org.embulk.filter.key_in_redis.column._
6
+ import org.embulk.filter.key_in_redis.actor._
7
+ import org.embulk.filter.key_in_redis.row._
8
+ import org.embulk.filter.key_in_redis.ToFutureExtensionOps._
9
+ import akka.pattern.ask
10
+ import akka.util.Timeout
5
11
 
6
- import scala.collection.mutable.ListBuffer
7
12
  import scala.collection.JavaConverters._
8
13
  import org.embulk.spi.time.TimestampFormatter
9
14
  import org.embulk.spi.{
@@ -20,13 +25,13 @@ case class PageOutput(task: PluginTask,
20
25
  output: EmbulkPageOutput)
21
26
  extends EmbulkPageOutput {
22
27
  val pageBuilder = new PageBuilder(Exec.getBufferAllocator, schema, output)
28
+ var finished = false
23
29
  def timestampFormatter(): TimestampFormatter =
24
30
  new TimestampFormatter(task, Optional.absent())
25
31
 
26
32
  override def add(page: Page): Unit = {
27
33
  val baseReader: PageReader = new PageReader(schema)
28
34
  baseReader.setPage(page)
29
- val rows = new ListBuffer[SetValueColumnVisitor]()
30
35
  while (baseReader.nextRecord()) {
31
36
  val setValueVisitor = SetValueColumnVisitor(
32
37
  baseReader,
@@ -36,19 +41,49 @@ case class PageOutput(task: PluginTask,
36
41
  task.getAppender,
37
42
  task.getMatchAsMD5)
38
43
  schema.visitColumns(setValueVisitor)
39
- rows.append(setValueVisitor)
44
+ Actors.register ! setValueVisitor.getRow(pageBuilder)
40
45
  }
41
- KeyInRedisFilterPlugin.redis.foreach { redis =>
42
- val result = redis.exists(rows.map(_.getMatchKey))
43
- rows.foreach { row =>
44
- if (!result(row.getMatchKey)) {
45
- row.addRecord(pageBuilder)
46
- }
46
+ baseReader.close()
47
+ }
48
+
49
+ def counter(): Int = {
50
+ import scala.concurrent.ExecutionContext.Implicits.global
51
+ implicit val timeout = Timeout(24, TimeUnit.HOURS)
52
+ (Actors.register ? Counter(pageBuilder))
53
+ .mapTo[Int]
54
+ .toTask
55
+ .unsafePerformSync
56
+ }
57
+
58
+ def forceWrite(): Unit = {
59
+ Actors.register ! ForceWrite(pageBuilder)
60
+ }
61
+
62
+ override def finish(): Unit = {
63
+ var lastRecord = false
64
+ while (counter() != 0) {
65
+ if (!lastRecord) {
66
+ forceWrite()
67
+ lastRecord = true
47
68
  }
69
+ Thread.sleep(1000)
70
+ }
71
+ if (!finished) {
72
+ pageBuilder.finish()
73
+ finished = true
48
74
  }
49
75
  }
50
76
 
51
- override def finish(): Unit = pageBuilder.finish()
52
- override def close(): Unit = pageBuilder.close()
77
+ override def close(): Unit = {
78
+ var lastRecord = false
79
+ while (counter() != 0 & finished) {
80
+ if (!lastRecord) {
81
+ forceWrite()
82
+ lastRecord = true
83
+ }
84
+ Thread.sleep(1000)
85
+ }
86
+ pageBuilder.close()
87
+ }
53
88
 
54
- }
89
+ }
@@ -1,8 +1,8 @@
1
- package org.embulk.filter.key_in_redis.redis
1
+ package org.embulk.filter.key_in_redis
2
2
 
3
3
  import scala.concurrent.{ExecutionContext, Future}
4
4
  import scala.util.{Failure, Success}
5
- import scalaz._, Scalaz._
5
+ import scalaz.Scalaz._
6
6
  import scalaz.concurrent._
7
7
 
8
8
  final class FutureExtensionOps[A](self: Future[A]) {
@@ -0,0 +1,12 @@
1
+ package org.embulk.filter.key_in_redis.actor
2
+
3
+ import akka.actor.{ActorRef, ActorSystem, Props}
4
+
5
+ object Actors {
6
+ implicit val actorSystem: ActorSystem = akka.actor.ActorSystem(
7
+ "redis-register",
8
+ classLoader = Some(this.getClass.getClassLoader))
9
+ val register: ActorRef =
10
+ actorSystem.actorOf(Props(classOf[Register]), "filtering_actor")
11
+
12
+ }
@@ -0,0 +1,65 @@
1
+ package org.embulk.filter.key_in_redis.actor
2
+
3
+ import akka.actor._
4
+ import org.embulk.filter.key_in_redis.KeyInRedisFilterPlugin
5
+ import org.embulk.filter.key_in_redis.redis.Redis
6
+ import org.embulk.filter.key_in_redis.row.Row
7
+ import org.embulk.spi.PageBuilder
8
+
9
+ import scala.collection.mutable
10
+ import scala.concurrent._
11
+
12
+ class Register extends Actor {
13
+
14
+ implicit val ec: ExecutionContextExecutor = context.system.dispatcher
15
+ var rowList: List[Row] = List.empty[Row]
16
+ lazy val redis: Redis =
17
+ KeyInRedisFilterPlugin.redis.getOrElse(sys.error("redis is undefined."))
18
+ val counter: mutable.Map[PageBuilder, Int] = mutable.Map[PageBuilder, Int]()
19
+
20
+ override def receive: PartialFunction[Any, Unit] = {
21
+ case row: Row =>
22
+ rowList = row :: rowList
23
+ counter.put(row.pageBuilder, counter.getOrElse(row.pageBuilder, 0) + 1)
24
+ if (rowList.size == 500) {
25
+ addRecords(rowList)
26
+ rowList = List.empty[Row]
27
+ }
28
+ case Counter(pb) =>
29
+ sender() ! counter.getOrElse(pb, 0)
30
+ case ForceWrite(pb) =>
31
+ val (owned, other) =
32
+ rowList.partition(_.pageBuilder == pb)
33
+ addRecords(owned)
34
+ rowList = other
35
+ case Add(row) =>
36
+ counter.put(row.pageBuilder, counter(row.pageBuilder) - 1)
37
+ row.addRecord()
38
+ case Ignore(row) =>
39
+ counter.put(row.pageBuilder, counter(row.pageBuilder) - 1)
40
+ case TotalCount =>
41
+ sender() ! counter.foldLeft[Int](0) {
42
+ case (total, (_, counter: Int)) =>
43
+ total + counter
44
+ }
45
+ }
46
+
47
+ private def addRecords(rows: List[Row]): Unit = {
48
+ redis.exists(rows.map(_.matchKey)).foreach { resultMap =>
49
+ rows.foreach { row =>
50
+ val result = resultMap(row.matchKey)
51
+ if (!result) {
52
+ self ! Add(row)
53
+ } else {
54
+ self ! Ignore(row)
55
+ }
56
+ }
57
+ }
58
+ }
59
+ }
60
+
61
+ case object TotalCount
62
+ case class Add(row: Row)
63
+ case class Ignore(row: Row)
64
+ case class Counter(pageBuilder: PageBuilder)
65
+ case class ForceWrite(pageBuilder: PageBuilder)
@@ -1,14 +1,20 @@
1
1
  package org.embulk.filter.key_in_redis.redis
2
2
 
3
+ import java.util.concurrent.TimeUnit
4
+
5
+ import akka.util.Timeout
6
+ import akka.pattern.ask
3
7
  import org.slf4j.Logger
4
8
  import redis._
9
+ import org.embulk.filter.key_in_redis.actor.Actors._
5
10
 
6
11
  import scala.annotation.tailrec
7
12
  import scala.concurrent.duration._
8
13
  import scala.concurrent._
9
14
  import scala.util._
10
-
11
15
  import scala.collection.mutable
16
+ import org.embulk.filter.key_in_redis.ToFutureExtensionOps._
17
+ import org.embulk.filter.key_in_redis.actor._
12
18
 
13
19
  class Redis(setKey: String,
14
20
  host: String,
@@ -16,10 +22,8 @@ class Redis(setKey: String,
16
22
  replicaHosts: Map[String, Int],
17
23
  db: Option[Int],
18
24
  loadOnMemory: Boolean)(implicit logger: Logger) {
19
- implicit val actorSystem = akka.actor.ActorSystem(
20
- "redis-client",
21
- classLoader = Some(this.getClass.getClassLoader))
22
25
 
26
+ implicit val ec: ExecutionContextExecutor = actorSystem.dispatcher
23
27
  lazy val cacheInstance: Option[Cache] = if (loadOnMemory) {
24
28
  Some(Cache(() => loadAll()))
25
29
  } else None
@@ -37,8 +41,7 @@ class Redis(setKey: String,
37
41
 
38
42
  def loadAll(): mutable.Set[String] = {
39
43
  logger.info(s"Loading from Redis start.")
40
- import scala.concurrent.ExecutionContext.Implicits.global
41
- import ToFutureExtensionOps._
44
+ import org.embulk.filter.key_in_redis.ToFutureExtensionOps._
42
45
  val buffer = mutable.Set.empty[String]
43
46
  @tailrec
44
47
  def _scan(cursor: Int): Unit = {
@@ -57,10 +60,10 @@ class Redis(setKey: String,
57
60
  }
58
61
 
59
62
  def ping(): String = {
60
- import scala.concurrent.ExecutionContext.Implicits.global
61
63
  val s: Future[String] = redis.ping()
62
64
  s.onComplete {
63
- case Success(result) => result
65
+ case Success(result) =>
66
+ result
64
67
  case Failure(t) =>
65
68
  actorSystem.shutdown()
66
69
  throw t
@@ -68,38 +71,63 @@ class Redis(setKey: String,
68
71
  Await.result(s, 10.minute)
69
72
  }
70
73
 
71
- def exists(values: Seq[String]): Map[String, Boolean] = cacheInstance match {
72
- case Some(cached) =>
73
- values.map { v =>
74
- v -> cached.contains(v)
75
- }.toMap
76
- case None =>
77
- import scala.concurrent.ExecutionContext.Implicits.global
78
- import ToFutureExtensionOps._
79
- val input = values.zipWithIndex.map(_.swap).toMap
80
- val transaction = redis.transaction()
81
- val f = values.map { v =>
82
- transaction.sismember(setKey, v)
83
- }
84
- transaction.exec()
85
- val results = Future
86
- .sequence(f)
87
- .toTask
88
- .unsafePerformSync
89
- .zipWithIndex
90
- .map(_.swap)
91
- .toMap
92
- results.map {
93
- case (index, result) =>
94
- input(index) -> result
74
+ def keyExists(): Unit = {
75
+ val s: Future[Boolean] = redis.exists(setKey)
76
+ s.onComplete {
77
+ case Success(_) =>
78
+ case Failure(t) =>
79
+ actorSystem.shutdown()
80
+ throw t
81
+ }
82
+ val result = Await.result(s, 10.minute)
83
+ if (!result) {
84
+ actorSystem.shutdown()
85
+ throw sys.error(s"key not found in redis. $setKey")
86
+ }
87
+ }
88
+
89
+ def exists(values: Seq[String]): Future[mutable.Map[String, Boolean]] = {
90
+ val futureResult = cacheInstance match {
91
+ case Some(cached) =>
92
+ values.map { v =>
93
+ Future.successful(v -> cached.contains(v))
94
+ }
95
+ case None =>
96
+ val transaction = redis.transaction()
97
+ val futures = values.map { v =>
98
+ transaction.sismember(setKey, v).map { result =>
99
+ (v ,result)
100
+ }
101
+ }
102
+ transaction.exec()
103
+ futures
104
+ }
105
+ Future.sequence(futureResult).map { sequence =>
106
+ val result = mutable.ListMap[String,Boolean]()
107
+ sequence.foreach {
108
+ case (key, value) =>
109
+ result.put(key, value)
95
110
  }
111
+ result
112
+ }
96
113
  }
97
114
 
98
115
  def close(): Unit = {
116
+ while (counter() != 0) {
117
+ Thread.sleep(1000)
118
+ }
99
119
  redis.stop()
100
120
  // wait for stopping.
101
121
  Thread.sleep(1000)
102
122
  actorSystem.shutdown()
103
123
  }
104
124
 
125
+ def counter(): Int = {
126
+ implicit val timeout: Timeout = Timeout(24, TimeUnit.HOURS)
127
+ (Actors.register ? TotalCount)
128
+ .mapTo[Int]
129
+ .toTask
130
+ .unsafePerformSync
131
+ }
132
+
105
133
  }
@@ -0,0 +1,38 @@
1
+ package org.embulk.filter.key_in_redis.row
2
+
3
+ import org.embulk.spi.PageBuilder
4
+ import org.embulk.spi.`type`._
5
+ import org.embulk.spi.time.Timestamp
6
+ import org.msgpack.value.Value
7
+
8
+ import scala.collection.mutable
9
+
10
+ case class Row(matchKey: String,
11
+ seq: mutable.Set[ValueHolder[_]],
12
+ pageBuilder: PageBuilder) {
13
+ def addRecord(): Unit = {
14
+ seq.foreach { vh =>
15
+ vh.value match {
16
+ case Some(v: Boolean) if vh.column.getType.isInstanceOf[BooleanType] =>
17
+ pageBuilder.setBoolean(vh.column, v)
18
+ case Some(v: Long) if vh.column.getType.isInstanceOf[LongType] =>
19
+ pageBuilder.setLong(vh.column, v)
20
+ case Some(v: Double) if vh.column.getType.isInstanceOf[DoubleType] =>
21
+ pageBuilder.setDouble(vh.column, v)
22
+ case Some(v: String) if vh.column.getType.isInstanceOf[StringType] =>
23
+ pageBuilder.setString(vh.column, v)
24
+ case Some(v: Timestamp)
25
+ if vh.column.getType.isInstanceOf[TimestampType] =>
26
+ pageBuilder.setTimestamp(vh.column, v)
27
+ case Some(v: Value) if vh.column.getType.isInstanceOf[JsonType] =>
28
+ pageBuilder.setJson(vh.column, v)
29
+ case None =>
30
+ pageBuilder.setNull(vh.column)
31
+ case _ =>
32
+ sys.error("unmatched types.")
33
+ }
34
+ }
35
+ pageBuilder.addRecord()
36
+ }
37
+
38
+ }
@@ -1,18 +1,16 @@
1
- package org.embulk.filter.key_in_redis.column
1
+ package org.embulk.filter.key_in_redis.row
2
2
 
3
3
  import java.security.MessageDigest
4
4
 
5
5
  import org.bouncycastle.util.encoders.Hex
6
6
  import org.embulk.filter.key_in_redis.json.JsonParser
7
- import org.embulk.spi.`type`._
8
- import org.embulk.spi.time.{Timestamp, TimestampFormatter}
7
+ import org.embulk.spi.time.TimestampFormatter
9
8
  import org.embulk.spi.{
10
9
  Column,
11
10
  PageBuilder,
12
11
  PageReader,
13
12
  ColumnVisitor => EmbulkColumnVisitor
14
13
  }
15
- import org.msgpack.value.Value
16
14
 
17
15
  case class SetValueColumnVisitor(reader: PageReader,
18
16
  timestampFormatter: TimestampFormatter,
@@ -84,8 +82,6 @@ case class SetValueColumnVisitor(reader: PageReader,
84
82
  result
85
83
  }
86
84
 
87
- case class ValueHolder[A](column: Column, value: Option[A])
88
-
89
85
  def put(column: Column, value: String): Unit = {
90
86
  if (parameterKeys.contains(column.getName)) {
91
87
  recordMap.put(column.getName, value)
@@ -93,41 +89,18 @@ case class SetValueColumnVisitor(reader: PageReader,
93
89
  ()
94
90
  }
95
91
 
96
- def addRecord(pageBuilder: PageBuilder): Unit = {
97
- valueHolderSet.foreach { vh =>
98
- vh.value match {
99
- case Some(v: Boolean) if vh.column.getType.isInstanceOf[BooleanType] =>
100
- pageBuilder.setBoolean(vh.column, v)
101
- case Some(v: Long) if vh.column.getType.isInstanceOf[LongType] =>
102
- pageBuilder.setLong(vh.column, v)
103
- case Some(v: Double) if vh.column.getType.isInstanceOf[DoubleType] =>
104
- pageBuilder.setDouble(vh.column, v)
105
- case Some(v: String) if vh.column.getType.isInstanceOf[StringType] =>
106
- pageBuilder.setString(vh.column, v)
107
- case Some(v: Timestamp)
108
- if vh.column.getType.isInstanceOf[TimestampType] =>
109
- pageBuilder.setTimestamp(vh.column, v)
110
- case Some(v: Value) if vh.column.getType.isInstanceOf[JsonType] =>
111
- pageBuilder.setJson(vh.column, v)
112
- case None =>
113
- pageBuilder.setNull(vh.column)
114
- case _ =>
115
- sys.error("unmatched types.")
116
- }
117
- }
118
- pageBuilder.addRecord()
119
- }
120
-
121
- def getMatchKey: String = {
92
+ lazy val matchKey: String = {
122
93
  val keys = sortedKeys
123
94
  .flatMap { key =>
124
95
  recordMap.get(key)
125
96
  }
126
97
  .mkString(appender)
127
-
128
98
  if (matchAsMd5) {
129
99
  Hex.toHexString(digestMd5.digest(keys.getBytes()))
130
100
  } else keys
131
101
  }
132
102
 
103
+ def getRow(pageBuilder: PageBuilder): Row =
104
+ Row(matchKey, valueHolderSet, pageBuilder)
105
+
133
106
  }
@@ -0,0 +1,5 @@
1
+ package org.embulk.filter.key_in_redis.row
2
+
3
+ import org.embulk.spi.Column
4
+
5
+ case class ValueHolder[A](column: Column, value: Option[A])
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-key_in_redis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - smdmts
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-23 00:00:00.000000000 Z
11
+ date: 2017-08-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -59,14 +59,19 @@ files:
59
59
  - project/build.properties
60
60
  - project/plugins.sbt
61
61
  - settings.gradle
62
+ - src/main/resources/application.conf
62
63
  - src/main/scala/org/embulk/filter/key_in_redis/KeyInRedisFilterPlugin.scala
63
64
  - src/main/scala/org/embulk/filter/key_in_redis/PageOutput.scala
64
65
  - src/main/scala/org/embulk/filter/key_in_redis/PluginTask.scala
65
- - src/main/scala/org/embulk/filter/key_in_redis/column/SetValueColumnVisitor.scala
66
+ - src/main/scala/org/embulk/filter/key_in_redis/TaskExtensionOps.scala
67
+ - src/main/scala/org/embulk/filter/key_in_redis/actor/Actors.scala
68
+ - src/main/scala/org/embulk/filter/key_in_redis/actor/Register.scala
66
69
  - src/main/scala/org/embulk/filter/key_in_redis/json/JsonParser.scala
67
70
  - src/main/scala/org/embulk/filter/key_in_redis/redis/Cache.scala
68
71
  - src/main/scala/org/embulk/filter/key_in_redis/redis/Redis.scala
69
- - src/main/scala/org/embulk/filter/key_in_redis/redis/TaskExtensionOps.scala
72
+ - src/main/scala/org/embulk/filter/key_in_redis/row/Row.scala
73
+ - src/main/scala/org/embulk/filter/key_in_redis/row/SetValueColumnVisitor.scala
74
+ - src/main/scala/org/embulk/filter/key_in_redis/row/ValueHolder.scala
70
75
  - src/test/scala/org/embulk/filter/key_in_redis/json/JsonParserSpec.scala
71
76
  - classpath/akka-actor_2.11-2.3.6.jar
72
77
  - classpath/bcpkix-jdk15on-1.57.jar
@@ -81,7 +86,7 @@ files:
81
86
  - classpath/circe-numbers_2.11-0.8.0.jar
82
87
  - classpath/circe-parser_2.11-0.8.0.jar
83
88
  - classpath/config-1.2.1.jar
84
- - classpath/embulk-filter-key_in_redis-0.1.2.jar
89
+ - classpath/embulk-filter-key_in_redis-0.1.3.jar
85
90
  - classpath/jawn-parser_2.11-0.10.4.jar
86
91
  - classpath/machinist_2.11-0.6.1.jar
87
92
  - classpath/macro-compat_2.11-1.1.1.jar