embulk-filter-key_in_redis 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +27 -0
- data/README.md +6 -2
- data/build.gradle +4 -1
- data/build.sbt +3 -0
- data/src/main/scala/org/embulk/filter/key_in_redis/KeyInRedisFilterPlugin.scala +19 -5
- data/src/main/scala/org/embulk/filter/key_in_redis/PageOutput.scala +14 -6
- data/src/main/scala/org/embulk/filter/key_in_redis/PluginTask.scala +8 -0
- data/src/main/scala/org/embulk/filter/key_in_redis/redis/Cache.scala +11 -0
- data/src/main/scala/org/embulk/filter/key_in_redis/redis/Redis.scala +72 -8
- data/src/main/scala/org/embulk/filter/key_in_redis/redis/TaskExtensionOps.scala +23 -0
- metadata +10 -4
- data/src/test/scala/org/embulk/filter/key_in_redis/.gitkeep +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e093ef9f81cc900cf6eb92fd2a3b277f5e7340a
|
4
|
+
data.tar.gz: 6310042c14d5179a8093d2d289b495adb6a43109
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 58ffdafa04ed57b99cd88638e72c03798ec91390f3a0ee26aa8956600753fced81f4f4b62f88b557399ed87496946b14f90b041bd7aa8032424830c440401f7a
|
7
|
+
data.tar.gz: e6ad1f2a9993e9ea2b4003d04273fa1212ab4825ea91f24e665e42744d77d7440445aaf7930dfc9e66aff4ec96dc450bce2639bf3bde3af0f87a4ced54feebf9
|
@@ -0,0 +1,27 @@
|
|
1
|
+
version: 2
|
2
|
+
jobs:
|
3
|
+
build:
|
4
|
+
executorType: docker
|
5
|
+
docker:
|
6
|
+
- image: hseeberger/scala-sbt
|
7
|
+
working_directory: /root/embulk-filter-key_in_redis/
|
8
|
+
steps:
|
9
|
+
- checkout
|
10
|
+
- restore_cache:
|
11
|
+
name: Restoring Cache
|
12
|
+
keys:
|
13
|
+
- sbt
|
14
|
+
- setup_remote_docker
|
15
|
+
- run:
|
16
|
+
name: prepare
|
17
|
+
command: sbt update exit
|
18
|
+
- save_cache:
|
19
|
+
name: Saving Cache sbt
|
20
|
+
key: sbt
|
21
|
+
paths:
|
22
|
+
- "/root/.sbt"
|
23
|
+
- "/root/.ivy2"
|
24
|
+
- run:
|
25
|
+
name: compile
|
26
|
+
command: |
|
27
|
+
sbt compile test scalafmt::test exit
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Key In Redis filter plugin for Embulk
|
2
2
|
|
3
|
-
|
3
|
+
Filters records according to whether their aggregated key is included in a Redis SET.
|
4
4
|
|
5
5
|
This plugin is designed to extract the differences between data sets, and is intended to be used in combination with other plugins as in the use cases below.
|
6
6
|
|
@@ -25,8 +25,8 @@ This plugin is designed to extract data set diff files used with the combination
|
|
25
25
|
| host | string | optional | "127.0.0.1" | redis servers host |
|
26
26
|
| port | integer | optional | "6379" | redis servers port |
|
27
27
|
| db | integer | optional | "null" | redis servers db |
|
28
|
-
| flush_on_start | boolean | optional | "false" | flush on start specified redis servers db |
|
29
28
|
| redis_set_key | string | required | | key name of the Redis set |
|
29
|
+
| load_on_memory | boolean | optional | "false" | load all data from redis *1 |
|
30
30
|
| appender | string | optional | "-" | multi key of appender |
|
31
31
|
| match_as_md5 | boolean | optional | "false" | match against the set members using the MD5 hex digest of the value |
|
32
32
|
| key_with_index | hash: Map<Int,String> | required with key_with_index or json_key_with_index or only one || index with key name |
|
@@ -34,6 +34,10 @@ This plugin is designed to extract data set diff files used with the combination
|
|
34
34
|
| default_timezone | string | optional | UTC | |
|
35
35
|
| default_timestamp_format | string | optional | %Y-%m-%d %H:%M:%S.%6N | |
|
36
36
|
|
37
|
+
|
38
|
+
*1: load_on_memory mode requires enough JVM memory to hold all records stored in the Redis set.
|
39
|
+
|
40
|
+
|
37
41
|
## Example
|
38
42
|
|
39
43
|
- inside redis
|
data/build.gradle
CHANGED
@@ -13,7 +13,7 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.
|
16
|
+
version = "0.1.1"
|
17
17
|
|
18
18
|
sourceCompatibility = 1.7
|
19
19
|
targetCompatibility = 1.7
|
@@ -21,11 +21,14 @@ targetCompatibility = 1.7
|
|
21
21
|
dependencies {
|
22
22
|
compile "org.embulk:embulk-core:0.8.26"
|
23
23
|
compile "org.scala-lang:scala-library:2.11.11"
|
24
|
+
compile group: 'com.github.pathikrit', name: 'better-files_2.11', version: '2.17.1'
|
24
25
|
compile group: 'io.circe', name: 'circe-core_2.11', version: '0.8.0'
|
25
26
|
compile group: 'io.circe', name: 'circe-generic_2.11', version: '0.8.0'
|
26
27
|
compile group: 'io.circe', name: 'circe-parser_2.11', version: '0.8.0'
|
27
28
|
compile group: 'com.github.etaty', name: 'rediscala_2.11', version: '1.7.0'
|
28
29
|
compile group: 'org.bouncycastle', name: 'bcpkix-jdk15on', version: '1.57'
|
30
|
+
compile group: 'org.scalaz', name: 'scalaz-core_2.11', version: '7.2.14'
|
31
|
+
compile group: 'org.scalaz', name: 'scalaz-concurrent_2.11', version: '7.2.14'
|
29
32
|
testCompile group: 'org.scalatest', name: 'scalatest_2.11', version: '3.0.1'
|
30
33
|
provided "org.embulk:embulk-core:0.8.26"
|
31
34
|
testCompile "junit:junit:4.+"
|
data/build.sbt
CHANGED
@@ -22,6 +22,9 @@ libraryDependencies ++= Seq(
|
|
22
22
|
"org.embulk" % "embulk-core" % "0.8.25",
|
23
23
|
"com.github.etaty" %% "rediscala" % "1.7.0",
|
24
24
|
"org.bouncycastle" % "bcpkix-jdk15on" % "1.57",
|
25
|
+
"com.github.pathikrit" %% "better-files" % "2.17.1",
|
26
|
+
"org.scalaz" %% "scalaz-core" % "7.2.14",
|
27
|
+
"org.scalaz" %% "scalaz-concurrent" % "7.2.14",
|
25
28
|
"io.circe" %% "circe-core" % circeVersion,
|
26
29
|
"io.circe" %% "circe-generic" % circeVersion,
|
27
30
|
"io.circe" %% "circe-parser" % circeVersion,
|
@@ -4,16 +4,22 @@ import org.embulk.config.{ConfigSource, TaskSource}
|
|
4
4
|
import org.embulk.filter.key_in_redis.redis.Redis
|
5
5
|
import org.embulk.spi
|
6
6
|
import org.embulk.spi._
|
7
|
+
import org.slf4j.Logger
|
8
|
+
|
9
|
+
import scala.collection.JavaConverters._
|
7
10
|
|
8
11
|
class KeyInRedisFilterPlugin extends FilterPlugin {
|
9
12
|
|
10
13
|
override def transaction(config: ConfigSource,
|
11
14
|
inputSchema: Schema,
|
12
15
|
control: FilterPlugin.Control): Unit = {
|
16
|
+
|
13
17
|
val task = config.loadConfig(classOf[PluginTask])
|
18
|
+
val taskSource = task.dump()
|
19
|
+
|
14
20
|
KeyInRedisFilterPlugin.createRedisInstance(task)
|
15
21
|
KeyInRedisFilterPlugin.redis.foreach(_.ping())
|
16
|
-
control.run(
|
22
|
+
control.run(taskSource, inputSchema)
|
17
23
|
KeyInRedisFilterPlugin.redis.foreach(_.close())
|
18
24
|
}
|
19
25
|
|
@@ -32,12 +38,20 @@ class KeyInRedisFilterPlugin extends FilterPlugin {
|
|
32
38
|
}
|
33
39
|
|
34
40
|
object KeyInRedisFilterPlugin {
|
41
|
+
lazy val cacheName = s"${this.getClass.getCanonicalName}-cache"
|
42
|
+
implicit val logger: Logger = Exec.getLogger(classOf[KeyInRedisFilterPlugin])
|
35
43
|
var redis: Option[Redis] = None
|
36
44
|
def createRedisInstance(task: PluginTask): Unit = {
|
37
45
|
KeyInRedisFilterPlugin.redis = Some(
|
38
|
-
Redis(
|
39
|
-
|
40
|
-
|
41
|
-
|
46
|
+
new Redis(
|
47
|
+
task.getRedisSetKey,
|
48
|
+
task.getHost,
|
49
|
+
task.getPort,
|
50
|
+
task.getReplicaHosts.asScala.toMap.mapValues(_.toInt), {
|
51
|
+
if (task.getDb.isPresent) Some(task.getDb.get())
|
52
|
+
else None
|
53
|
+
},
|
54
|
+
task.getLoadOnMemory
|
55
|
+
))
|
42
56
|
}
|
43
57
|
}
|
@@ -6,6 +6,8 @@ import com.google.common.base.Optional
|
|
6
6
|
import org.bouncycastle.util.encoders.Hex
|
7
7
|
import org.embulk.filter.key_in_redis.column._
|
8
8
|
|
9
|
+
import scala.collection.mutable.ListBuffer
|
10
|
+
|
9
11
|
import scala.collection.JavaConverters._
|
10
12
|
import org.embulk.spi.time.TimestampFormatter
|
11
13
|
import org.embulk.spi.{
|
@@ -30,6 +32,7 @@ case class PageOutput(task: PluginTask,
|
|
30
32
|
override def add(page: Page): Unit = {
|
31
33
|
val reader: PageReader = new PageReader(schema)
|
32
34
|
reader.setPage(page)
|
35
|
+
val handlerBuffer = new ListBuffer[PageHandler]()
|
33
36
|
while (reader.nextRecord()) {
|
34
37
|
val setValueVisitor = SetValueColumnVisitor(
|
35
38
|
reader,
|
@@ -41,12 +44,15 @@ case class PageOutput(task: PluginTask,
|
|
41
44
|
val matchValue = if (task.getMatchAsMD5) {
|
42
45
|
Hex.toHexString(digestMd5.digest(setValueVisitor.getValue.getBytes()))
|
43
46
|
} else setValueVisitor.getValue
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
handlerBuffer.append(
|
48
|
+
PageHandler(matchValue, PassthroughColumnVisitor(reader, pageBuilder)))
|
49
|
+
}
|
50
|
+
KeyInRedisFilterPlugin.redis.foreach { redis =>
|
51
|
+
val result = redis.exists(handlerBuffer.map(_.matchValue))
|
52
|
+
handlerBuffer.foreach { value =>
|
53
|
+
if (!result(value.matchValue)) {
|
54
|
+
schema.visitColumns(value.visitor)
|
55
|
+
value.visitor.addRecord()
|
50
56
|
}
|
51
57
|
}
|
52
58
|
}
|
@@ -57,3 +63,5 @@ case class PageOutput(task: PluginTask,
|
|
57
63
|
override def close(): Unit = pageBuilder.close()
|
58
64
|
|
59
65
|
}
|
66
|
+
|
67
|
+
case class PageHandler(matchValue: String, visitor: PassthroughColumnVisitor)
|
@@ -13,6 +13,10 @@ trait PluginTask extends Task with TimestampFormatter.Task {
|
|
13
13
|
@ConfigDefault("false")
|
14
14
|
def getMatchAsMD5: Boolean
|
15
15
|
|
16
|
+
@Config("load_on_memory")
|
17
|
+
@ConfigDefault("false")
|
18
|
+
def getLoadOnMemory: Boolean
|
19
|
+
|
16
20
|
@Config("key_with_index")
|
17
21
|
@ConfigDefault("{}")
|
18
22
|
def getKeyWithIndex: java.util.Map[String, String]
|
@@ -33,6 +37,10 @@ trait PluginTask extends Task with TimestampFormatter.Task {
|
|
33
37
|
@ConfigDefault("6379")
|
34
38
|
def getPort: Int
|
35
39
|
|
40
|
+
@Config("replica_hosts")
|
41
|
+
@ConfigDefault("{}")
|
42
|
+
def getReplicaHosts: java.util.Map[String, String]
|
43
|
+
|
36
44
|
@Config("db")
|
37
45
|
@ConfigDefault("null")
|
38
46
|
def getDb: Optional[Int]
|
@@ -0,0 +1,11 @@
|
|
1
|
+
package org.embulk.filter.key_in_redis.redis
|
2
|
+
|
3
|
+
import org.slf4j.Logger
|
4
|
+
|
5
|
+
import scala.collection.mutable
|
6
|
+
|
7
|
+
case class Cache(loadFromStorage: () => mutable.Set[String])(
|
8
|
+
implicit logger: Logger) {
|
9
|
+
private val cache = loadFromStorage()
|
10
|
+
def contains(value: String): Boolean = cache.contains(value)
|
11
|
+
}
|
@@ -1,16 +1,60 @@
|
|
1
1
|
package org.embulk.filter.key_in_redis.redis
|
2
2
|
|
3
|
-
import
|
3
|
+
import org.slf4j.Logger
|
4
|
+
import redis._
|
4
5
|
|
6
|
+
import scala.annotation.tailrec
|
5
7
|
import scala.concurrent.duration._
|
6
8
|
import scala.concurrent._
|
7
9
|
import scala.util._
|
8
10
|
|
9
|
-
|
11
|
+
import scala.collection.mutable
|
12
|
+
|
13
|
+
class Redis(setKey: String,
|
14
|
+
host: String,
|
15
|
+
port: Int,
|
16
|
+
replicaHosts: Map[String, Int],
|
17
|
+
db: Option[Int],
|
18
|
+
loadOnMemory: Boolean)(implicit logger: Logger) {
|
10
19
|
implicit val actorSystem = akka.actor.ActorSystem(
|
11
20
|
"redis-client",
|
12
21
|
classLoader = Some(this.getClass.getClassLoader))
|
13
|
-
|
22
|
+
|
23
|
+
lazy val cacheInstance: Option[Cache] = if (loadOnMemory) {
|
24
|
+
Some(Cache(() => loadAll()))
|
25
|
+
} else None
|
26
|
+
|
27
|
+
val redisServers: Seq[RedisClient] = {
|
28
|
+
val primary = RedisClient(host, port, db = db)
|
29
|
+
val replica = replicaHosts.map {
|
30
|
+
case (host: String, port: Int) =>
|
31
|
+
RedisClient(host, port, db = db)
|
32
|
+
}
|
33
|
+
Seq(primary) ++ replica.toSeq
|
34
|
+
}
|
35
|
+
|
36
|
+
def redis: RedisClient = Random.shuffle(redisServers).head
|
37
|
+
|
38
|
+
def loadAll(): mutable.Set[String] = {
|
39
|
+
logger.info(s"Loading from Redis start.")
|
40
|
+
import scala.concurrent.ExecutionContext.Implicits.global
|
41
|
+
import ToFutureExtensionOps._
|
42
|
+
val buffer = mutable.Set.empty[String]
|
43
|
+
@tailrec
|
44
|
+
def _scan(cursor: Int): Unit = {
|
45
|
+
val task = redis.sscan[String](setKey, cursor, Option(500)).toTask
|
46
|
+
val result = task.unsafePerformSync
|
47
|
+
result.data.foreach { v =>
|
48
|
+
buffer.add(v)
|
49
|
+
}
|
50
|
+
if (result.index != 0) {
|
51
|
+
_scan(result.index)
|
52
|
+
}
|
53
|
+
}
|
54
|
+
_scan(0)
|
55
|
+
logger.info(s"Loading from Redis finished. record size is ${buffer.size}")
|
56
|
+
buffer
|
57
|
+
}
|
14
58
|
|
15
59
|
def ping(): String = {
|
16
60
|
import scala.concurrent.ExecutionContext.Implicits.global
|
@@ -24,13 +68,33 @@ case class Redis(setKey: String, host: String, port: Int, db: Option[Int]) {
|
|
24
68
|
Await.result(s, 10.minute)
|
25
69
|
}
|
26
70
|
|
27
|
-
def exists(
|
28
|
-
|
29
|
-
|
71
|
+
def exists(values: Seq[String]): Map[String, Boolean] = cacheInstance match {
|
72
|
+
case Some(cached) =>
|
73
|
+
values.map { v =>
|
74
|
+
v -> cached.contains(v)
|
75
|
+
}.toMap
|
76
|
+
case None =>
|
77
|
+
import scala.concurrent.ExecutionContext.Implicits.global
|
78
|
+
import ToFutureExtensionOps._
|
79
|
+
val input = values.zipWithIndex.map(_.swap).toMap
|
80
|
+
val transaction = redis.transaction()
|
81
|
+
val f = values.map { v =>
|
82
|
+
transaction.sismember(setKey, v)
|
83
|
+
}
|
84
|
+
transaction.exec()
|
85
|
+
val results = Future
|
86
|
+
.sequence(f)
|
87
|
+
.toTask
|
88
|
+
.unsafePerformSync
|
89
|
+
.zipWithIndex
|
90
|
+
.map(_.swap)
|
91
|
+
.toMap
|
92
|
+
results.map {
|
93
|
+
case (index, result) =>
|
94
|
+
input(index) -> result
|
95
|
+
}
|
30
96
|
}
|
31
97
|
|
32
|
-
def nonExists(value: String): Boolean = !exists(value)
|
33
|
-
|
34
98
|
def close(): Unit = {
|
35
99
|
redis.stop()
|
36
100
|
// wait for stopping.
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package org.embulk.filter.key_in_redis.redis
|
2
|
+
|
3
|
+
import scala.concurrent.{ExecutionContext, Future}
|
4
|
+
import scala.util.{Failure, Success}
|
5
|
+
import scalaz._, Scalaz._
|
6
|
+
import scalaz.concurrent._
|
7
|
+
|
8
|
+
final class FutureExtensionOps[A](self: Future[A]) {
|
9
|
+
def toTask(implicit ec: ExecutionContext): Task[A] = Task.async { register =>
|
10
|
+
self.onComplete {
|
11
|
+
case Success(v) => register(v.right)
|
12
|
+
case Failure(ex) => register(ex.left)
|
13
|
+
}
|
14
|
+
}
|
15
|
+
}
|
16
|
+
|
17
|
+
trait ToFutureExtensionOps {
|
18
|
+
implicit def toFutureExtensionOps[A](
|
19
|
+
future: Future[A]): FutureExtensionOps[A] =
|
20
|
+
new FutureExtensionOps(future)
|
21
|
+
}
|
22
|
+
|
23
|
+
object ToFutureExtensionOps extends ToFutureExtensionOps
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-key_in_redis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- smdmts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,6 +45,7 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
+
- .circleci/config.yml
|
48
49
|
- .gitignore
|
49
50
|
- LICENSE.txt
|
50
51
|
- README.md
|
@@ -64,12 +65,14 @@ files:
|
|
64
65
|
- src/main/scala/org/embulk/filter/key_in_redis/column/PassthroughColumnVisitor.scala
|
65
66
|
- src/main/scala/org/embulk/filter/key_in_redis/column/SetValueColumnVisitor.scala
|
66
67
|
- src/main/scala/org/embulk/filter/key_in_redis/json/JsonParser.scala
|
68
|
+
- src/main/scala/org/embulk/filter/key_in_redis/redis/Cache.scala
|
67
69
|
- src/main/scala/org/embulk/filter/key_in_redis/redis/Redis.scala
|
68
|
-
- src/
|
70
|
+
- src/main/scala/org/embulk/filter/key_in_redis/redis/TaskExtensionOps.scala
|
69
71
|
- src/test/scala/org/embulk/filter/key_in_redis/json/JsonParserSpec.scala
|
70
72
|
- classpath/akka-actor_2.11-2.3.6.jar
|
71
73
|
- classpath/bcpkix-jdk15on-1.57.jar
|
72
74
|
- classpath/bcprov-jdk15on-1.57.jar
|
75
|
+
- classpath/better-files_2.11-2.17.1.jar
|
73
76
|
- classpath/cats-core_2.11-0.9.0.jar
|
74
77
|
- classpath/cats-kernel_2.11-0.9.0.jar
|
75
78
|
- classpath/cats-macros_2.11-0.9.0.jar
|
@@ -79,7 +82,7 @@ files:
|
|
79
82
|
- classpath/circe-numbers_2.11-0.8.0.jar
|
80
83
|
- classpath/circe-parser_2.11-0.8.0.jar
|
81
84
|
- classpath/config-1.2.1.jar
|
82
|
-
- classpath/embulk-filter-key_in_redis-0.1.
|
85
|
+
- classpath/embulk-filter-key_in_redis-0.1.1.jar
|
83
86
|
- classpath/jawn-parser_2.11-0.10.4.jar
|
84
87
|
- classpath/machinist_2.11-0.6.1.jar
|
85
88
|
- classpath/macro-compat_2.11-1.1.1.jar
|
@@ -87,6 +90,9 @@ files:
|
|
87
90
|
- classpath/scala-library-2.11.11.jar
|
88
91
|
- classpath/scala-reflect-2.11.8.jar
|
89
92
|
- classpath/scala-stm_2.11-0.7.jar
|
93
|
+
- classpath/scalaz-concurrent_2.11-7.2.14.jar
|
94
|
+
- classpath/scalaz-core_2.11-7.2.14.jar
|
95
|
+
- classpath/scalaz-effect_2.11-7.2.14.jar
|
90
96
|
- classpath/shapeless_2.11-2.3.2.jar
|
91
97
|
- classpath/simulacrum_2.11-0.10.0.jar
|
92
98
|
homepage:
|
File without changes
|