embulk-filter-key_in_redis 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/build.gradle +2 -2
- data/src/main/scala/org/embulk/filter/key_in_redis/PageOutput.scala +13 -26
- data/src/main/scala/org/embulk/filter/key_in_redis/column/SetValueColumnVisitor.scala +49 -6
- metadata +4 -5
- data/src/main/scala/org/embulk/filter/key_in_redis/column/PassthroughColumnVisitor.scala +0 -57
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c07a54c906730f2de5c0a2e32a1792cc0170877
|
4
|
+
data.tar.gz: 3cf0b84f743814a7ee32a35cb9a084eca9db287d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b51785e0dfe0a5007cea67fd60c96aaa9c805ec605b074db141310532418847cd07f4d38d5d6745ff9a5067f7f5500f0fda004212e9f88a22d9d98018d00b00
|
7
|
+
data.tar.gz: d73e095906c90aef7216fcba69a5f514a7871acde3d1b94ce07918f16cdfecb5ffa05b04bfe8485baf0f3ef62c408a6a11004864fd9a24e6facb87ddc7ef5508
|
data/README.md
CHANGED
@@ -28,7 +28,8 @@ This plugin is designed to extract data set diff files used with the combination
|
|
28
28
|
| redis_set_key | string | required | | redis of key of set name |
|
29
29
|
| load_on_memory | boolean | optional | "false" | load all data from redis *1 |
|
30
30
|
| appender | string | optional | "-" | multi key of appender |
|
31
|
-
| match_as_md5 | boolean
|
31
|
+
| match_as_md5 | boolean | optional | "false" | smembers the value to converted md5 |
|
32
|
+
| replica_hosts | hash: Map<String,Int> | optional | | list of replica redis servers host: port |
|
32
33
|
| key_with_index | hash: Map<Int,String> | required with key_with_index or json_key_with_index or only one || index with key name |
|
33
34
|
| json_key_with_index | hash: Map<Int,String> | required with key_with_index or json_key_with_index or only one || json columns's expanded key name |
|
34
35
|
| default_timezone | string | optional | UTC | |
|
data/build.gradle
CHANGED
@@ -13,7 +13,7 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.1"
|
16
|
+
version = "0.1.2"
|
17
17
|
|
18
18
|
sourceCompatibility = 1.7
|
19
19
|
targetCompatibility = 1.7
|
@@ -73,7 +73,7 @@ Gem::Specification.new do |spec|
|
|
73
73
|
spec.description = %[Key In Redis]
|
74
74
|
spec.email = ["smdmts@gmail.com"]
|
75
75
|
spec.licenses = ["MIT"]
|
76
|
-
|
76
|
+
spec.homepage = "https://github.com/smdmts/embulk-filter-key_in_redis"
|
77
77
|
|
78
78
|
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
79
79
|
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
@@ -1,13 +1,9 @@
|
|
1
1
|
package org.embulk.filter.key_in_redis
|
2
2
|
|
3
|
-
import java.security.MessageDigest
|
4
|
-
|
5
3
|
import com.google.common.base.Optional
|
6
|
-
import org.bouncycastle.util.encoders.Hex
|
7
4
|
import org.embulk.filter.key_in_redis.column._
|
8
5
|
|
9
6
|
import scala.collection.mutable.ListBuffer
|
10
|
-
|
11
7
|
import scala.collection.JavaConverters._
|
12
8
|
import org.embulk.spi.time.TimestampFormatter
|
13
9
|
import org.embulk.spi.{
|
@@ -27,41 +23,32 @@ case class PageOutput(task: PluginTask,
|
|
27
23
|
def timestampFormatter(): TimestampFormatter =
|
28
24
|
new TimestampFormatter(task, Optional.absent())
|
29
25
|
|
30
|
-
val digestMd5: MessageDigest = MessageDigest.getInstance("MD5")
|
31
|
-
|
32
26
|
override def add(page: Page): Unit = {
|
33
|
-
val
|
34
|
-
|
35
|
-
val
|
36
|
-
while (
|
27
|
+
val baseReader: PageReader = new PageReader(schema)
|
28
|
+
baseReader.setPage(page)
|
29
|
+
val rows = new ListBuffer[SetValueColumnVisitor]()
|
30
|
+
while (baseReader.nextRecord()) {
|
37
31
|
val setValueVisitor = SetValueColumnVisitor(
|
38
|
-
|
32
|
+
baseReader,
|
39
33
|
timestampFormatter(),
|
40
34
|
task.getKeyWithIndex.asScala.toMap,
|
41
35
|
task.getJsonKeyWithIndex.asScala.toMap,
|
42
|
-
task.getAppender
|
36
|
+
task.getAppender,
|
37
|
+
task.getMatchAsMD5)
|
43
38
|
schema.visitColumns(setValueVisitor)
|
44
|
-
|
45
|
-
Hex.toHexString(digestMd5.digest(setValueVisitor.getValue.getBytes()))
|
46
|
-
} else setValueVisitor.getValue
|
47
|
-
handlerBuffer.append(
|
48
|
-
PageHandler(matchValue, PassthroughColumnVisitor(reader, pageBuilder)))
|
39
|
+
rows.append(setValueVisitor)
|
49
40
|
}
|
50
41
|
KeyInRedisFilterPlugin.redis.foreach { redis =>
|
51
|
-
val result = redis.exists(
|
52
|
-
|
53
|
-
if (!result(
|
54
|
-
|
55
|
-
value.visitor.addRecord()
|
42
|
+
val result = redis.exists(rows.map(_.getMatchKey))
|
43
|
+
rows.foreach { row =>
|
44
|
+
if (!result(row.getMatchKey)) {
|
45
|
+
row.addRecord(pageBuilder)
|
56
46
|
}
|
57
47
|
}
|
58
48
|
}
|
59
|
-
reader.close()
|
60
49
|
}
|
61
50
|
|
62
51
|
override def finish(): Unit = pageBuilder.finish()
|
63
52
|
override def close(): Unit = pageBuilder.close()
|
64
53
|
|
65
|
-
}
|
66
|
-
|
67
|
-
case class PageHandler(matchValue: String, visitor: PassthroughColumnVisitor)
|
54
|
+
}
|
@@ -1,22 +1,31 @@
|
|
1
1
|
package org.embulk.filter.key_in_redis.column
|
2
2
|
|
3
|
+
import java.security.MessageDigest
|
4
|
+
|
5
|
+
import org.bouncycastle.util.encoders.Hex
|
3
6
|
import org.embulk.filter.key_in_redis.json.JsonParser
|
4
|
-
import org.embulk.spi
|
7
|
+
import org.embulk.spi.`type`._
|
8
|
+
import org.embulk.spi.time.{Timestamp, TimestampFormatter}
|
5
9
|
import org.embulk.spi.{
|
6
10
|
Column,
|
11
|
+
PageBuilder,
|
7
12
|
PageReader,
|
8
13
|
ColumnVisitor => EmbulkColumnVisitor
|
9
14
|
}
|
15
|
+
import org.msgpack.value.Value
|
10
16
|
|
11
17
|
case class SetValueColumnVisitor(reader: PageReader,
|
12
18
|
timestampFormatter: TimestampFormatter,
|
13
19
|
keyMap: Map[String, String],
|
14
20
|
jsonKeyMap: Map[String, String],
|
15
|
-
appender: String
|
21
|
+
appender: String,
|
22
|
+
matchAsMd5: Boolean)
|
16
23
|
extends EmbulkColumnVisitor {
|
17
24
|
import scala.collection.mutable
|
18
25
|
private val recordMap = mutable.Map[String, String]()
|
26
|
+
private val valueHolderSet = mutable.Set[ValueHolder[_]]()
|
19
27
|
|
28
|
+
val digestMd5: MessageDigest = MessageDigest.getInstance("MD5")
|
20
29
|
val parameterKeys: Seq[String] = keyMap.values.toSeq
|
21
30
|
val jsonKeys: Seq[String] = jsonKeyMap.values.toSeq
|
22
31
|
val sortedKeys: List[String] = {
|
@@ -65,12 +74,17 @@ case class SetValueColumnVisitor(reader: PageReader,
|
|
65
74
|
put(column, v.toJson)
|
66
75
|
}
|
67
76
|
|
68
|
-
def value[A](column: Column, method: => (Column => A)): Option[A] =
|
69
|
-
if (reader.isNull(column)) {
|
77
|
+
def value[A](column: Column, method: => (Column => A)): Option[A] = {
|
78
|
+
val result = if (reader.isNull(column)) {
|
70
79
|
None
|
71
80
|
} else {
|
72
81
|
Some(method(column))
|
73
82
|
}
|
83
|
+
valueHolderSet.add(ValueHolder(column, result))
|
84
|
+
result
|
85
|
+
}
|
86
|
+
|
87
|
+
case class ValueHolder[A](column: Column, value: Option[A])
|
74
88
|
|
75
89
|
def put(column: Column, value: String): Unit = {
|
76
90
|
if (parameterKeys.contains(column.getName)) {
|
@@ -79,12 +93,41 @@ case class SetValueColumnVisitor(reader: PageReader,
|
|
79
93
|
()
|
80
94
|
}
|
81
95
|
|
82
|
-
def
|
83
|
-
|
96
|
+
def addRecord(pageBuilder: PageBuilder): Unit = {
|
97
|
+
valueHolderSet.foreach { vh =>
|
98
|
+
vh.value match {
|
99
|
+
case Some(v: Boolean) if vh.column.getType.isInstanceOf[BooleanType] =>
|
100
|
+
pageBuilder.setBoolean(vh.column, v)
|
101
|
+
case Some(v: Long) if vh.column.getType.isInstanceOf[LongType] =>
|
102
|
+
pageBuilder.setLong(vh.column, v)
|
103
|
+
case Some(v: Double) if vh.column.getType.isInstanceOf[DoubleType] =>
|
104
|
+
pageBuilder.setDouble(vh.column, v)
|
105
|
+
case Some(v: String) if vh.column.getType.isInstanceOf[StringType] =>
|
106
|
+
pageBuilder.setString(vh.column, v)
|
107
|
+
case Some(v: Timestamp)
|
108
|
+
if vh.column.getType.isInstanceOf[TimestampType] =>
|
109
|
+
pageBuilder.setTimestamp(vh.column, v)
|
110
|
+
case Some(v: Value) if vh.column.getType.isInstanceOf[JsonType] =>
|
111
|
+
pageBuilder.setJson(vh.column, v)
|
112
|
+
case None =>
|
113
|
+
pageBuilder.setNull(vh.column)
|
114
|
+
case _ =>
|
115
|
+
sys.error("unmatched types.")
|
116
|
+
}
|
117
|
+
}
|
118
|
+
pageBuilder.addRecord()
|
119
|
+
}
|
120
|
+
|
121
|
+
def getMatchKey: String = {
|
122
|
+
val keys = sortedKeys
|
84
123
|
.flatMap { key =>
|
85
124
|
recordMap.get(key)
|
86
125
|
}
|
87
126
|
.mkString(appender)
|
127
|
+
|
128
|
+
if (matchAsMd5) {
|
129
|
+
Hex.toHexString(digestMd5.digest(keys.getBytes()))
|
130
|
+
} else keys
|
88
131
|
}
|
89
132
|
|
90
133
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-key_in_redis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- smdmts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,7 +62,6 @@ files:
|
|
62
62
|
- src/main/scala/org/embulk/filter/key_in_redis/KeyInRedisFilterPlugin.scala
|
63
63
|
- src/main/scala/org/embulk/filter/key_in_redis/PageOutput.scala
|
64
64
|
- src/main/scala/org/embulk/filter/key_in_redis/PluginTask.scala
|
65
|
-
- src/main/scala/org/embulk/filter/key_in_redis/column/PassthroughColumnVisitor.scala
|
66
65
|
- src/main/scala/org/embulk/filter/key_in_redis/column/SetValueColumnVisitor.scala
|
67
66
|
- src/main/scala/org/embulk/filter/key_in_redis/json/JsonParser.scala
|
68
67
|
- src/main/scala/org/embulk/filter/key_in_redis/redis/Cache.scala
|
@@ -82,7 +81,7 @@ files:
|
|
82
81
|
- classpath/circe-numbers_2.11-0.8.0.jar
|
83
82
|
- classpath/circe-parser_2.11-0.8.0.jar
|
84
83
|
- classpath/config-1.2.1.jar
|
85
|
-
- classpath/embulk-filter-key_in_redis-0.1.1.jar
|
84
|
+
- classpath/embulk-filter-key_in_redis-0.1.2.jar
|
86
85
|
- classpath/jawn-parser_2.11-0.10.4.jar
|
87
86
|
- classpath/machinist_2.11-0.6.1.jar
|
88
87
|
- classpath/macro-compat_2.11-1.1.1.jar
|
@@ -95,7 +94,7 @@ files:
|
|
95
94
|
- classpath/scalaz-effect_2.11-7.2.14.jar
|
96
95
|
- classpath/shapeless_2.11-2.3.2.jar
|
97
96
|
- classpath/simulacrum_2.11-0.10.0.jar
|
98
|
-
homepage:
|
97
|
+
homepage: https://github.com/smdmts/embulk-filter-key_in_redis
|
99
98
|
licenses:
|
100
99
|
- MIT
|
101
100
|
metadata: {}
|
@@ -1,57 +0,0 @@
|
|
1
|
-
package org.embulk.filter.key_in_redis.column
|
2
|
-
|
3
|
-
import org.embulk.spi.{
|
4
|
-
Column,
|
5
|
-
PageBuilder,
|
6
|
-
PageReader,
|
7
|
-
ColumnVisitor => EmbulkColumnVisitor
|
8
|
-
}
|
9
|
-
|
10
|
-
case class PassthroughColumnVisitor(pageReader: PageReader,
|
11
|
-
pageBuilder: PageBuilder)
|
12
|
-
extends EmbulkColumnVisitor {
|
13
|
-
|
14
|
-
override def timestampColumn(column: Column): Unit =
|
15
|
-
if (pageReader.isNull(column)) {
|
16
|
-
pageBuilder.setNull(column)
|
17
|
-
} else {
|
18
|
-
pageBuilder.setTimestamp(column, pageReader.getTimestamp(column))
|
19
|
-
}
|
20
|
-
|
21
|
-
override def stringColumn(column: Column): Unit =
|
22
|
-
if (pageReader.isNull(column)) {
|
23
|
-
pageBuilder.setNull(column)
|
24
|
-
} else {
|
25
|
-
pageBuilder.setString(column, pageReader.getString(column))
|
26
|
-
}
|
27
|
-
|
28
|
-
override def longColumn(column: Column): Unit =
|
29
|
-
if (pageReader.isNull(column)) {
|
30
|
-
pageBuilder.setNull(column)
|
31
|
-
} else {
|
32
|
-
pageBuilder.setLong(column, pageReader.getLong(column))
|
33
|
-
}
|
34
|
-
|
35
|
-
override def doubleColumn(column: Column): Unit =
|
36
|
-
if (pageReader.isNull(column)) {
|
37
|
-
pageBuilder.setNull(column)
|
38
|
-
} else {
|
39
|
-
pageBuilder.setDouble(column, pageReader.getDouble(column))
|
40
|
-
}
|
41
|
-
|
42
|
-
override def booleanColumn(column: Column): Unit =
|
43
|
-
if (pageReader.isNull(column)) {
|
44
|
-
pageBuilder.setNull(column)
|
45
|
-
} else {
|
46
|
-
pageBuilder.setBoolean(column, pageReader.getBoolean(column))
|
47
|
-
}
|
48
|
-
|
49
|
-
override def jsonColumn(column: Column): Unit =
|
50
|
-
if (pageReader.isNull(column)) {
|
51
|
-
pageBuilder.setNull(column)
|
52
|
-
} else {
|
53
|
-
pageBuilder.setJson(column, pageReader.getJson(column))
|
54
|
-
}
|
55
|
-
|
56
|
-
def addRecord(): Unit = pageBuilder.addRecord()
|
57
|
-
}
|