embulk-filter-key_in_redis 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e093ef9f81cc900cf6eb92fd2a3b277f5e7340a
4
- data.tar.gz: 6310042c14d5179a8093d2d289b495adb6a43109
3
+ metadata.gz: 8c07a54c906730f2de5c0a2e32a1792cc0170877
4
+ data.tar.gz: 3cf0b84f743814a7ee32a35cb9a084eca9db287d
5
5
  SHA512:
6
- metadata.gz: 58ffdafa04ed57b99cd88638e72c03798ec91390f3a0ee26aa8956600753fced81f4f4b62f88b557399ed87496946b14f90b041bd7aa8032424830c440401f7a
7
- data.tar.gz: e6ad1f2a9993e9ea2b4003d04273fa1212ab4825ea91f24e665e42744d77d7440445aaf7930dfc9e66aff4ec96dc450bce2639bf3bde3af0f87a4ced54feebf9
6
+ metadata.gz: 5b51785e0dfe0a5007cea67fd60c96aaa9c805ec605b074db141310532418847cd07f4d38d5d6745ff9a5067f7f5500f0fda004212e9f88a22d9d98018d00b00
7
+ data.tar.gz: d73e095906c90aef7216fcba69a5f514a7871acde3d1b94ce07918f16cdfecb5ffa05b04bfe8485baf0f3ef62c408a6a11004864fd9a24e6facb87ddc7ef5508
data/README.md CHANGED
@@ -28,7 +28,8 @@ This plugin is designed to extract data set diff files used with the combination
28
28
  | redis_set_key | string | required | | key name of the Redis set |
29
29
  | load_on_memory | boolean | optional | "false" | load all data from redis *1 |
30
30
  | appender | string | optional | "-" | separator used to join multiple key values |
31
- | match_as_md5 | boolean | optional | "false" | smembers the value to converted md5 |
31
+ | match_as_md5 | boolean | optional | "false" | check set membership using the MD5 hex digest of the value |
32
+ | replica_hosts | hash: Map<String,Int> | optional | | map of replica Redis servers (host: port) |
32
33
  | key_with_index | hash: Map<Int,String> | required with key_with_index or json_key_with_index or only one || index with key name |
33
34
  | json_key_with_index | hash: Map<Int,String> | required with key_with_index or json_key_with_index or only one || expanded key name of the JSON column |
34
35
  | default_timezone | string | optional | UTC | |
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.1"
16
+ version = "0.1.2"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -73,7 +73,7 @@ Gem::Specification.new do |spec|
73
73
  spec.description = %[Key In Redis]
74
74
  spec.email = ["smdmts@gmail.com"]
75
75
  spec.licenses = ["MIT"]
76
- # TODO set this: spec.homepage = "https://github.com/smdmts/embulk-filter-key_in_redis"
76
+ spec.homepage = "https://github.com/smdmts/embulk-filter-key_in_redis"
77
77
 
78
78
  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
79
79
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
@@ -1,13 +1,9 @@
1
1
  package org.embulk.filter.key_in_redis
2
2
 
3
- import java.security.MessageDigest
4
-
5
3
  import com.google.common.base.Optional
6
- import org.bouncycastle.util.encoders.Hex
7
4
  import org.embulk.filter.key_in_redis.column._
8
5
 
9
6
  import scala.collection.mutable.ListBuffer
10
-
11
7
  import scala.collection.JavaConverters._
12
8
  import org.embulk.spi.time.TimestampFormatter
13
9
  import org.embulk.spi.{
@@ -27,41 +23,32 @@ case class PageOutput(task: PluginTask,
27
23
  def timestampFormatter(): TimestampFormatter =
28
24
  new TimestampFormatter(task, Optional.absent())
29
25
 
30
- val digestMd5: MessageDigest = MessageDigest.getInstance("MD5")
31
-
32
26
  override def add(page: Page): Unit = {
33
- val reader: PageReader = new PageReader(schema)
34
- reader.setPage(page)
35
- val handlerBuffer = new ListBuffer[PageHandler]()
36
- while (reader.nextRecord()) {
27
+ val baseReader: PageReader = new PageReader(schema)
28
+ baseReader.setPage(page)
29
+ val rows = new ListBuffer[SetValueColumnVisitor]()
30
+ while (baseReader.nextRecord()) {
37
31
  val setValueVisitor = SetValueColumnVisitor(
38
- reader,
32
+ baseReader,
39
33
  timestampFormatter(),
40
34
  task.getKeyWithIndex.asScala.toMap,
41
35
  task.getJsonKeyWithIndex.asScala.toMap,
42
- task.getAppender)
36
+ task.getAppender,
37
+ task.getMatchAsMD5)
43
38
  schema.visitColumns(setValueVisitor)
44
- val matchValue = if (task.getMatchAsMD5) {
45
- Hex.toHexString(digestMd5.digest(setValueVisitor.getValue.getBytes()))
46
- } else setValueVisitor.getValue
47
- handlerBuffer.append(
48
- PageHandler(matchValue, PassthroughColumnVisitor(reader, pageBuilder)))
39
+ rows.append(setValueVisitor)
49
40
  }
50
41
  KeyInRedisFilterPlugin.redis.foreach { redis =>
51
- val result = redis.exists(handlerBuffer.map(_.matchValue))
52
- handlerBuffer.foreach { value =>
53
- if (!result(value.matchValue)) {
54
- schema.visitColumns(value.visitor)
55
- value.visitor.addRecord()
42
+ val result = redis.exists(rows.map(_.getMatchKey))
43
+ rows.foreach { row =>
44
+ if (!result(row.getMatchKey)) {
45
+ row.addRecord(pageBuilder)
56
46
  }
57
47
  }
58
48
  }
59
- reader.close()
60
49
  }
61
50
 
62
51
  override def finish(): Unit = pageBuilder.finish()
63
52
  override def close(): Unit = pageBuilder.close()
64
53
 
65
- }
66
-
67
- case class PageHandler(matchValue: String, visitor: PassthroughColumnVisitor)
54
+ }
@@ -1,22 +1,31 @@
1
1
  package org.embulk.filter.key_in_redis.column
2
2
 
3
+ import java.security.MessageDigest
4
+
5
+ import org.bouncycastle.util.encoders.Hex
3
6
  import org.embulk.filter.key_in_redis.json.JsonParser
4
- import org.embulk.spi.time.TimestampFormatter
7
+ import org.embulk.spi.`type`._
8
+ import org.embulk.spi.time.{Timestamp, TimestampFormatter}
5
9
  import org.embulk.spi.{
6
10
  Column,
11
+ PageBuilder,
7
12
  PageReader,
8
13
  ColumnVisitor => EmbulkColumnVisitor
9
14
  }
15
+ import org.msgpack.value.Value
10
16
 
11
17
  case class SetValueColumnVisitor(reader: PageReader,
12
18
  timestampFormatter: TimestampFormatter,
13
19
  keyMap: Map[String, String],
14
20
  jsonKeyMap: Map[String, String],
15
- appender: String)
21
+ appender: String,
22
+ matchAsMd5: Boolean)
16
23
  extends EmbulkColumnVisitor {
17
24
  import scala.collection.mutable
18
25
  private val recordMap = mutable.Map[String, String]()
26
+ private val valueHolderSet = mutable.Set[ValueHolder[_]]()
19
27
 
28
+ val digestMd5: MessageDigest = MessageDigest.getInstance("MD5")
20
29
  val parameterKeys: Seq[String] = keyMap.values.toSeq
21
30
  val jsonKeys: Seq[String] = jsonKeyMap.values.toSeq
22
31
  val sortedKeys: List[String] = {
@@ -65,12 +74,17 @@ case class SetValueColumnVisitor(reader: PageReader,
65
74
  put(column, v.toJson)
66
75
  }
67
76
 
68
- def value[A](column: Column, method: => (Column => A)): Option[A] =
69
- if (reader.isNull(column)) {
77
+ def value[A](column: Column, method: => (Column => A)): Option[A] = {
78
+ val result = if (reader.isNull(column)) {
70
79
  None
71
80
  } else {
72
81
  Some(method(column))
73
82
  }
83
+ valueHolderSet.add(ValueHolder(column, result))
84
+ result
85
+ }
86
+
87
+ case class ValueHolder[A](column: Column, value: Option[A])
74
88
 
75
89
  def put(column: Column, value: String): Unit = {
76
90
  if (parameterKeys.contains(column.getName)) {
@@ -79,12 +93,41 @@ case class SetValueColumnVisitor(reader: PageReader,
79
93
  ()
80
94
  }
81
95
 
82
- def getValue: String = {
83
- sortedKeys
96
+ def addRecord(pageBuilder: PageBuilder): Unit = {
97
+ valueHolderSet.foreach { vh =>
98
+ vh.value match {
99
+ case Some(v: Boolean) if vh.column.getType.isInstanceOf[BooleanType] =>
100
+ pageBuilder.setBoolean(vh.column, v)
101
+ case Some(v: Long) if vh.column.getType.isInstanceOf[LongType] =>
102
+ pageBuilder.setLong(vh.column, v)
103
+ case Some(v: Double) if vh.column.getType.isInstanceOf[DoubleType] =>
104
+ pageBuilder.setDouble(vh.column, v)
105
+ case Some(v: String) if vh.column.getType.isInstanceOf[StringType] =>
106
+ pageBuilder.setString(vh.column, v)
107
+ case Some(v: Timestamp)
108
+ if vh.column.getType.isInstanceOf[TimestampType] =>
109
+ pageBuilder.setTimestamp(vh.column, v)
110
+ case Some(v: Value) if vh.column.getType.isInstanceOf[JsonType] =>
111
+ pageBuilder.setJson(vh.column, v)
112
+ case None =>
113
+ pageBuilder.setNull(vh.column)
114
+ case _ =>
115
+ sys.error("unmatched types.")
116
+ }
117
+ }
118
+ pageBuilder.addRecord()
119
+ }
120
+
121
+ def getMatchKey: String = {
122
+ val keys = sortedKeys
84
123
  .flatMap { key =>
85
124
  recordMap.get(key)
86
125
  }
87
126
  .mkString(appender)
127
+
128
+ if (matchAsMd5) {
129
+ Hex.toHexString(digestMd5.digest(keys.getBytes()))
130
+ } else keys
88
131
  }
89
132
 
90
133
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-key_in_redis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - smdmts
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-21 00:00:00.000000000 Z
11
+ date: 2017-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,7 +62,6 @@ files:
62
62
  - src/main/scala/org/embulk/filter/key_in_redis/KeyInRedisFilterPlugin.scala
63
63
  - src/main/scala/org/embulk/filter/key_in_redis/PageOutput.scala
64
64
  - src/main/scala/org/embulk/filter/key_in_redis/PluginTask.scala
65
- - src/main/scala/org/embulk/filter/key_in_redis/column/PassthroughColumnVisitor.scala
66
65
  - src/main/scala/org/embulk/filter/key_in_redis/column/SetValueColumnVisitor.scala
67
66
  - src/main/scala/org/embulk/filter/key_in_redis/json/JsonParser.scala
68
67
  - src/main/scala/org/embulk/filter/key_in_redis/redis/Cache.scala
@@ -82,7 +81,7 @@ files:
82
81
  - classpath/circe-numbers_2.11-0.8.0.jar
83
82
  - classpath/circe-parser_2.11-0.8.0.jar
84
83
  - classpath/config-1.2.1.jar
85
- - classpath/embulk-filter-key_in_redis-0.1.1.jar
84
+ - classpath/embulk-filter-key_in_redis-0.1.2.jar
86
85
  - classpath/jawn-parser_2.11-0.10.4.jar
87
86
  - classpath/machinist_2.11-0.6.1.jar
88
87
  - classpath/macro-compat_2.11-1.1.1.jar
@@ -95,7 +94,7 @@ files:
95
94
  - classpath/scalaz-effect_2.11-7.2.14.jar
96
95
  - classpath/shapeless_2.11-2.3.2.jar
97
96
  - classpath/simulacrum_2.11-0.10.0.jar
98
- homepage:
97
+ homepage: https://github.com/smdmts/embulk-filter-key_in_redis
99
98
  licenses:
100
99
  - MIT
101
100
  metadata: {}
@@ -1,57 +0,0 @@
1
- package org.embulk.filter.key_in_redis.column
2
-
3
- import org.embulk.spi.{
4
- Column,
5
- PageBuilder,
6
- PageReader,
7
- ColumnVisitor => EmbulkColumnVisitor
8
- }
9
-
10
- case class PassthroughColumnVisitor(pageReader: PageReader,
11
- pageBuilder: PageBuilder)
12
- extends EmbulkColumnVisitor {
13
-
14
- override def timestampColumn(column: Column): Unit =
15
- if (pageReader.isNull(column)) {
16
- pageBuilder.setNull(column)
17
- } else {
18
- pageBuilder.setTimestamp(column, pageReader.getTimestamp(column))
19
- }
20
-
21
- override def stringColumn(column: Column): Unit =
22
- if (pageReader.isNull(column)) {
23
- pageBuilder.setNull(column)
24
- } else {
25
- pageBuilder.setString(column, pageReader.getString(column))
26
- }
27
-
28
- override def longColumn(column: Column): Unit =
29
- if (pageReader.isNull(column)) {
30
- pageBuilder.setNull(column)
31
- } else {
32
- pageBuilder.setLong(column, pageReader.getLong(column))
33
- }
34
-
35
- override def doubleColumn(column: Column): Unit =
36
- if (pageReader.isNull(column)) {
37
- pageBuilder.setNull(column)
38
- } else {
39
- pageBuilder.setDouble(column, pageReader.getDouble(column))
40
- }
41
-
42
- override def booleanColumn(column: Column): Unit =
43
- if (pageReader.isNull(column)) {
44
- pageBuilder.setNull(column)
45
- } else {
46
- pageBuilder.setBoolean(column, pageReader.getBoolean(column))
47
- }
48
-
49
- override def jsonColumn(column: Column): Unit =
50
- if (pageReader.isNull(column)) {
51
- pageBuilder.setNull(column)
52
- } else {
53
- pageBuilder.setJson(column, pageReader.getJson(column))
54
- }
55
-
56
- def addRecord(): Unit = pageBuilder.addRecord()
57
- }