embulk-output-aerospike 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dfe48bb0f3d7b1b99ea715a70fe95aaa59e4c11c
4
- data.tar.gz: 6f7e98a99ef16ba823ffac4116a52d54334eb28e
3
+ metadata.gz: bb2c68c087cb8d343a0a775e7d8207b2543c3538
4
+ data.tar.gz: 7c21ed64bcb6d35afad9a86641725d90360bf17c
5
5
  SHA512:
6
- metadata.gz: 0b85f656d793aed7e1f6fc68e877b261c11276ab224a71b065392aabbea7f75f64043f5e97dbba20ca00247e0193827c65fae7c1c4622061f60f00e3b7818efe
7
- data.tar.gz: b33ca3cfad0112dd8d358dd653f0c19d0f8e7ddbb3975819b2e1c7584cc0731930f4a15b7f5957cbc170b76548674641f7b106a9ef25a0ee1b2a7f65df5e60e6
6
+ metadata.gz: 7efe78f207491420be5db28ab336b7595b2e20a076bef531a9553bace433874d6bde16080acd41534ff709f7cdc4c837574e935f2f6a03cdef513e71520d718e
7
+ data.tar.gz: 7ffbdb387bba2251233f212f29d1b113054fe7475a15a9f5b1e41ab80f552519911c081d884e6aa54da6532faf109b077621fc4d9f6a1e358c61a7354a5b1aff
@@ -17,12 +17,13 @@ configurations {
17
17
  provided
18
18
  }
19
19
 
20
- version = "0.2.1"
20
+ version = "0.3.0"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.7.5"
24
24
  provided "org.embulk:embulk-core:0.7.5"
25
25
  compile 'org.scala-lang:scala-library:2.11.7'
26
+ compile 'org.scalaz.stream:scalaz-stream_2.11:0.8'
26
27
  compile 'com.github.tkrs:aerospiker-core_2.11:0.4.0-SNAPSHOT'
27
28
  compile 'com.github.tkrs:aerospiker-msgpack_2.11:0.4.0-SNAPSHOT'
28
29
  compile 'com.github.tkrs:aerospiker-task_2.11:0.4.0-SNAPSHOT'
@@ -4,10 +4,7 @@ import java.util.{ List => JList, Map => JMap }
4
4
 
5
5
  import com.google.common.base.Optional
6
6
  import org.embulk.config._
7
- import org.embulk.spi.Exec
8
- import org.embulk.spi.OutputPlugin
9
- import org.embulk.spi.Schema
10
- import org.embulk.spi.TransactionalPageOutput
7
+ import org.embulk.spi._
11
8
 
12
9
  object AerospikeOutputPlugin {
13
10
 
@@ -97,6 +94,7 @@ object AerospikeOutputPlugin {
97
94
  class AerospikeOutputPlugin extends OutputPlugin {
98
95
  import OutputPlugin._
99
96
  import AerospikeOutputPlugin._
97
+ import scala.collection.JavaConversions._
100
98
 
101
99
  def transaction(config: ConfigSource, schema: Schema, taskCount: Int, control: Control): ConfigDiff = {
102
100
  val task = config.loadConfig(classOf[PluginTask])
@@ -104,7 +102,7 @@ class AerospikeOutputPlugin extends OutputPlugin {
104
102
  Exec.newConfigDiff
105
103
  }
106
104
 
107
- def resume(taskSource: TaskSource, schema: Schema, taskCount: Int, control: OutputPlugin.Control): ConfigDiff =
105
+ def resume(taskSource: TaskSource, schema: Schema, taskCount: Int, control: Control): ConfigDiff =
108
106
  throw new UnsupportedOperationException("aerospike output plugin does not support resuming")
109
107
 
110
108
  def open(taskSource: TaskSource, schema: Schema, taskIndex: Int): TransactionalPageOutput =
@@ -1,5 +1,7 @@
1
1
  package org.embulk.output.aerospike
2
2
 
3
+ import java.util.concurrent.atomic.AtomicLong
4
+
3
5
  import aerospiker._
4
6
  import aerospiker.policy.{ ClientPolicy, WritePolicy }
5
7
  import aerospiker.task.Aerospike
@@ -7,29 +9,25 @@ import cats.data.Xor, Xor._
7
9
  import io.circe._, io.circe.syntax._
8
10
  import org.embulk.config.TaskReport
9
11
  import org.embulk.config.TaskSource
10
- import org.embulk.spi.Exec
11
- import org.embulk.spi.PageReader
12
- import org.embulk.spi.Schema
13
- import org.embulk.spi.TransactionalPageOutput
14
12
  import org.embulk.spi._
15
- import java.util.concurrent.atomic.AtomicLong
13
+ import org.embulk.spi.`type`.Type
14
+ import org.embulk.spi.time.Timestamp
16
15
 
17
- import scala.collection.mutable.{ Map => MMap }
16
+ import scala.collection.mutable.{ Map => MMap, ListBuffer }
18
17
  import scala.collection.JavaConversions._
19
-
20
- import org.slf4j.Logger
21
-
22
- import scalaz.{\/-, -\/}
18
+ import scalaz.concurrent.Task
19
+ import scalaz.stream._
23
20
 
24
21
  class AerospikePageOutput(taskSource: TaskSource, schema: Schema, taskIndex: Int) extends TransactionalPageOutput {
25
22
 
26
- import AerospikeOutputPlugin._
23
+ import org.embulk.output.aerospike.ops._
27
24
 
28
- private[this] val log: Logger = Exec.getLogger(classOf[AerospikePageOutput])
29
- private[this] val tsk: AerospikeOutputPlugin.PluginTask = taskSource.loadTask(classOf[AerospikeOutputPlugin.PluginTask])
30
- private[this] val successCount: AtomicLong = new AtomicLong
31
- private[this] val failCount: AtomicLong = new AtomicLong
25
+ import AerospikeOutputPlugin._
32
26
 
27
+ private[this] val log = Exec.getLogger(classOf[AerospikePageOutput])
28
+ private[this] val tsk = taskSource.loadTask(classOf[AerospikeOutputPlugin.PluginTask])
29
+ private[this] val successCount = new AtomicLong
30
+ private[this] val failCount = new AtomicLong
33
31
 
34
32
  private[this] val wp: WritePolicy = {
35
33
  if (tsk.getWritePolicy.isPresent) {
@@ -70,7 +68,6 @@ class AerospikePageOutput(taskSource: TaskSource, schema: Schema, taskIndex: Int
70
68
  override protected def namespace: String = tsk.getNamespace
71
69
  override protected def setName: String = tsk.getSetName
72
70
  }
73
- private[this] val reader: PageReader = new PageReader(schema)
74
71
 
75
72
  private[this] def toJson(a: Any): Json = a match {
76
73
  case v: Boolean => v.asJson
@@ -85,87 +82,110 @@ class AerospikePageOutput(taskSource: TaskSource, schema: Schema, taskIndex: Int
85
82
 
86
83
  implicit val encoder = Encoder.instance[Any](toJson)
87
84
 
88
- def add(page: Page) {
85
+ implicit private[this] val reader: PageReader = new PageReader(schema)
86
+
87
+ val createRecords: Page => Process[Task, Seq[Seq[Col]]] = { (page) =>
89
88
  reader.setPage(page)
90
- while (reader.nextRecord()) {
91
- val record: MMap[String, Any] = MMap.empty
92
- val schema = reader.getSchema
93
- schema.getColumns.foreach { col =>
94
- val name = col.getName
95
- col.getType.getName match {
96
- case "long" => if (!(reader isNull col))
97
- record += name -> reader.getLong(col)
98
- case "double" => if (!(reader isNull col))
99
- record += name -> reader.getDouble(col)
100
- case "timestamp" => if (!(reader isNull col))
101
- record += name -> reader.getTimestamp(col).toEpochMilli
102
- case "boolean" => if (!(reader isNull col))
103
- record += name -> reader.getBoolean(col)
104
- case "string" => if (!(reader isNull col)) {
105
- val cv = reader.getString(col)
106
- if (tsk.getSplitters.isPresent) {
107
- val sps = tsk.getSplitters.get.toMap
108
- sps.get(name) match {
109
- case None => //
110
- record += name -> cv
111
- case Some(v) =>
112
- val sep = v.getSeparator
113
- v.getElementType match {
114
- case "long" =>
115
- val x = cv.split(sep).map(s => if (s.isEmpty) "0" else s).map(_.toLong)
116
- record += name -> x
117
- case "double" =>
118
- val x = cv.split(sep).map(s => if (s.isEmpty) "0" else s).map(_.toDouble)
119
- record += name -> x
120
- case "string" =>
121
- val x = cv.split(sep)
122
- record += name -> x
123
- }
89
+ Process.eval {
90
+ Task.delay {
91
+ val records: ListBuffer[Seq[Col]] = ListBuffer.empty
92
+ while (reader.nextRecord())
93
+ records += (for (col <- schema.getColumns.toStream) yield Col of col).toSeq
94
+ records.toSeq
95
+ }
96
+ }
97
+ }
98
+
99
+ val toRecords: Seq[Seq[Col]] => Seq[Map[String, Any]] = _ map { row =>
100
+ val rec: MMap[String, Any] = MMap.empty
101
+ row foreach {
102
+ case DoubleColumn(i, n, v) => rec += n -> v
103
+ case LongColumn(i, n, v) => rec += n -> v
104
+ case StringColumn(i, n, v) =>
105
+ if (tsk.getSplitters.isPresent) {
106
+ val sps = tsk.getSplitters.get.toMap
107
+ sps.get(n) match {
108
+ case None => //
109
+ rec += n -> v
110
+ case Some(sp) =>
111
+ val sep = sp.getSeparator
112
+ sp.getElementType match {
113
+ case "long" =>
114
+ val x = v.split(sep).map(s => if (s.isEmpty) "0" else s).map(_.toLong)
115
+ rec += n -> x
116
+ case "double" =>
117
+ val x = v.split(sep).map(s => if (s.isEmpty) "0" else s).map(_.toDouble)
118
+ rec += n -> x
119
+ case "string" =>
120
+ val x = v.split(sep)
121
+ rec += n -> x
124
122
  }
125
- } else {
126
- record += name -> cv
127
- }
128
123
  }
129
- case typ => log.error(typ + "[?]")
124
+ } else {
125
+ rec += n -> v
130
126
  }
127
+ case BooleanColumn(i, n, v) => rec += n -> v
128
+ case TimestampColumn(i, n, v) => rec += n -> v
129
+ case NullColumn(i, n, t) => // nop
130
+ }
131
+ rec.toMap
132
+ }
133
+
134
+ val updater: Sink[Task, Seq[Map[String, Any]]] = sink.lift[Task, Seq[Map[String, Any]]] { records =>
135
+ val t = Task.gatherUnordered {
136
+ records map { record =>
137
+ val keyObj = record.getOrElse(tsk.getKeyName.get, "")
138
+ val deRec = record - tsk.getKeyName.get
139
+ if (tsk.getSingleBinName.isPresent)
140
+ aerospike.put(keyObj.toString, Map(tsk.getSingleBinName.get() -> deRec))
141
+ else
142
+ aerospike.put(keyObj.toString, deRec)
131
143
  }
132
- val keyObj = record.getOrElse(tsk.getKeyName.get, "")
133
- record -= tsk.getKeyName.get
134
- tsk.getCommand match {
135
- case "put" =>
136
- val t =
137
- if (tsk.getSingleBinName.isPresent)
138
- aerospike.put(keyObj.toString, Map(tsk.getSingleBinName.get() -> record.toMap))
139
- else
140
- aerospike.put(keyObj.toString, record.toMap)
141
- t runAsync {
142
- case -\/(e) => log.error(e.toString, e.getCause)
143
- case \/-(x) => x match {
144
- case Left(ex) =>
145
- failCount.addAndGet(1L)
146
- log.error(ex.toString, ex.getCause)
147
- case Right(v) =>
148
- successCount.addAndGet(1L)
149
- log.debug(v.toString)
150
- }
151
- }
152
- case "delete" =>
153
- val t = aerospike.delete(keyObj.toString)
154
- t runAsync {
155
- case -\/(e) => log.error(e.toString, e.getCause)
156
- case \/-(x) => x match {
157
- case Left(ex) =>
158
- failCount.addAndGet(1L)
159
- log.error(ex.toString, ex.getCause)
160
- case Right(v) =>
161
- successCount.addAndGet(1L)
162
- log.debug(v.toString)
163
- }
164
- }
144
+ } run
145
+
146
+ Task.delay {
147
+ for ( r <- t ) {
148
+ r match {
149
+ case Left(e) =>
150
+ log.error(e.toString, e)
151
+ failCount.addAndGet(1L)
152
+ case Right(_) =>
153
+ successCount.addAndGet(1L)
154
+ }
165
155
  }
166
156
  }
167
157
  }
168
158
 
159
+ val deleter: Sink[Task, Seq[Map[String, Any]]] = sink.lift[Task, Seq[Map[String, Any]]] { records =>
160
+ val t = Task.gatherUnordered {
161
+ records map { record =>
162
+ val keyObj = record.getOrElse(tsk.getKeyName.get, "")
163
+ aerospike.delete(keyObj.toString)
164
+ }
165
+ } run
166
+
167
+ Task.delay {
168
+ for ( r <- t ) {
169
+ r match {
170
+ case Left(e) =>
171
+ log.error(e.key, e)
172
+ failCount.addAndGet(1L)
173
+ case Right(_) =>
174
+ successCount.addAndGet(1L)
175
+ }
176
+ }
177
+ }
178
+ }
179
+
180
+ def add(page: Page) {
181
+ tsk.getCommand match {
182
+ case "put" =>
183
+ createRecords(page).takeWhile(_.nonEmpty).map(toRecords).to(updater).runLog.run
184
+ case "delete" =>
185
+ createRecords(page).takeWhile(_.nonEmpty).map(toRecords).to(deleter).runLog.run
186
+ }
187
+ }
188
+
169
189
  def finish(): Unit = log.info(s"finish ${tsk.getCommand} ok[${successCount.longValue}] ng[${failCount.longValue()}]")
170
190
 
171
191
  def close(): Unit = {
@@ -177,3 +197,33 @@ class AerospikePageOutput(taskSource: TaskSource, schema: Schema, taskIndex: Int
177
197
 
178
198
  def commit: TaskReport = Exec.newTaskReport
179
199
  }
200
+
201
+ object ops {
202
+
203
+ sealed trait Col
204
+
205
+ object Col {
206
+ def of(c: Column)(implicit r: PageReader) =
207
+ if (r isNull c) NullColumn(c.getIndex, c.getName, c.getType)
208
+ else c.getType.getName match {
209
+ case "string" =>
210
+ StringColumn(c.getIndex, c.getName, r.getString(c))
211
+ case "double" =>
212
+ DoubleColumn(c.getIndex, c.getName, r.getDouble(c))
213
+ case "long" =>
214
+ LongColumn(c.getIndex, c.getName, r.getLong(c))
215
+ case "boolean" =>
216
+ BooleanColumn(c.getIndex, c.getName, r.getBoolean(c))
217
+ case "timestamp" =>
218
+ TimestampColumn(c.getIndex, c.getName, r.getTimestamp(c))
219
+ }
220
+ }
221
+
222
+ case class DoubleColumn(index: Int, name: String, value: Double) extends Col
223
+ case class StringColumn(index: Int, name: String, value: String) extends Col
224
+ case class BooleanColumn(index: Int, name: String, value: Boolean) extends Col
225
+ case class TimestampColumn(index: Int, name: String, value: Timestamp) extends Col
226
+ case class LongColumn(index: Int, name: String, value: Long) extends Col
227
+ case class NullColumn(index: Int, name: String, typ: Type) extends Col
228
+
229
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-aerospike
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Takeru Sato
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-15 00:00:00.000000000 Z
11
+ date: 2015-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -77,7 +77,7 @@ files:
77
77
  - classpath/circe-jawn_2.11-0.1.1.jar
78
78
  - classpath/commons-math3-3.2.jar
79
79
  - classpath/discipline_2.11-0.3.jar
80
- - classpath/embulk-output-aerospike-0.2.1.jar
80
+ - classpath/embulk-output-aerospike-0.3.0.jar
81
81
  - classpath/gnu-crypto-2.0.1.jar
82
82
  - classpath/jawn-parser_2.11-0.8.0.jar
83
83
  - classpath/jbcrypt-0.3m.jar
@@ -94,9 +94,11 @@ files:
94
94
  - classpath/scala-reflect-2.11.2.jar
95
95
  - classpath/scala-xml_2.11-1.0.4.jar
96
96
  - classpath/scalacheck_2.11-1.12.4.jar
97
- - classpath/scalaz-concurrent_2.11-7.1.3.jar
98
- - classpath/scalaz-core_2.11-7.1.3.jar
99
- - classpath/scalaz-effect_2.11-7.1.3.jar
97
+ - classpath/scalaz-concurrent_2.11-7.1.4.jar
98
+ - classpath/scalaz-core_2.11-7.1.4.jar
99
+ - classpath/scalaz-effect_2.11-7.1.4.jar
100
+ - classpath/scalaz-stream_2.11-0.8.jar
101
+ - classpath/scodec-bits_2.11-1.0.9.jar
100
102
  - classpath/shapeless_2.11-2.2.5.jar
101
103
  - classpath/simulacrum_2.11-0.3.0.jar
102
104
  - classpath/test-interface-1.0.jar