embulk-output-aerospike 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8780e59a53948dd4e2cff92443464abe410f0ef0
4
- data.tar.gz: c83b6b5257932f69fcb438fe4d554fde4763ba12
3
+ metadata.gz: 6c0883b7c72faacf208313cc1537682b1559cd9c
4
+ data.tar.gz: fe4bc433cfdddcad53d24f8bf7a8593b064a9bbd
5
5
  SHA512:
6
- metadata.gz: 84a5902bc21a65ffdcc3438bf926ec5b44f91bdbc0a4e3ab7fc4bbdf994bc5e4f080aeac8178c8a947c06d65a0415e353b871225a77d10732a6eb2ca7995028d
7
- data.tar.gz: 4261add7207fa9cdaf6683981724066de243c38b20055078f06b542d02867b321af6f03e947f9a84541d8dae9c2807b2e71008330cd94d8bb793b6b0a4fadef9
6
+ metadata.gz: b3488d2e5b20ef0612e48713125b9c426031f9fbc941198c2b24e175feb0e91dd4cf5d5343fa8eb235b8d8a424fe8cad358f79d1caf744128d15a23d1e7ed0df
7
+ data.tar.gz: 693adc8051a5b7d0ea519d43ed7623dd0ca778663ac48f8080ec84ec9ac3d61ceb1680166e1e06ef5a6fc741c6526d30895c73361cab368a908e5bee2ee69425
data/README.md CHANGED
@@ -1,6 +1,10 @@
1
1
  # Aerospike output plugin for Embulk
2
2
 
3
- Aerospike output plugins for Embulk loads records to databases using AerospikeJavaClient.
3
+ [![Gem Version](https://badge.fury.io/rb/embulk-output-aerospike.svg)](https://badge.fury.io/rb/embulk-output-aerospike)
4
+
5
+ The Aerospike output plugin for Embulk loads records into Aerospike databases using [aerospiker](https://github.com/tkrs/aerospiker).
6
+
7
+ It is implemented in Scala :laughing:
4
8
 
5
9
  ## Overview
6
10
 
@@ -36,7 +40,6 @@ Aerospike output plugins for Embulk loads records to databases using AerospikeJa
36
40
  - **splitters**: key is column_name (hash, required)
37
41
  - **separator**: regular expression used as the separator when splitting the column value (string, default: `,`)
38
42
  - **element_type**: type to which each split element is converted. Currently supported types are string, long and double (string, default: `string`)
39
- - **parallel**: use parallel execute (boolean, default: `false`)
40
43
 
41
44
  ## Example
42
45
 
@@ -51,7 +54,6 @@ out:
51
54
  namespace: test
52
55
  set_name: set
53
56
  single_bin_name: record
54
- parallel: true
55
57
  splitters:
56
58
  column1: {separator: '\.*', element_type: string}
57
59
  column2: {separator: '\t', element_type: long}
@@ -75,7 +77,6 @@ out:
75
77
  generation: 0
76
78
  expiration: 64000
77
79
  send_key: true
78
- parallel: true
79
80
  ```
80
81
 
81
82
 
@@ -2,22 +2,30 @@ plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
+ id 'scala'
5
6
  }
6
7
  import com.github.jrubygradle.JRubyExec
8
+
7
9
  repositories {
8
10
  mavenCentral()
9
11
  jcenter()
12
+ maven {
13
+ url "https://oss.sonatype.org/content/repositories/snapshots/"
14
+ }
10
15
  }
11
16
  configurations {
12
17
  provided
13
18
  }
14
19
 
15
- version = "0.1.0"
20
+ version = "0.2.0"
16
21
 
17
22
  dependencies {
18
23
  compile "org.embulk:embulk-core:0.7.5"
19
24
  provided "org.embulk:embulk-core:0.7.5"
20
- compile 'com.aerospike:aerospike-client:3.1.6'
25
+ compile 'org.scala-lang:scala-library:2.11.7'
26
+ compile 'com.github.tkrs:aerospiker-core_2.11:0.4.0-SNAPSHOT'
27
+ compile 'com.github.tkrs:aerospiker-msgpack_2.11:0.4.0-SNAPSHOT'
28
+ compile 'com.github.tkrs:aerospiker-task_2.11:0.4.0-SNAPSHOT'
21
29
  testCompile "junit:junit:4.+"
22
30
  }
23
31
 
@@ -0,0 +1,115 @@
1
+ package org.embulk.output.aerospike
2
+
3
+ import java.util.{ List => JList, Map => JMap }
4
+
5
+ import com.google.common.base.Optional
6
+ import org.embulk.config._
7
+ import org.embulk.spi.Exec
8
+ import org.embulk.spi.OutputPlugin
9
+ import org.embulk.spi.Schema
10
+ import org.embulk.spi.TransactionalPageOutput
11
+
12
+ object AerospikeOutputPlugin {
13
+
14
+ trait PluginTask extends Task {
15
+
16
+ @Config("hosts") def getHosts: JList[AerospikeOutputPlugin.HostTask]
17
+
18
+ @Config("command") def getCommand: String
19
+
20
+ @Config("namespace") def getNamespace: String
21
+
22
+ @Config("set_name") def getSetName: String
23
+
24
+ @Config("key_name")
25
+ @ConfigDefault("key") def getKeyName: Optional[String]
26
+
27
+ @Config("client_policy")
28
+ @ConfigDefault("null") def getClientPolicy: Optional[AerospikeOutputPlugin.ClientPolicyTask]
29
+
30
+ @Config("write_policy")
31
+ @ConfigDefault("null") def getWritePolicy: Optional[AerospikeOutputPlugin.WritePolicyTask]
32
+
33
+ @Config("single_bin_name")
34
+ @ConfigDefault("null") def getSingleBinName: Optional[String]
35
+
36
+ @Config("splitters")
37
+ @ConfigDefault("null") def getSplitters: Optional[JMap[String, AerospikeOutputPlugin.SplitterTask]]
38
+
39
+ }
40
+
41
+ trait SplitterTask extends Task {
42
+ @Config("separator")
43
+ @ConfigDefault(",") def getSeparator: String
44
+
45
+ @Config("element_type")
46
+ @ConfigDefault("string") def getElementType: String
47
+ }
48
+
49
+ trait HostTask extends Task {
50
+ @Config("name") def getName: String
51
+
52
+ @Config("port") def getPort: Int
53
+ }
54
+
55
+ trait ClientPolicyTask extends Task {
56
+ @Config("user")
57
+ @ConfigDefault("null") def getUser: Optional[String]
58
+
59
+ @Config("password")
60
+ @ConfigDefault("null") def getPassword: Optional[String]
61
+
62
+ @Config("timeout")
63
+ @ConfigDefault("0") def getTimeout: Optional[Integer]
64
+
65
+ @Config("max_threads")
66
+ @ConfigDefault("300") def getMaxThreads: Optional[Integer]
67
+
68
+ @Config("max_socket_idle")
69
+ @ConfigDefault("14") def getMaxSocketIdle: Optional[Integer]
70
+
71
+ @Config("tend_interval")
72
+ @ConfigDefault("1000") def getTendInterval: Optional[Integer]
73
+
74
+ @Config("fail_if_not_connected")
75
+ @ConfigDefault("true") def getFailIfNotConnected: Optional[Boolean]
76
+ }
77
+
78
+ trait WritePolicyTask extends Task {
79
+ @Config("generation")
80
+ @ConfigDefault("null") def getGeneration: Optional[Integer]
81
+
82
+ @Config("expiration")
83
+ @ConfigDefault("0") def getExpiration: Optional[Integer]
84
+
85
+ @Config("max_retries")
86
+ @ConfigDefault("0") def getMaxRetries: Optional[Integer]
87
+
88
+ @Config("send_key")
89
+ @ConfigDefault("false") def getSendKey: Optional[Boolean]
90
+
91
+ @Config("sleep_between_retries")
92
+ @ConfigDefault("0") def getSleepBetweenRetries: Optional[Integer]
93
+ }
94
+
95
+ }
96
+
97
+ class AerospikeOutputPlugin extends OutputPlugin {
98
+ import OutputPlugin._
99
+ import AerospikeOutputPlugin._
100
+
101
+ def transaction(config: ConfigSource, schema: Schema, taskCount: Int, control: Control): ConfigDiff = {
102
+ val task = config.loadConfig(classOf[PluginTask])
103
+ control.run(task.dump)
104
+ Exec.newConfigDiff
105
+ }
106
+
107
+ def resume(taskSource: TaskSource, schema: Schema, taskCount: Int, control: OutputPlugin.Control): ConfigDiff =
108
+ throw new UnsupportedOperationException("aerospike output plugin does not support resuming")
109
+
110
+ def open(taskSource: TaskSource, schema: Schema, taskIndex: Int): TransactionalPageOutput =
111
+ new AerospikePageOutput(taskSource, schema, taskIndex)
112
+
113
+ override def cleanup(taskSource: TaskSource, schema: Schema, taskCount: Int, successTaskReports: JList[TaskReport]): Unit = {
114
+ }
115
+ }
@@ -0,0 +1,179 @@
1
+ package org.embulk.output.aerospike
2
+
3
+ import aerospiker._
4
+ import aerospiker.policy.{ ClientPolicy, WritePolicy }
5
+ import aerospiker.task.Aerospike
6
+ import cats.data.Xor, Xor._
7
+ import io.circe._, io.circe.syntax._
8
+ import org.embulk.config.TaskReport
9
+ import org.embulk.config.TaskSource
10
+ import org.embulk.spi.Exec
11
+ import org.embulk.spi.PageReader
12
+ import org.embulk.spi.Schema
13
+ import org.embulk.spi.TransactionalPageOutput
14
+ import org.embulk.spi._
15
+ import java.util.concurrent.atomic.AtomicLong
16
+
17
+ import scala.collection.mutable.{ Map => MMap }
18
+ import scala.collection.JavaConversions._
19
+
20
+ import org.slf4j.Logger
21
+
22
+ import scalaz.{\/-, -\/}
23
+
24
+ class AerospikePageOutput(taskSource: TaskSource, schema: Schema, taskIndex: Int) extends TransactionalPageOutput {
25
+
26
+ import AerospikeOutputPlugin._
27
+
28
+ private[this] val log: Logger = Exec.getLogger(classOf[AerospikePageOutput])
29
+ private[this] val tsk: AerospikeOutputPlugin.PluginTask = taskSource.loadTask(classOf[AerospikeOutputPlugin.PluginTask])
30
+ private[this] val successCount: AtomicLong = new AtomicLong
31
+ private[this] val failCount: AtomicLong = new AtomicLong
32
+
33
+
34
+ private[this] val wp: WritePolicy = {
35
+ if (tsk.getWritePolicy.isPresent) {
36
+ val wpTask: WritePolicyTask = tsk.getWritePolicy.get
37
+ WritePolicy(
38
+ sendKey = wpTask.getSendKey.get,
39
+ expiration = wpTask.getExpiration.get,
40
+ maxRetries = wpTask.getMaxRetries.get,
41
+ generation = wpTask.getGeneration.get,
42
+ sleepBetweenRetries = wpTask.getSleepBetweenRetries.get
43
+ )
44
+ } else {
45
+ WritePolicy()
46
+ }
47
+ }
48
+
49
+ implicit val policy: ClientPolicy = {
50
+ if (tsk.getClientPolicy.isPresent) {
51
+ val cpTask: ClientPolicyTask = tsk.getClientPolicy.get
52
+ ClientPolicy(
53
+ failIfNotConnected = cpTask.getFailIfNotConnected.get,
54
+ maxThreads = cpTask.getMaxThreads.get,
55
+ maxSocketIdle = cpTask.getMaxSocketIdle.get,
56
+ password = cpTask.getPassword.orNull,
57
+ user = cpTask.getUser.orNull,
58
+ timeout = cpTask.getTimeout.get,
59
+ tendInterval = cpTask.getTendInterval.get,
60
+ writePolicyDefault = wp
61
+ )
62
+ } else {
63
+ ClientPolicy(writePolicyDefault = wp)
64
+ }
65
+ }
66
+
67
+ private[this] val hosts: Seq[Host] = tsk.getHosts.map(host => new Host(host.getName, host.getPort))
68
+ private[this] val executor = AsyncCommandExecutor(AsyncClient(hosts: _*))
69
+ private[this] val aerospike = new Aerospike(executor) {
70
+ override protected def namespace: String = tsk.getNamespace
71
+ override protected def setName: String = tsk.getSetName
72
+ }
73
+ private[this] val reader: PageReader = new PageReader(schema)
74
+
75
+ private[this] def toJson(a: Any): Json = a match {
76
+ case v: Boolean => v.asJson
77
+ case v: Int => v.asJson
78
+ case v: Long => v.asJson
79
+ case v: Double => v.asJson
80
+ case v: String => v.asJson
81
+ case v: Seq[Any] => Json.array(v.map(x => toJson(x)): _*)
82
+ case v: Map[String, Any] => Json.fromFields(v.map { case (k, va) => (k, toJson(va)) } toSeq)
83
+ case _ => ???
84
+ }
85
+
86
+ implicit val encoder = Encoder.instance[Any](toJson)
87
+
88
+ def add(page: Page) {
89
+ reader.setPage(page)
90
+ while (reader.nextRecord()) {
91
+ val record: MMap[String, Any] = MMap.empty
92
+ val schema = reader.getSchema
93
+ schema.getColumns.foreach { col =>
94
+ val name = col.getName
95
+ col.getType.getName match {
96
+ case "long" => if (!(reader isNull col))
97
+ record += name -> reader.getLong(col)
98
+ case "double" => if (!(reader isNull col))
99
+ record += name -> reader.getDouble(col)
100
+ case "timestamp" => if (!(reader isNull col))
101
+ record += name -> reader.getTimestamp(col).toEpochMilli
102
+ case "boolean" => if (!(reader isNull col))
103
+ record += name -> reader.getBoolean(col)
104
+ case "string" => if (!(reader isNull col)) {
105
+ val cv = reader.getString(col)
106
+ if (tsk.getSplitters.isPresent) {
107
+ val sps = tsk.getSplitters.get.toMap
108
+ sps.get(name) match {
109
+ case None => //
110
+ record += name -> cv
111
+ case Some(v) =>
112
+ val sep = v.getSeparator
113
+ v.getElementType match {
114
+ case "long" =>
115
+ val x = cv.split(sep).map(s => if (s.isEmpty) "0" else s).map(_.toLong)
116
+ record += name -> x
117
+ case "double" =>
118
+ val x = cv.split(sep).map(s => if (s.isEmpty) "0" else s).map(_.toDouble)
119
+ record += name -> x
120
+ case "string" =>
121
+ val x = cv.split(sep)
122
+ record += name -> x
123
+ }
124
+ }
125
+ } else {
126
+ record += name -> cv
127
+ }
128
+ }
129
+ case typ => log.error(typ + "[?]")
130
+ }
131
+ }
132
+ val keyObj = record.getOrElse(tsk.getKeyName.get, "")
133
+ record -= tsk.getKeyName.get
134
+ tsk.getCommand match {
135
+ case "put" =>
136
+ val t =
137
+ if (tsk.getSingleBinName.isPresent)
138
+ aerospike.put(keyObj.toString, Map(tsk.getSingleBinName.get() -> record.toMap))
139
+ else
140
+ aerospike.put(keyObj.toString, record.toMap)
141
+ t runAsync {
142
+ case -\/(e) => log.error(e.toString, e.getCause)
143
+ case \/-(x) => x match {
144
+ case Left(ex) =>
145
+ failCount.addAndGet(1L)
146
+ log.error(ex.toString, ex.getCause)
147
+ case Right(v) =>
148
+ successCount.addAndGet(1L)
149
+ log.debug(v.toString)
150
+ }
151
+ }
152
+ case "delete" =>
153
+ val t = aerospike.delete(keyObj.toString)
154
+ t runAsync {
155
+ case -\/(e) => log.error(e.toString, e.getCause)
156
+ case \/-(x) => x match {
157
+ case Left(ex) =>
158
+ failCount.addAndGet(1L)
159
+ log.error(ex.toString, ex.getCause)
160
+ case Right(v) =>
161
+ successCount.addAndGet(1L)
162
+ log.debug(v.toString)
163
+ }
164
+ }
165
+ }
166
+ }
167
+ }
168
+
169
+ def finish(): Unit = log.info(s"finish ${tsk.getCommand} ok[${successCount.longValue}] ng[${failCount.longValue()}]")
170
+
171
+ def close(): Unit = {
172
+ reader.close()
173
+ executor.close
174
+ }
175
+
176
+ def abort(): Unit = log.error(s"abort ${tsk.getCommand} ok[${successCount.longValue}] ng[${failCount.longValue()}]")
177
+
178
+ def commit: TaskReport = Exec.newTaskReport
179
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-aerospike
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Takeru Sato
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-14 00:00:00.000000000 Z
11
+ date: 2015-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -49,20 +49,57 @@ files:
49
49
  - LICENSE.txt
50
50
  - README.md
51
51
  - build.gradle
52
- - embulk-output-aerospike.iml
53
52
  - gradle/wrapper/gradle-wrapper.jar
54
53
  - gradle/wrapper/gradle-wrapper.properties
55
54
  - gradlew
56
55
  - gradlew.bat
57
56
  - lib/embulk/output/aerospike.rb
58
- - src/main/java/org/embulk/output/aerospike/AerospikeOutputPlugin.java
59
- - src/main/java/org/embulk/output/aerospike/AerospikePageOutput.java
57
+ - src/main/scala/org/embulk/output/aerospike/AerospikeOutputPlugin.scala
58
+ - src/main/scala/org/embulk/output/aerospike/AerospikePageOutput.scala
60
59
  - src/test/java/org/embulk/output/aerospike/TestAerospikeOutputPlugin.java
61
- - classpath/aerospike-client-3.1.6.jar
62
- - classpath/embulk-output-aerospike-0.1.0.jar
60
+ - classpath/aerospike-client-3.1.4.jar
61
+ - classpath/aerospiker-core_2.11-0.4.0-SNAPSHOT.jar
62
+ - classpath/aerospiker-msgpack_2.11-0.4.0-SNAPSHOT.jar
63
+ - classpath/aerospiker-task_2.11-0.4.0-SNAPSHOT.jar
64
+ - classpath/algebra-laws_2.11-0.2.1.jar
65
+ - classpath/algebra-std_2.11-0.2.1.jar
66
+ - classpath/algebra_2.11-0.2.1.jar
67
+ - classpath/asm-5.0.3.jar
68
+ - classpath/cats-core_2.11-0.1.2.jar
69
+ - classpath/cats-free_2.11-0.1.2.jar
70
+ - classpath/cats-laws_2.11-0.1.2.jar
71
+ - classpath/cats-macros_2.11-0.1.2.jar
72
+ - classpath/cats-state_2.11-0.1.2.jar
73
+ - classpath/cats-std_2.11-0.1.2.jar
74
+ - classpath/cats_2.11-0.1.2.jar
75
+ - classpath/circe-core_2.11-0.1.1.jar
76
+ - classpath/circe-generic_2.11-0.1.1.jar
77
+ - classpath/circe-jawn_2.11-0.1.1.jar
78
+ - classpath/commons-math3-3.2.jar
79
+ - classpath/discipline_2.11-0.3.jar
80
+ - classpath/embulk-output-aerospike-0.2.0.jar
63
81
  - classpath/gnu-crypto-2.0.1.jar
82
+ - classpath/jawn-parser_2.11-0.8.0.jar
64
83
  - classpath/jbcrypt-0.3m.jar
84
+ - classpath/jmh-core-1.9.1.jar
85
+ - classpath/jmh-generator-asm-1.9.1.jar
86
+ - classpath/jmh-generator-bytecode-1.9.1.jar
87
+ - classpath/jmh-generator-reflection-1.9.1.jar
88
+ - classpath/jopt-simple-4.6.jar
65
89
  - classpath/luaj-jse-3.0.jar
90
+ - classpath/machinist_2.11-0.3.0.jar
91
+ - classpath/scala-library-2.11.7.jar
92
+ - classpath/scala-logging_2.11-3.1.0.jar
93
+ - classpath/scala-parser-combinators_2.11-1.0.4.jar
94
+ - classpath/scala-reflect-2.11.2.jar
95
+ - classpath/scala-xml_2.11-1.0.4.jar
96
+ - classpath/scalacheck_2.11-1.12.4.jar
97
+ - classpath/scalaz-concurrent_2.11-7.1.3.jar
98
+ - classpath/scalaz-core_2.11-7.1.3.jar
99
+ - classpath/scalaz-effect_2.11-7.1.3.jar
100
+ - classpath/shapeless_2.11-2.2.5.jar
101
+ - classpath/simulacrum_2.11-0.3.0.jar
102
+ - classpath/test-interface-1.0.jar
66
103
  homepage: https://github.com/tkrs/embulk-output-aerospike
67
104
  licenses:
68
105
  - MIT
@@ -1,168 +0,0 @@
1
- package org.embulk.output.aerospike;
2
-
3
- import com.google.common.base.Optional;
4
- import org.embulk.config.*;
5
- import org.embulk.spi.Exec;
6
- import org.embulk.spi.OutputPlugin;
7
- import org.embulk.spi.Schema;
8
- import org.embulk.spi.TransactionalPageOutput;
9
-
10
- import java.util.List;
11
- import java.util.Map;
12
-
13
- public class AerospikeOutputPlugin
14
- implements OutputPlugin
15
- {
16
- @Override
17
- public ConfigDiff transaction(ConfigSource config,
18
- Schema schema, int taskCount,
19
- OutputPlugin.Control control) {
20
- PluginTask task = config.loadConfig(PluginTask.class);
21
-
22
- // retryable (idempotent) output:
23
- // return resume(task.dump(), schema, taskCount, control);
24
-
25
- // non-retryable (non-idempotent) output:
26
- control.run(task.dump());
27
- return Exec.newConfigDiff();
28
- }
29
-
30
- @Override
31
- public ConfigDiff resume(TaskSource taskSource,
32
- Schema schema, int taskCount,
33
- OutputPlugin.Control control)
34
- {
35
- throw new UnsupportedOperationException("aerospike output plugin does not support resuming");
36
- }
37
-
38
- @Override
39
- public void cleanup(TaskSource taskSource,
40
- Schema schema, int taskCount,
41
- List<TaskReport> successTaskReports) {
42
- }
43
-
44
- @Override
45
- public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int taskIndex) {
46
- return new AerospikePageOutput(taskSource, schema, taskIndex);
47
- }
48
-
49
- public interface PluginTask
50
- extends Task {
51
- @Config("hosts")
52
- List<HostTask> getHost();
53
-
54
- @Config("command")
55
- String getCommand();
56
-
57
- @Config("namespace")
58
- String getNamespace();
59
-
60
- @Config("set_name")
61
- String getSetName();
62
-
63
- @Config("key_name")
64
- @ConfigDefault("key")
65
- Optional<String> getKeyName();
66
-
67
- @Config("client_policy")
68
- @ConfigDefault("null")
69
- Optional<ClientPolicyTask> getClientPolicy();
70
-
71
- @Config("write_policy")
72
- @ConfigDefault("null")
73
- Optional<WritePolicyTask> getWritePolicy();
74
-
75
- @Config("single_bin_name")
76
- @ConfigDefault("null")
77
- Optional<String> getSingleBinName();
78
-
79
- @Config("splitters")
80
- @ConfigDefault("null")
81
- Optional<Map<String, SplitterTask>> getSplitters();
82
-
83
- @Config("parallel")
84
- @ConfigDefault("false")
85
- Boolean getParallel();
86
-
87
- }
88
-
89
- public interface SplitterTask
90
- extends Task {
91
-
92
- @Config("separator")
93
- @ConfigDefault(",")
94
- String getSeparator();
95
-
96
- @Config("element_type")
97
- @ConfigDefault("string")
98
- String getElementType();
99
- }
100
-
101
- public interface HostTask
102
- extends Task {
103
-
104
- @Config("name")
105
- String getName();
106
-
107
- @Config("port")
108
- int getPort();
109
- }
110
-
111
- public interface ClientPolicyTask
112
- extends Task {
113
-
114
- @Config("user")
115
- @ConfigDefault("null")
116
- Optional<String> getUser();
117
-
118
- @Config("password")
119
- @ConfigDefault("null")
120
- Optional<String> getPassword();
121
-
122
- @Config("timeout")
123
- @ConfigDefault("null")
124
- Optional<Integer> getTimeout();
125
-
126
- @Config("max_threads")
127
- @ConfigDefault("null")
128
- Optional<Integer> getMaxThreads();
129
-
130
- @Config("max_socket_idle")
131
- @ConfigDefault("null")
132
- Optional<Integer> getMaxSocketIdle();
133
-
134
- @Config("tend_interval")
135
- @ConfigDefault("null")
136
- Optional<Integer> getTendInterval();
137
-
138
- @Config("fail_if_not_connected")
139
- @ConfigDefault("null")
140
- Optional<Boolean> getFailIfNotConnected();
141
-
142
- }
143
-
144
- public interface WritePolicyTask
145
- extends Task {
146
-
147
- @Config("generation")
148
- @ConfigDefault("null")
149
- Optional<Integer> getGeneration();
150
-
151
- @Config("expiration")
152
- @ConfigDefault("null")
153
- Optional<Integer> getExpiration();
154
-
155
- @Config("max_retries")
156
- @ConfigDefault("null")
157
- Optional<Integer> getMaxRetries();
158
-
159
- @Config("send_key")
160
- @ConfigDefault("null")
161
- Optional<Boolean> getSendKey();
162
-
163
- @Config("sleep_between_retries")
164
- @ConfigDefault("null")
165
- Optional<Integer> getSleepBetweenRetries();
166
-
167
- }
168
- }
@@ -1,218 +0,0 @@
1
- package org.embulk.output.aerospike;
2
-
3
- import com.aerospike.client.*;
4
- import com.aerospike.client.async.AsyncClient;
5
- import com.aerospike.client.async.AsyncClientPolicy;
6
- import com.aerospike.client.listener.DeleteListener;
7
- import com.aerospike.client.listener.WriteListener;
8
- import com.aerospike.client.policy.WritePolicy;
9
- import com.google.common.base.Optional;
10
- import org.embulk.config.TaskReport;
11
- import org.embulk.config.TaskSource;
12
- import org.embulk.spi.*;
13
- import org.embulk.spi.type.Type;
14
- import org.jruby.ir.Tuple;
15
- import org.slf4j.Logger;
16
-
17
- import java.util.*;
18
- import java.util.concurrent.atomic.AtomicLong;
19
- import java.util.function.Consumer;
20
- import java.util.stream.Collectors;
21
- import java.util.stream.Stream;
22
- import java.util.stream.StreamSupport;
23
-
24
- public class AerospikePageOutput implements TransactionalPageOutput {
25
-
26
- private final Logger log = Exec.getLogger(AerospikePageOutput.class);
27
- private final AerospikeOutputPlugin.PluginTask task;
28
- private final AtomicLong counter = new AtomicLong();
29
- private final AsyncClient aerospike;
30
- private final PageReader reader;
31
-
32
- public AerospikePageOutput(TaskSource taskSource, final Schema schema, int taskIndex) {
33
- reader = new PageReader(schema);
34
- task = taskSource.loadTask(AerospikeOutputPlugin.PluginTask.class);
35
- List<Host> hosts = task.getHost().stream()
36
- .map(host -> new Host(host.getName(), host.getPort()))
37
- .collect(Collectors.toList());
38
-
39
- AsyncClientPolicy policy = new AsyncClientPolicy();
40
- if (task.getClientPolicy().isPresent()) {
41
- AerospikeOutputPlugin.ClientPolicyTask cpTask = task.getClientPolicy().get();
42
- policy.failIfNotConnected = cpTask.getFailIfNotConnected().or(policy.failIfNotConnected);
43
- policy.maxThreads = cpTask.getMaxThreads().or(policy.maxThreads);
44
- policy.maxSocketIdle = cpTask.getMaxSocketIdle().or(policy.maxSocketIdle);
45
- policy.password = cpTask.getPassword().orNull();
46
- policy.user = cpTask.getUser().orNull();
47
- policy.timeout = cpTask.getTimeout().or(policy.timeout);
48
- policy.tendInterval = cpTask.getTendInterval().or(policy.tendInterval);
49
- }
50
-
51
- WritePolicy wp = new WritePolicy();
52
- if (task.getWritePolicy().isPresent()) {
53
- AerospikeOutputPlugin.WritePolicyTask wpTask = task.getWritePolicy().get();
54
- wp.sendKey = wpTask.getSendKey().or(wp.sendKey);
55
- wp.expiration = wpTask.getExpiration().or(wp.expiration);
56
- wp.maxRetries = wpTask.getMaxRetries().or(wp.maxRetries);
57
- wp.generation = wpTask.getGeneration().or(wp.generation);
58
- wp.sleepBetweenRetries = wpTask.getSleepBetweenRetries().or(wp.sleepBetweenRetries);
59
- }
60
-
61
- policy.asyncWritePolicyDefault = wp;
62
- aerospike = new AsyncClient(policy, hosts.toArray(new Host[hosts.size()]));
63
- }
64
-
65
- @Override
66
- public void add(Page page) {
67
-
68
- if (!aerospike.isConnected()) {
69
- System.out.println("not connected");
70
- return;
71
- }
72
-
73
- reader.setPage(page);
74
-
75
- Iterator<Tuple<Key, Map<String, Object>>> it = new Iterator<Tuple<Key, Map<String, Object>>>() {
76
- @Override public boolean hasNext() { return reader.nextRecord(); }
77
- @Override public Tuple<Key, Map<String, Object>> next() {
78
- Schema sc = reader.getSchema();
79
- Map<String, Object> bins = new HashMap<>();
80
- Object keyObj = "";
81
-
82
- for (Column column : sc.getColumns()) {
83
- String name = column.getName();
84
- String keyName = task.getKeyName().get();
85
- Type type = column.getType();
86
- switch (type.getName()) {
87
- case "string": {
88
- if (reader.isNull(column)) break;
89
- final String value = reader.getString(column);
90
- if (name.equals(keyName)) {
91
- keyObj = value;
92
- break;
93
- }
94
- final Optional<Object> v = task.getSplitters().transform(stMap -> {
95
- if (stMap.containsKey(name)) {
96
-
97
- List<String> values = Arrays.asList(value.split(stMap.get(name).getSeparator()));
98
- switch (stMap.get(name).getElementType()) {
99
- case "long":
100
- return values.stream().map(s -> s.isEmpty() ? "0" : s).map(Long::valueOf).collect(Collectors.toList());
101
- case "double":
102
- return values.stream().map(s -> s.isEmpty() ? "0.0" : s).map(Double::valueOf).collect(Collectors.toList());
103
- case "string":
104
- default:
105
- return values;
106
- }
107
- } else {
108
- return value;
109
- }
110
- });
111
- bins.put(name, v.or(value));
112
- break;
113
- }
114
- case "long": {
115
- if (reader.isNull(column)) break;
116
- Long value = reader.getLong(column);
117
- if (name.equals(keyName)) {
118
- keyObj = value;
119
- break;
120
- }
121
- bins.put(name, value);
122
- break;
123
- }
124
- case "double": {
125
- if (reader.isNull(column)) break;
126
- Double value = reader.getDouble(column);
127
- if (name.equals(keyName)) {
128
- keyObj = value;
129
- break;
130
- }
131
- bins.put(name, value);
132
- break;
133
- }
134
- case "boolean": {
135
- if (reader.isNull(column)) break;
136
- Boolean value = reader.getBoolean(column);
137
- if (name.equals(keyName)) {
138
- keyObj = value;
139
- break;
140
- }
141
- bins.put(name, value);
142
- break;
143
- }
144
- case "timestamp": {
145
- if (reader.isNull(column)) break;
146
- Long value = reader.getTimestamp(column).toEpochMilli();
147
- if (name.equals(keyName)) {
148
- keyObj = value;
149
- break;
150
- }
151
- bins.put(name, value);
152
- break;
153
- }
154
- default:
155
- break;
156
- }
157
- }
158
-
159
- if (log.isDebugEnabled()) log.debug(keyObj.toString());
160
- Key key = new Key(task.getNamespace(), task.getSetName(), Value.get(keyObj));
161
- return new Tuple<>(key, bins);
162
- }
163
- };
164
-
165
- Spliterator<Tuple<Key, Map<String, Object>>> spliterator = Spliterators.spliteratorUnknownSize(it, Spliterator.IMMUTABLE);
166
-
167
- Stream<Tuple<Key, Map<String, Object>>> stream = StreamSupport.stream(spliterator, task.getParallel());
168
-
169
- Consumer<Tuple<Key, List<Bin>>> action;
170
- switch (task.getCommand()) {
171
- case "put":
172
- action = rec -> aerospike.put(null, new WriteListener() {
173
- @Override public void onSuccess(Key key) { counter.addAndGet(1L); }
174
- @Override public void onFailure(AerospikeException e) { log.error(e.getMessage(), e); }
175
- }, rec.a, rec.b.toArray(new Bin[rec.b.size()]));
176
- break;
177
- case "delete":
178
- action = rec -> aerospike.delete(null, new DeleteListener() {
179
- @Override public void onSuccess(Key key, boolean existed) { counter.addAndGet(1L); }
180
- @Override public void onFailure(AerospikeException e) { log.error(e.getMessage(), e); }
181
- }, rec.a);
182
- break;
183
- default:
184
- return;
185
- }
186
-
187
- stream.map(t -> {
188
- List<Bin> bins = new ArrayList<>();
189
- if (task.getSingleBinName().isPresent())
190
- bins.add(new Bin(task.getSingleBinName().get(), t.b));
191
- else
192
- t.b.entrySet().forEach(rec -> bins.add(new Bin(rec.getKey(), rec.getValue())));
193
- return new Tuple<>(t.a, bins);
194
- }).forEach(action);
195
- }
196
-
197
- @Override
198
- public void finish() {
199
- log.info("finish %s [%l]", task.getCommand(), counter.longValue());
200
- }
201
-
202
- @Override
203
- public void close() {
204
- reader.close();
205
- aerospike.close();
206
- }
207
-
208
- @Override
209
- public void abort() {
210
- log.warn("abort");
211
- }
212
-
213
- @Override
214
- public TaskReport commit() {
215
- return Exec.newTaskReport();
216
- }
217
-
218
- }