embulk-input-pubsub 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.scalafmt.conf +13 -0
- data/LICENSE +21 -0
- data/README.md +75 -0
- data/build.gradle +87 -0
- data/classpath/animal-sniffer-annotations-1.18.jar +0 -0
- data/classpath/annotations-4.1.1.4.jar +0 -0
- data/classpath/auto-value-annotations-1.6.6.jar +0 -0
- data/classpath/checker-compat-qual-2.5.5.jar +0 -0
- data/classpath/commons-codec-1.11.jar +0 -0
- data/classpath/commons-lang3-3.5.jar +0 -0
- data/classpath/commons-logging-1.2.jar +0 -0
- data/classpath/embulk-input-pubsub-0.0.1-shadow.jar +0 -0
- data/classpath/error_prone_annotations-2.3.2.jar +0 -0
- data/classpath/google-auth-library-credentials-0.18.0.jar +0 -0
- data/classpath/google-auth-library-oauth2-http-0.18.0.jar +0 -0
- data/classpath/google-cloud-core-1.91.3.jar +0 -0
- data/classpath/google-cloud-core-grpc-1.91.3.jar +0 -0
- data/classpath/google-http-client-1.32.1.jar +0 -0
- data/classpath/google-http-client-jackson2-1.32.1.jar +0 -0
- data/classpath/grpc-alts-1.23.0.jar +0 -0
- data/classpath/grpc-auth-1.23.0.jar +0 -0
- data/classpath/grpc-context-1.24.1.jar +0 -0
- data/classpath/grpc-google-cloud-pubsub-v1-1.82.0.jar +0 -0
- data/classpath/grpc-grpclb-1.23.0.jar +0 -0
- data/classpath/grpc-protobuf-1.24.1.jar +0 -0
- data/classpath/grpc-protobuf-lite-1.24.1.jar +0 -0
- data/classpath/gson-2.8.5.jar +0 -0
- data/classpath/httpclient-4.5.10.jar +0 -0
- data/classpath/httpcore-4.4.12.jar +0 -0
- data/classpath/j2objc-annotations-1.3.jar +0 -0
- data/classpath/jackson-core-2.9.9.jar +0 -0
- data/classpath/javax.annotation-api-1.3.2.jar +0 -0
- data/classpath/jsr305-3.0.2.jar +0 -0
- data/classpath/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar +0 -0
- data/classpath/opencensus-api-0.24.0.jar +0 -0
- data/classpath/opencensus-contrib-grpc-metrics-0.21.0.jar +0 -0
- data/classpath/opencensus-contrib-http-util-0.24.0.jar +0 -0
- data/classpath/perfmark-api-0.17.0.jar +0 -0
- data/classpath/proto-google-cloud-pubsub-v1-1.82.0.jar +0 -0
- data/classpath/proto-google-common-protos-1.17.0.jar +0 -0
- data/classpath/proto-google-iam-v1-0.13.0.jar +0 -0
- data/classpath/protobuf-java-3.10.0.jar +0 -0
- data/classpath/protobuf-java-util-3.10.0.jar +0 -0
- data/classpath/scala-library-2.13.1.jar +0 -0
- data/classpath/threetenbp-1.3.3.jar +0 -0
- data/examples/pubsub2stdout.yaml +10 -0
- data/gradle.properties +1 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/input/pubsub.rb +3 -0
- data/src/main/java/com/embulk/input/pubsub/checkpoint/Checkpoint.java +734 -0
- data/src/main/java/com/embulk/input/pubsub/checkpoint/CheckpointOrBuilder.java +33 -0
- data/src/main/java/com/embulk/input/pubsub/checkpoint/CheckpointProtos.java +61 -0
- data/src/main/resources/checkpoint.proto +11 -0
- data/src/main/scala/org/embulk/input/pubsub/PluginTask.scala +42 -0
- data/src/main/scala/org/embulk/input/pubsub/PubsubBatchSubscriber.scala +103 -0
- data/src/main/scala/org/embulk/input/pubsub/PubsubInputPlugin.scala +142 -0
- data/src/main/scala/org/embulk/input/pubsub/checkpoint/StoredCheckpoint.scala +123 -0
- metadata +105 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
+
// source: src/main/resources/checkpoint.proto
|
3
|
+
|
4
|
+
package com.embulk.input.pubsub.checkpoint;
|
5
|
+
|
6
|
+
public interface CheckpointOrBuilder extends
|
7
|
+
// @@protoc_insertion_point(interface_extends:Checkpoint)
|
8
|
+
com.google.protobuf.MessageOrBuilder {
|
9
|
+
|
10
|
+
/**
|
11
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
12
|
+
*/
|
13
|
+
java.util.List<com.google.pubsub.v1.PubsubMessage>
|
14
|
+
getMessagesList();
|
15
|
+
/**
|
16
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
17
|
+
*/
|
18
|
+
com.google.pubsub.v1.PubsubMessage getMessages(int index);
|
19
|
+
/**
|
20
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
21
|
+
*/
|
22
|
+
int getMessagesCount();
|
23
|
+
/**
|
24
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
25
|
+
*/
|
26
|
+
java.util.List<? extends com.google.pubsub.v1.PubsubMessageOrBuilder>
|
27
|
+
getMessagesOrBuilderList();
|
28
|
+
/**
|
29
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
30
|
+
*/
|
31
|
+
com.google.pubsub.v1.PubsubMessageOrBuilder getMessagesOrBuilder(
|
32
|
+
int index);
|
33
|
+
}
|
@@ -0,0 +1,61 @@
|
|
1
|
+
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
+
// source: src/main/resources/checkpoint.proto
|
3
|
+
|
4
|
+
package com.embulk.input.pubsub.checkpoint;
|
5
|
+
|
6
|
+
public final class CheckpointProtos {
|
7
|
+
private CheckpointProtos() {}
|
8
|
+
public static void registerAllExtensions(
|
9
|
+
com.google.protobuf.ExtensionRegistryLite registry) {
|
10
|
+
}
|
11
|
+
|
12
|
+
public static void registerAllExtensions(
|
13
|
+
com.google.protobuf.ExtensionRegistry registry) {
|
14
|
+
registerAllExtensions(
|
15
|
+
(com.google.protobuf.ExtensionRegistryLite) registry);
|
16
|
+
}
|
17
|
+
static final com.google.protobuf.Descriptors.Descriptor
|
18
|
+
internal_static_Checkpoint_descriptor;
|
19
|
+
static final
|
20
|
+
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
|
21
|
+
internal_static_Checkpoint_fieldAccessorTable;
|
22
|
+
|
23
|
+
public static com.google.protobuf.Descriptors.FileDescriptor
|
24
|
+
getDescriptor() {
|
25
|
+
return descriptor;
|
26
|
+
}
|
27
|
+
private static com.google.protobuf.Descriptors.FileDescriptor
|
28
|
+
descriptor;
|
29
|
+
static {
|
30
|
+
java.lang.String[] descriptorData = {
|
31
|
+
"\n#src/main/resources/checkpoint.proto\032(g" +
|
32
|
+
"oogleapis/google/pubsub/v1/pubsub.proto\"" +
|
33
|
+
"?\n\nCheckpoint\0221\n\010messages\030\001 \003(\0132\037.google" +
|
34
|
+
".pubsub.v1.PubsubMessageB8\n\"com.embulk.i" +
|
35
|
+
"nput.pubsub.checkpointB\020CheckpointProtos" +
|
36
|
+
"P\001b\006proto3"
|
37
|
+
};
|
38
|
+
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
|
39
|
+
new com.google.protobuf.Descriptors.FileDescriptor. InternalDescriptorAssigner() {
|
40
|
+
public com.google.protobuf.ExtensionRegistry assignDescriptors(
|
41
|
+
com.google.protobuf.Descriptors.FileDescriptor root) {
|
42
|
+
descriptor = root;
|
43
|
+
return null;
|
44
|
+
}
|
45
|
+
};
|
46
|
+
com.google.protobuf.Descriptors.FileDescriptor
|
47
|
+
.internalBuildGeneratedFileFrom(descriptorData,
|
48
|
+
new com.google.protobuf.Descriptors.FileDescriptor[] {
|
49
|
+
com.google.pubsub.v1.PubsubProto.getDescriptor(),
|
50
|
+
}, assigner);
|
51
|
+
internal_static_Checkpoint_descriptor =
|
52
|
+
getDescriptor().getMessageTypes().get(0);
|
53
|
+
internal_static_Checkpoint_fieldAccessorTable = new
|
54
|
+
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
|
55
|
+
internal_static_Checkpoint_descriptor,
|
56
|
+
new java.lang.String[] { "Messages", });
|
57
|
+
com.google.pubsub.v1.PubsubProto.getDescriptor();
|
58
|
+
}
|
59
|
+
|
60
|
+
// @@protoc_insertion_point(outer_class_scope)
|
61
|
+
}
|
@@ -0,0 +1,11 @@
|
|
1
|
+
// Schema of the checkpoint message used by embulk-input-pubsub.
syntax = "proto3";

import "googleapis/google/pubsub/v1/pubsub.proto";

option java_multiple_files = true;
option java_outer_classname = "CheckpointProtos";
option java_package = "com.embulk.input.pubsub.checkpoint";

// A group of Pub/Sub messages kept together as one checkpoint.
message Checkpoint {
  // The messages that belong to this checkpoint.
  repeated google.pubsub.v1.PubsubMessage messages = 1;
}
|
@@ -0,0 +1,42 @@
|
|
1
|
+
package org.embulk.input.pubsub
|
2
|
+
|
3
|
+
import java.util.Optional
|
4
|
+
|
5
|
+
import org.embulk.config.{Config, ConfigDefault, ConfigInject, Task}
|
6
|
+
import org.embulk.spi.BufferAllocator
|
7
|
+
|
8
|
+
/**
  * Configuration of the Pub/Sub input plugin, bound to the keys named in each
  * `@Config` annotation.
  */
trait PluginTask extends Task {

  /** Value of `project_id`; required (no default is declared). */
  @Config("project_id")
  def getProjectId: String

  /** Value of `subscription_id`; required (no default is declared). */
  @Config("subscription_id")
  def getSubscriptionId: String

  /** Value of `json_keyfile`; required (no default is declared). */
  @Config("json_keyfile")
  def getJsonKeyfile: String

  /** Value of `num_tasks`; defaults to 1. */
  @Config("num_tasks")
  @ConfigDefault("1")
  def getNumTasks: Int

  /** Value of `max_messages`; defaults to 10. */
  @Config("max_messages")
  @ConfigDefault("10")
  def getMaxMessages: Int

  /** Value of `checkpoint_basedir`; empty when the key is not configured. */
  @Config("checkpoint_basedir")
  @ConfigDefault("null")
  def getCheckpointBasedir: Optional[String]

  /** Value of `checkpoint`; empty when the key is not configured. */
  @Config("checkpoint")
  @ConfigDefault("null")
  def getCheckpoint: Optional[String]

  /** Setter paired with [[getCheckpoint]]. */
  def setCheckpoint(checkpoint: Optional[String]): Unit

  /** Value of `payload_encoding`; defaults to `"string"`. */
  @Config("payload_encoding")
  @ConfigDefault("\"string\"")
  def getPayloadEncoding: String

  /** Buffer allocator supplied through `@ConfigInject` rather than a config key. */
  @ConfigInject
  def getBufferAllocator: BufferAllocator
}
|
@@ -0,0 +1,103 @@
|
|
1
|
+
package org.embulk.input.pubsub
|
2
|
+
|
3
|
+
import java.io.FileInputStream
|
4
|
+
|
5
|
+
import com.google.api.gax.core.FixedCredentialsProvider
|
6
|
+
import com.google.auth.oauth2.GoogleCredentials
|
7
|
+
import com.google.cloud.pubsub.v1.stub.{
|
8
|
+
GrpcSubscriberStub,
|
9
|
+
SubscriberStubSettings
|
10
|
+
}
|
11
|
+
import com.google.protobuf.Empty
|
12
|
+
import com.google.pubsub.v1.{
|
13
|
+
AcknowledgeRequest,
|
14
|
+
ProjectSubscriptionName,
|
15
|
+
PullRequest,
|
16
|
+
PullResponse
|
17
|
+
}
|
18
|
+
import org.embulk.input.pubsub.checkpoint.StoredCheckpoint
|
19
|
+
|
20
|
+
import scala.jdk.CollectionConverters._
|
21
|
+
import scala.util.{Success, Try}
|
22
|
+
|
23
|
+
/**
  * A Cloud Pub/Sub subscriber that pulls one batch of messages per call and
  * stores the batch as a checkpoint before acknowledging it.
  *
  * @param projectId        project passed to `ProjectSubscriptionName.of`
  * @param subscriptionName subscription passed to `ProjectSubscriptionName.of`
  * @param pathToCredJson   path of the file the Google credentials are read from
  */
case class PubsubBatchSubscriber private (
    projectId: String,
    subscriptionName: String,
    pathToCredJson: String
) {
  // The key file is only needed while the credentials are parsed, so the
  // handle is released right after instead of staying open for the lifetime
  // of the subscriber.
  private val credentials = {
    val keyFile = new FileInputStream(pathToCredJson)
    try GoogleCredentials.fromStream(keyFile)
    finally keyFile.close()
  }

  private val settings = SubscriberStubSettings
    .newBuilder()
    .setCredentialsProvider(FixedCredentialsProvider.create(credentials))
    .setTransportChannelProvider(
      SubscriberStubSettings.defaultGrpcTransportProviderBuilder().build()
    )
    .build()

  /**
    * Pulls up to `count` messages, stores them as a checkpoint and then
    * acknowledges them.
    *
    * The steps run in that order, so messages are acknowledged only after the
    * checkpoint was created successfully. The gRPC stub opened for the call is
    * always closed before this method returns.
    *
    * @param count         maximum number of messages requested from Pub/Sub
    * @param checkpointDir forwarded to `StoredCheckpoint.create`
    * @return the created checkpoint, or the first failure among creating the
    *         stub, pulling, storing and acknowledging
    */
  def pull(count: Int, checkpointDir: Option[String]): Try[StoredCheckpoint] = {
    val subscription =
      ProjectSubscriptionName.of(projectId, subscriptionName).toString

    Try(GrpcSubscriberStub.create(settings)).flatMap { subscriber =>
      // `Try` is evaluated eagerly, so all remote calls have completed by the
      // time the `finally` block releases the stub.
      try {
        for {
          res <- pullImpl(subscriber, subscription, count)
          messages = res.getReceivedMessagesList.asScala
          checkpoint <- StoredCheckpoint.create(
            messages.map(_.getMessage).toSeq,
            checkpointDir
          )
          _ <- ackImpl(subscriber, subscription, messages.map(_.getAckId))
        } yield checkpoint
      } finally {
        subscriber.close()
      }
    }
  }

  /** Issues a single pull request for at most `count` messages. */
  private def pullImpl(
      subscriber: GrpcSubscriberStub,
      subscription: String,
      count: Int
  ): Try[PullResponse] = {
    val req = PullRequest
      .newBuilder()
      .setSubscription(subscription)
      .setReturnImmediately(true)
      .setMaxMessages(count)
      .build()

    Try(subscriber.pullCallable().call(req))
  }

  /**
    * Acknowledges `ackIds`; no request is sent when there is nothing to
    * acknowledge.
    */
  private def ackImpl(
      subscriber: GrpcSubscriberStub,
      subscription: String,
      ackIds: Iterable[String]
  ): Try[Empty] = {
    if (ackIds.nonEmpty) {
      val ack = AcknowledgeRequest
        .newBuilder()
        .setSubscription(subscription)
        .addAllAckIds(ackIds.asJava)
        .build()

      Try(subscriber.acknowledgeCallable().call(ack))
    } else {
      Success(Empty.getDefaultInstance)
    }
  }

}
|
95
|
+
|
96
|
+
object PubsubBatchSubscriber {

  /**
    * Builds a subscriber from the project id, subscription id and JSON key
    * file configured on `task`.
    */
  def of(task: PluginTask): PubsubBatchSubscriber = {
    val project = task.getProjectId
    val subscription = task.getSubscriptionId
    val keyfile = task.getJsonKeyfile
    PubsubBatchSubscriber(project, subscription, keyfile)
  }
}
|
@@ -0,0 +1,142 @@
|
|
1
|
+
package org.embulk.input.pubsub
|
2
|
+
|
3
|
+
import java.nio.charset.StandardCharsets
|
4
|
+
import java.util.{Base64, Optional, List => JList}
|
5
|
+
|
6
|
+
import com.fasterxml.jackson.databind.ObjectMapper
|
7
|
+
import org.embulk.config.{
|
8
|
+
ConfigDiff,
|
9
|
+
ConfigException,
|
10
|
+
ConfigSource,
|
11
|
+
TaskReport,
|
12
|
+
TaskSource
|
13
|
+
}
|
14
|
+
import org.embulk.input.pubsub.checkpoint.StoredCheckpoint
|
15
|
+
import org.embulk.spi.`type`.Types
|
16
|
+
import org.embulk.spi.{
|
17
|
+
DataException,
|
18
|
+
Exec,
|
19
|
+
InputPlugin,
|
20
|
+
PageBuilder,
|
21
|
+
PageOutput,
|
22
|
+
Schema
|
23
|
+
}
|
24
|
+
import org.embulk.spi.json.JsonParser
|
25
|
+
import org.slf4j.LoggerFactory
|
26
|
+
|
27
|
+
import scala.jdk.OptionConverters._
|
28
|
+
import scala.jdk.CollectionConverters._
|
29
|
+
import scala.util.{Failure, Success}
|
30
|
+
|
31
|
+
/**
  * Embulk input plugin that emits the messages of a Pub/Sub checkpoint as
  * records with a `payload` (string) and an `attribute` (JSON) column.
  */
case class PubsubInputPlugin() extends InputPlugin {
  private val logger = LoggerFactory.getLogger(this.getClass)

  private val jsonParser = new JsonParser()
  private val objectMapper = new ObjectMapper()

  private val schema = Schema
    .builder()
    .add("payload", Types.STRING) // string or base64 encoded bytes
    .add("attribute", Types.JSON)
    .build()

  /**
    * Loads the task configuration, creates a checkpoint when none is
    * configured and hands over to [[resume]].
    *
    * @throws ConfigException when `payload_encoding` is not supported
    */
  override def transaction(
      config: ConfigSource,
      control: InputPlugin.Control
  ): ConfigDiff = {
    val task = config.loadConfig(classOf[PluginTask])

    // Reject an unsupported encoding before anything is pulled: the pull below
    // acknowledges the messages, so failing later in `run` would be too late.
    payloadEncoder(task.getPayloadEncoding)

    if (!task.getCheckpoint.isPresent) {
      val sub = PubsubBatchSubscriber.of(task)
      // `.get` deliberately rethrows a failed pull so the transaction aborts.
      val checkpoint =
        sub.pull(task.getMaxMessages, task.getCheckpointBasedir.toScala).get
      task.setCheckpoint(Optional.of(checkpoint.id))

      logger.info(s"Created a new checkpoint! : ${checkpoint.id}")
    }

    resume(task.dump(), schema, task.getNumTasks, control)
  }

  /** Runs the tasks through `control` and returns an empty config diff. */
  override def resume(
      taskSource: TaskSource,
      schema: Schema,
      taskCount: Int,
      control: InputPlugin.Control
  ): ConfigDiff = {
    control.run(taskSource, schema, taskCount)
    Exec.newConfigDiff()
  }

  /**
    * Removes the checkpoint referenced by the task. Failures are logged, not
    * thrown, so cleanup stays best-effort.
    */
  override def cleanup(
      taskSource: TaskSource,
      schema: Schema,
      taskCount: Int,
      successTaskReports: JList[TaskReport]
  ): Unit = {
    val task = taskSource.loadTask(classOf[PluginTask])

    task.getCheckpoint.toScala match {
      case None =>
        logger.warn("no checkpoint is set on the task; nothing to clean up")
      case Some(checkpointId) =>
        StoredCheckpoint.from(
          checkpointId,
          task.getCheckpointBasedir.isPresent
        ) match {
          case Success(sc) =>
            // Pass the throwable as well so the stack trace is not lost.
            sc.cleanup.failed.foreach { e =>
              logger.error(s"failed to cleanup: ${e.toString}", e)
            }
          case Failure(e) =>
            logger.error(s"failed to fetch checkpoint: ${e.toString}", e)
        }
    }
  }

  /**
    * Emits this task's share of the checkpointed messages.
    *
    * Every task loads the same checkpoint, so the messages are split by
    * position: task `taskIndex` emits the messages whose index modulo
    * `num_tasks` equals `taskIndex`. Without the split each message would be
    * emitted once per task.
    *
    * @throws ConfigException when `payload_encoding` is not supported
    * @throws DataException   when the checkpoint is missing or cannot be loaded
    */
  override def run(
      taskSource: TaskSource,
      schema: Schema,
      taskIndex: Int,
      output: PageOutput
  ): TaskReport = {
    val task = taskSource.loadTask(classOf[PluginTask])
    val encoder = payloadEncoder(task.getPayloadEncoding)

    val checkpointId = task.getCheckpoint.toScala.getOrElse(
      throw new DataException("no checkpoint is set on the task")
    )
    val checkpoint =
      StoredCheckpoint.from(checkpointId, task.getCheckpointBasedir.isPresent)
    val messages = checkpoint match {
      case Success(cp) => cp.content.getMessagesList.asScala
      case Failure(e) =>
        throw new DataException(s"unexpected checkpoint state: ${checkpoint}", e)
    }

    // Guard against a non-positive `num_tasks` so the modulo below is defined.
    val taskCount = math.max(task.getNumTasks, 1)
    val assigned = messages.iterator.zipWithIndex.collect {
      case (msg, index) if index % taskCount == taskIndex => msg
    }

    // Created last so that no builder is left open when validation fails above.
    val pageBuilder = new PageBuilder(task.getBufferAllocator, schema, output)
    try {
      assigned.foreach { msg =>
        pageBuilder.setString(
          pageBuilder.getSchema.getColumn(0),
          encoder(msg.getData.toByteArray)
        )

        // The attribute map is serialized to JSON text and parsed back into
        // Embulk's JSON value representation.
        val json = objectMapper.writeValueAsString(msg.getAttributesMap)
        pageBuilder.setJson(
          pageBuilder.getSchema.getColumn(1),
          jsonParser.parse(json)
        )

        pageBuilder.addRecord()
      }
      pageBuilder.finish()
    } finally {
      pageBuilder.close()
    }

    Exec.newTaskReport()
  }

  /** No guessing is implemented; always returns an empty config diff. */
  override def guess(config: ConfigSource): ConfigDiff =
    Exec.newConfigDiff()

  /**
    * Maps a `payload_encoding` value to the function that renders a payload:
    * `"string"` decodes the bytes as UTF-8, `"binary"` Base64-encodes them.
    *
    * @throws ConfigException for any other value
    */
  private def payloadEncoder(encoding: String): Array[Byte] => String =
    encoding match {
      case "string" =>
        (data: Array[Byte]) => new String(data, StandardCharsets.UTF_8)
      case "binary" =>
        (data: Array[Byte]) => Base64.getEncoder.encodeToString(data)
      case e => throw new ConfigException(s"unsupported encoding: ${e}")
    }

}
|
@@ -0,0 +1,123 @@
|
|
1
|
+
package org.embulk.input.pubsub.checkpoint
|
2
|
+
|
3
|
+
import java.io.{File, FileInputStream, FileOutputStream}
|
4
|
+
|
5
|
+
import com.embulk.input.pubsub.checkpoint.Checkpoint
|
6
|
+
import com.google.pubsub.v1.PubsubMessage
|
7
|
+
import org.embulk.config.ConfigException
|
8
|
+
|
9
|
+
import scala.collection.mutable
|
10
|
+
import scala.util.{Failure, Success, Try}
|
11
|
+
import scala.jdk.CollectionConverters._
|
12
|
+
|
13
|
+
/**
  * A checkpoint held in a storage that may or may not be persistent.
  */
sealed trait StoredCheckpoint {

  /** Key under which `StoredCheckpoint.from` finds this checkpoint again. */
  def id: String

  /** The checkpoint message itself. */
  def content: Checkpoint

  /** Removes this checkpoint from its storage. */
  def cleanup: Try[Unit]
}
|
21
|
+
|
22
|
+
object StoredCheckpoint {

  /**
    * Stores `messages` as a new checkpoint: in a local file when `dir` is
    * given, otherwise in memory only.
    */
  def create(
      messages: Seq[PubsubMessage],
      dir: Option[String]
  ): Try[StoredCheckpoint] =
    dir match {
      case Some(basedir) =>
        LocalFileStoredCheckpoint.withPersistency(basedir, messages)
      case _ =>
        Success(inMemory(messages))
    }

  /**
    * Looks up an existing checkpoint by `id`, in local files when
    * `persistent` is true and in memory otherwise.
    */
  def from(id: String, persistent: Boolean): Try[StoredCheckpoint] =
    if (!persistent) MemoryStoredStoredCheckpoint.from(id)
    else LocalFileStoredCheckpoint.from(id)

  /** Wraps `messages` into a checkpoint that is registered in memory. */
  private def inMemory(messages: Seq[PubsubMessage]): StoredCheckpoint = {
    val builder = Checkpoint.newBuilder()
    builder.addAllMessages(messages.asJava)
    MemoryStoredStoredCheckpoint.withoutPersistency(builder.build())
  }
}
|
47
|
+
|
48
|
+
/**
  * A checkpoint that lives only in the companion's in-memory map and is
  * therefore not persisted.
  *
  * @param id      key of this checkpoint in the in-memory map
  * @param content the checkpoint message
  */
case class MemoryStoredStoredCheckpoint private (
    id: String,
    content: Checkpoint
) extends StoredCheckpoint {

  /**
    * Drops this checkpoint from the in-memory map; fails when no entry was
    * registered under `id`.
    */
  override def cleanup: Try[Unit] =
    MemoryStoredStoredCheckpoint.storage
      .remove(id)
      .fold[Try[Unit]](
        Failure(new ConfigException(s"A checkpoint ${id} is not deletable"))
      )(_ => Success(()))
}
|
68
|
+
|
69
|
+
object MemoryStoredStoredCheckpoint {
  // Registry of all in-memory checkpoints, keyed by checkpoint id.
  // NOTE(review): plain mutable map without synchronisation -- confirm it is
  // never written while tasks read it from other threads.
  private val storage =
    mutable.Map.empty[String, MemoryStoredStoredCheckpoint]

  /** Looks up the checkpoint registered under `key`; fails when there is none. */
  def from(key: String): Try[MemoryStoredStoredCheckpoint] =
    Try(storage.apply(key))

  /**
    * Registers `content` in memory and returns the resulting checkpoint.
    *
    * The id is the decimal string of `content.hashCode()`, so two contents
    * with the same hash code share one entry.
    */
  def withoutPersistency(content: Checkpoint): StoredCheckpoint = {
    val entry =
      MemoryStoredStoredCheckpoint(content.hashCode().toString, content)
    storage.update(entry.id, entry)
    entry
  }

}
|
82
|
+
|
83
|
+
/**
  * A checkpoint stored as a file in the local filesystem.
  *
  * @param id      path of the file holding the checkpoint
  * @param content the checkpoint message
  */
case class LocalFileStoredCheckpoint private (id: String, content: Checkpoint)
    extends StoredCheckpoint {

  /**
    * Deletes the checkpoint file.
    *
    * A file that is already gone counts as cleaned up. Any other deletion
    * problem is reported as a `Failure`; `java.io.File.delete` would only
    * signal it through a boolean that is easy to drop.
    */
  override def cleanup: Try[Unit] =
    Try {
      java.nio.file.Files.deleteIfExists(java.nio.file.Paths.get(id))
      ()
    }
}
|
98
|
+
|
99
|
+
object LocalFileStoredCheckpoint {

  /**
    * Reads the checkpoint stored at `path`.
    *
    * The input stream is closed whether or not parsing succeeds.
    *
    * @param path location of the checkpoint file; also becomes the checkpoint id
    * @return the parsed checkpoint, or the failure raised while opening,
    *         parsing or closing the file
    */
  def from(path: String): Try[LocalFileStoredCheckpoint] =
    scala.util
      .Using(new FileInputStream(path)) { in =>
        Checkpoint.parseFrom(in)
      }
      .map(parsed => LocalFileStoredCheckpoint(path, parsed))

  /**
    * Writes `messages` as a checkpoint file and returns the stored checkpoint.
    *
    * The file path is `prefix` followed by `checkpoint-` and the hash code of
    * `messages`. `prefix` is prepended verbatim, so a directory prefix has to
    * end with a path separator. The output stream is closed even when writing
    * fails.
    *
    * @param prefix   text prepended to the generated file name
    * @param messages messages to persist
    * @return the stored checkpoint, or the failure raised while opening,
    *         writing or closing the file
    */
  def withPersistency(
      prefix: String,
      messages: Seq[PubsubMessage]
  ): Try[LocalFileStoredCheckpoint] = {
    val path = s"${prefix}checkpoint-${messages.hashCode().toString}"
    val content = Checkpoint
      .newBuilder()
      .addAllMessages(messages.asJava)
      .build()

    scala.util
      .Using(new FileOutputStream(path)) { out =>
        content.writeTo(out)
      }
      .map(_ => LocalFileStoredCheckpoint(path, content))
  }
}
|