embulk-input-pubsub 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.scalafmt.conf +13 -0
- data/LICENSE +21 -0
- data/README.md +75 -0
- data/build.gradle +87 -0
- data/classpath/animal-sniffer-annotations-1.18.jar +0 -0
- data/classpath/annotations-4.1.1.4.jar +0 -0
- data/classpath/auto-value-annotations-1.6.6.jar +0 -0
- data/classpath/checker-compat-qual-2.5.5.jar +0 -0
- data/classpath/commons-codec-1.11.jar +0 -0
- data/classpath/commons-lang3-3.5.jar +0 -0
- data/classpath/commons-logging-1.2.jar +0 -0
- data/classpath/embulk-input-pubsub-0.0.1-shadow.jar +0 -0
- data/classpath/error_prone_annotations-2.3.2.jar +0 -0
- data/classpath/google-auth-library-credentials-0.18.0.jar +0 -0
- data/classpath/google-auth-library-oauth2-http-0.18.0.jar +0 -0
- data/classpath/google-cloud-core-1.91.3.jar +0 -0
- data/classpath/google-cloud-core-grpc-1.91.3.jar +0 -0
- data/classpath/google-http-client-1.32.1.jar +0 -0
- data/classpath/google-http-client-jackson2-1.32.1.jar +0 -0
- data/classpath/grpc-alts-1.23.0.jar +0 -0
- data/classpath/grpc-auth-1.23.0.jar +0 -0
- data/classpath/grpc-context-1.24.1.jar +0 -0
- data/classpath/grpc-google-cloud-pubsub-v1-1.82.0.jar +0 -0
- data/classpath/grpc-grpclb-1.23.0.jar +0 -0
- data/classpath/grpc-protobuf-1.24.1.jar +0 -0
- data/classpath/grpc-protobuf-lite-1.24.1.jar +0 -0
- data/classpath/gson-2.8.5.jar +0 -0
- data/classpath/httpclient-4.5.10.jar +0 -0
- data/classpath/httpcore-4.4.12.jar +0 -0
- data/classpath/j2objc-annotations-1.3.jar +0 -0
- data/classpath/jackson-core-2.9.9.jar +0 -0
- data/classpath/javax.annotation-api-1.3.2.jar +0 -0
- data/classpath/jsr305-3.0.2.jar +0 -0
- data/classpath/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar +0 -0
- data/classpath/opencensus-api-0.24.0.jar +0 -0
- data/classpath/opencensus-contrib-grpc-metrics-0.21.0.jar +0 -0
- data/classpath/opencensus-contrib-http-util-0.24.0.jar +0 -0
- data/classpath/perfmark-api-0.17.0.jar +0 -0
- data/classpath/proto-google-cloud-pubsub-v1-1.82.0.jar +0 -0
- data/classpath/proto-google-common-protos-1.17.0.jar +0 -0
- data/classpath/proto-google-iam-v1-0.13.0.jar +0 -0
- data/classpath/protobuf-java-3.10.0.jar +0 -0
- data/classpath/protobuf-java-util-3.10.0.jar +0 -0
- data/classpath/scala-library-2.13.1.jar +0 -0
- data/classpath/threetenbp-1.3.3.jar +0 -0
- data/examples/pubsub2stdout.yaml +10 -0
- data/gradle.properties +1 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/input/pubsub.rb +3 -0
- data/src/main/java/com/embulk/input/pubsub/checkpoint/Checkpoint.java +734 -0
- data/src/main/java/com/embulk/input/pubsub/checkpoint/CheckpointOrBuilder.java +33 -0
- data/src/main/java/com/embulk/input/pubsub/checkpoint/CheckpointProtos.java +61 -0
- data/src/main/resources/checkpoint.proto +11 -0
- data/src/main/scala/org/embulk/input/pubsub/PluginTask.scala +42 -0
- data/src/main/scala/org/embulk/input/pubsub/PubsubBatchSubscriber.scala +103 -0
- data/src/main/scala/org/embulk/input/pubsub/PubsubInputPlugin.scala +142 -0
- data/src/main/scala/org/embulk/input/pubsub/checkpoint/StoredCheckpoint.scala +123 -0
- metadata +105 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
+
// source: src/main/resources/checkpoint.proto
|
3
|
+
|
4
|
+
package com.embulk.input.pubsub.checkpoint;
|
5
|
+
|
6
|
+
public interface CheckpointOrBuilder extends
|
7
|
+
// @@protoc_insertion_point(interface_extends:Checkpoint)
|
8
|
+
com.google.protobuf.MessageOrBuilder {
|
9
|
+
|
10
|
+
/**
|
11
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
12
|
+
*/
|
13
|
+
java.util.List<com.google.pubsub.v1.PubsubMessage>
|
14
|
+
getMessagesList();
|
15
|
+
/**
|
16
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
17
|
+
*/
|
18
|
+
com.google.pubsub.v1.PubsubMessage getMessages(int index);
|
19
|
+
/**
|
20
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
21
|
+
*/
|
22
|
+
int getMessagesCount();
|
23
|
+
/**
|
24
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
25
|
+
*/
|
26
|
+
java.util.List<? extends com.google.pubsub.v1.PubsubMessageOrBuilder>
|
27
|
+
getMessagesOrBuilderList();
|
28
|
+
/**
|
29
|
+
* <code>repeated .google.pubsub.v1.PubsubMessage messages = 1;</code>
|
30
|
+
*/
|
31
|
+
com.google.pubsub.v1.PubsubMessageOrBuilder getMessagesOrBuilder(
|
32
|
+
int index);
|
33
|
+
}
|
@@ -0,0 +1,61 @@
|
|
1
|
+
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
+
// source: src/main/resources/checkpoint.proto
|
3
|
+
|
4
|
+
package com.embulk.input.pubsub.checkpoint;
|
5
|
+
|
6
|
+
public final class CheckpointProtos {
|
7
|
+
private CheckpointProtos() {}
|
8
|
+
public static void registerAllExtensions(
|
9
|
+
com.google.protobuf.ExtensionRegistryLite registry) {
|
10
|
+
}
|
11
|
+
|
12
|
+
public static void registerAllExtensions(
|
13
|
+
com.google.protobuf.ExtensionRegistry registry) {
|
14
|
+
registerAllExtensions(
|
15
|
+
(com.google.protobuf.ExtensionRegistryLite) registry);
|
16
|
+
}
|
17
|
+
static final com.google.protobuf.Descriptors.Descriptor
|
18
|
+
internal_static_Checkpoint_descriptor;
|
19
|
+
static final
|
20
|
+
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
|
21
|
+
internal_static_Checkpoint_fieldAccessorTable;
|
22
|
+
|
23
|
+
public static com.google.protobuf.Descriptors.FileDescriptor
|
24
|
+
getDescriptor() {
|
25
|
+
return descriptor;
|
26
|
+
}
|
27
|
+
private static com.google.protobuf.Descriptors.FileDescriptor
|
28
|
+
descriptor;
|
29
|
+
static {
|
30
|
+
java.lang.String[] descriptorData = {
|
31
|
+
"\n#src/main/resources/checkpoint.proto\032(g" +
|
32
|
+
"oogleapis/google/pubsub/v1/pubsub.proto\"" +
|
33
|
+
"?\n\nCheckpoint\0221\n\010messages\030\001 \003(\0132\037.google" +
|
34
|
+
".pubsub.v1.PubsubMessageB8\n\"com.embulk.i" +
|
35
|
+
"nput.pubsub.checkpointB\020CheckpointProtos" +
|
36
|
+
"P\001b\006proto3"
|
37
|
+
};
|
38
|
+
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
|
39
|
+
new com.google.protobuf.Descriptors.FileDescriptor. InternalDescriptorAssigner() {
|
40
|
+
public com.google.protobuf.ExtensionRegistry assignDescriptors(
|
41
|
+
com.google.protobuf.Descriptors.FileDescriptor root) {
|
42
|
+
descriptor = root;
|
43
|
+
return null;
|
44
|
+
}
|
45
|
+
};
|
46
|
+
com.google.protobuf.Descriptors.FileDescriptor
|
47
|
+
.internalBuildGeneratedFileFrom(descriptorData,
|
48
|
+
new com.google.protobuf.Descriptors.FileDescriptor[] {
|
49
|
+
com.google.pubsub.v1.PubsubProto.getDescriptor(),
|
50
|
+
}, assigner);
|
51
|
+
internal_static_Checkpoint_descriptor =
|
52
|
+
getDescriptor().getMessageTypes().get(0);
|
53
|
+
internal_static_Checkpoint_fieldAccessorTable = new
|
54
|
+
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
|
55
|
+
internal_static_Checkpoint_descriptor,
|
56
|
+
new java.lang.String[] { "Messages", });
|
57
|
+
com.google.pubsub.v1.PubsubProto.getDescriptor();
|
58
|
+
}
|
59
|
+
|
60
|
+
// @@protoc_insertion_point(outer_class_scope)
|
61
|
+
}
|
@@ -0,0 +1,11 @@
|
|
1
|
+
syntax = "proto3";
|
2
|
+
|
3
|
+
option java_multiple_files = true;
|
4
|
+
option java_outer_classname = "CheckpointProtos";
|
5
|
+
option java_package = "com.embulk.input.pubsub.checkpoint";
|
6
|
+
|
7
|
+
import "googleapis/google/pubsub/v1/pubsub.proto";
|
8
|
+
|
9
|
+
// A checkpoint: a list of Pub/Sub messages kept together in one repeated field.
message Checkpoint {
|
10
|
+
// Message type comes from the imported googleapis/google/pubsub/v1/pubsub.proto.
repeated google.pubsub.v1.PubsubMessage messages = 1;
|
11
|
+
}
|
@@ -0,0 +1,42 @@
|
|
1
|
+
package org.embulk.input.pubsub
|
2
|
+
|
3
|
+
import java.util.Optional
|
4
|
+
|
5
|
+
import org.embulk.config.{Config, ConfigDefault, ConfigInject, Task}
|
6
|
+
import org.embulk.spi.BufferAllocator
|
7
|
+
|
8
|
+
/**
 * Embulk task definition for the Pub/Sub input plugin.
 *
 * Each accessor is bound to the configuration key named in its `@Config` annotation; the
 * `@ConfigDefault` value (a JSON literal) applies when the key is omitted.
 */
trait PluginTask extends Task {

  /** Required `project_id` option. */
  @Config("project_id")
  def getProjectId: String

  /** Required `subscription_id` option. */
  @Config("subscription_id")
  def getSubscriptionId: String

  /** Required `json_keyfile` option: path of the credentials JSON file read by the subscriber. */
  @Config("json_keyfile")
  def getJsonKeyfile: String

  /** `num_tasks` option; defaults to 1. Passed to Embulk as the task count. */
  @Config("num_tasks") @ConfigDefault("1")
  def getNumTasks: Int

  /** `max_messages` option; defaults to 10. Upper bound handed to the pull request. */
  @Config("max_messages") @ConfigDefault("10")
  def getMaxMessages: Int

  /** Optional `checkpoint_basedir` option; when present, checkpoints are stored as local files. */
  @Config("checkpoint_basedir") @ConfigDefault("null")
  def getCheckpointBasedir: Optional[String]

  /** Optional `checkpoint` option: id of an existing checkpoint to read instead of pulling. */
  @Config("checkpoint") @ConfigDefault("null")
  def getCheckpoint: Optional[String]

  /** Stores the checkpoint id on the task so that it is carried in the dumped task source. */
  def setCheckpoint(checkpoint: Optional[String]): Unit

  /** `payload_encoding` option; defaults to `"string"`. The plugin accepts "string" or "binary". */
  @Config("payload_encoding") @ConfigDefault("\"string\"")
  def getPayloadEncoding: String

  /** Buffer allocator injected by Embulk. */
  @ConfigInject
  def getBufferAllocator: BufferAllocator
}
|
@@ -0,0 +1,103 @@
|
|
1
|
+
package org.embulk.input.pubsub
|
2
|
+
|
3
|
+
import java.io.FileInputStream
|
4
|
+
|
5
|
+
import com.google.api.gax.core.FixedCredentialsProvider
|
6
|
+
import com.google.auth.oauth2.GoogleCredentials
|
7
|
+
import com.google.cloud.pubsub.v1.stub.{
|
8
|
+
GrpcSubscriberStub,
|
9
|
+
SubscriberStubSettings
|
10
|
+
}
|
11
|
+
import com.google.protobuf.Empty
|
12
|
+
import com.google.pubsub.v1.{
|
13
|
+
AcknowledgeRequest,
|
14
|
+
ProjectSubscriptionName,
|
15
|
+
PullRequest,
|
16
|
+
PullResponse
|
17
|
+
}
|
18
|
+
import org.embulk.input.pubsub.checkpoint.StoredCheckpoint
|
19
|
+
|
20
|
+
import scala.jdk.CollectionConverters._
|
21
|
+
import scala.util.{Success, Try}
|
22
|
+
|
23
|
+
/**
 * A Cloud Pub/Sub subscriber that pulls one batch of messages, stores it as a checkpoint and
 * then acknowledges the batch.
 *
 * @param projectId        project that owns the subscription
 * @param subscriptionName name of the subscription to pull from
 * @param pathToCredJson   path of the credentials JSON file
 */
case class PubsubBatchSubscriber private (
    projectId: String,
    subscriptionName: String,
    pathToCredJson: String
) {
  // The key file is only needed while the credentials are parsed, so close it right away
  // instead of leaking the file descriptor.
  private val credentials = {
    val keyStream = new FileInputStream(pathToCredJson)
    try GoogleCredentials.fromStream(keyStream)
    finally keyStream.close()
  }

  private val settings = SubscriberStubSettings
    .newBuilder()
    .setCredentialsProvider(FixedCredentialsProvider.create(credentials))
    .setTransportChannelProvider(
      SubscriberStubSettings.defaultGrpcTransportProviderBuilder().build()
    )
    .build()

  /**
   * Pulls up to `count` messages, stores them as a checkpoint and acknowledges them.
   *
   * The steps run in order and stop at the first failure, so messages are acknowledged only
   * after the checkpoint has been created successfully.
   *
   * @param count         maximum number of messages to request
   * @param checkpointDir location handed to [[StoredCheckpoint.create]]; `None` keeps the
   *                      checkpoint in memory
   * @return the stored checkpoint, or the first failure (including failure to create the stub)
   */
  def pull(count: Int, checkpointDir: Option[String]): Try[StoredCheckpoint] = {
    val subscription =
      ProjectSubscriptionName.of(projectId, subscriptionName).toString

    // Stub creation can throw; keep it inside the Try so callers get a Failure.
    Try(GrpcSubscriberStub.create(settings)).flatMap { subscriber =>
      try {
        for {
          res <- pullImpl(subscriber, subscription, count)
          messages = res.getReceivedMessagesList.asScala
          checkpoint <- StoredCheckpoint.create(
            messages.map(_.getMessage).toSeq,
            checkpointDir
          )
          _ <- ackImpl(subscriber, subscription, messages.map(_.getAckId))
        } yield checkpoint
      } finally {
        // Every Try above has been evaluated by now; release the stub's channel and executors.
        subscriber.close()
      }
    }
  }

  /** Issues a single pull request that returns immediately with at most `count` messages. */
  private def pullImpl(
      subscriber: GrpcSubscriberStub,
      subscription: String,
      count: Int
  ): Try[PullResponse] = {
    val req = PullRequest
      .newBuilder()
      .setSubscription(subscription)
      .setReturnImmediately(true)
      .setMaxMessages(count)
      .build()

    Try(subscriber.pullCallable().call(req))
  }

  /** Acknowledges `ackIds`; no request is sent when there is nothing to acknowledge. */
  private def ackImpl(
      subscriber: GrpcSubscriberStub,
      subscription: String,
      ackIds: Iterable[String]
  ): Try[Empty] = {
    if (ackIds.nonEmpty) {
      val ack = AcknowledgeRequest
        .newBuilder()
        .setSubscription(subscription)
        .addAllAckIds(ackIds.asJava)
        .build()

      Try(subscriber.acknowledgeCallable().call(ack))
    } else {
      Success(Empty.getDefaultInstance)
    }
  }

}
|
95
|
+
|
96
|
+
object PubsubBatchSubscriber {

  /** Builds a subscriber from the project id, subscription id and key file path of `task`. */
  def of(task: PluginTask): PubsubBatchSubscriber = {
    val project = task.getProjectId
    val subscriptionId = task.getSubscriptionId
    val keyfile = task.getJsonKeyfile
    PubsubBatchSubscriber(project, subscriptionId, keyfile)
  }
}
|
@@ -0,0 +1,142 @@
|
|
1
|
+
package org.embulk.input.pubsub
|
2
|
+
|
3
|
+
import java.nio.charset.StandardCharsets
|
4
|
+
import java.util.{Base64, Optional, List => JList}
|
5
|
+
|
6
|
+
import com.fasterxml.jackson.databind.ObjectMapper
|
7
|
+
import org.embulk.config.{
|
8
|
+
ConfigDiff,
|
9
|
+
ConfigException,
|
10
|
+
ConfigSource,
|
11
|
+
TaskReport,
|
12
|
+
TaskSource
|
13
|
+
}
|
14
|
+
import org.embulk.input.pubsub.checkpoint.StoredCheckpoint
|
15
|
+
import org.embulk.spi.`type`.Types
|
16
|
+
import org.embulk.spi.{
|
17
|
+
DataException,
|
18
|
+
Exec,
|
19
|
+
InputPlugin,
|
20
|
+
PageBuilder,
|
21
|
+
PageOutput,
|
22
|
+
Schema
|
23
|
+
}
|
24
|
+
import org.embulk.spi.json.JsonParser
|
25
|
+
import org.slf4j.LoggerFactory
|
26
|
+
|
27
|
+
import scala.jdk.OptionConverters._
|
28
|
+
import scala.jdk.CollectionConverters._
|
29
|
+
import scala.util.{Failure, Success}
|
30
|
+
|
31
|
+
/**
 * Embulk input plugin that reads Cloud Pub/Sub messages through a checkpoint.
 *
 * `transaction` pulls a batch into a checkpoint (unless the configuration already names one),
 * `run` emits the checkpointed messages as records with a `payload` and an `attribute` column,
 * and `cleanup` removes the checkpoint.
 */
case class PubsubInputPlugin() extends InputPlugin {
  private val logger = LoggerFactory.getLogger(this.getClass)

  private val jsonParser = new JsonParser()
  private val objectMapper = new ObjectMapper()

  private val schema = Schema
    .builder()
    .add("payload", Types.STRING) // string or base64 encoded bytes
    .add("attribute", Types.JSON)
    .build()

  /**
   * Loads the configuration, creates a checkpoint when none is configured and starts the tasks.
   *
   * @throws ConfigException if `payload_encoding` is not supported
   */
  override def transaction(
      config: ConfigSource,
      control: InputPlugin.Control
  ): ConfigDiff = {
    val task = config.loadConfig(classOf[PluginTask])

    // Fail fast: pull() acknowledges the messages, so an invalid option must be rejected
    // before anything has been consumed from the subscription.
    payloadEncoder(task.getPayloadEncoding)

    if (!task.getCheckpoint.isPresent) {
      val sub = PubsubBatchSubscriber.of(task)
      val checkpoint =
        sub.pull(task.getMaxMessages, task.getCheckpointBasedir.toScala).get
      task.setCheckpoint(Optional.of(checkpoint.id))

      logger.info(s"Created a new checkpoint! : ${checkpoint.id}")
    }

    resume(task.dump(), schema, task.getNumTasks, control)
  }

  /** Runs the tasks and returns an empty config diff. */
  override def resume(
      taskSource: TaskSource,
      schema: Schema,
      taskCount: Int,
      control: InputPlugin.Control
  ): ConfigDiff = {
    control.run(taskSource, schema, taskCount)
    Exec.newConfigDiff()
  }

  /** Removes the checkpoint named by the task; failures are logged, never thrown. */
  override def cleanup(
      taskSource: TaskSource,
      schema: Schema,
      taskCount: Int,
      successTaskReports: JList[TaskReport]
  ): Unit = {
    val task = taskSource.loadTask(classOf[PluginTask])

    val checkpointId = task.getCheckpoint.get()
    val checkpoint =
      StoredCheckpoint.from(checkpointId, task.getCheckpointBasedir.isPresent)
    checkpoint match {
      case Success(sc) =>
        sc.cleanup match {
          case Success(_) =>
          // Pass the throwable as well so the stack trace is not lost.
          case Failure(e) => logger.error(s"failed to cleanup: ${e.toString}", e)
        }
      case Failure(e) =>
        logger.error(s"failed to fetch checkpoint: ${e.toString}", e)
    }
  }

  /**
   * Emits this task's share of the checkpointed messages.
   *
   * Every task loads the same checkpoint, so messages are distributed round-robin by position
   * (`position % num_tasks == taskIndex`); without this each task would emit every message and
   * `num_tasks` > 1 would duplicate the output.
   *
   * @throws ConfigException if `payload_encoding` is not supported
   * @throws DataException   if the checkpoint cannot be loaded
   */
  override def run(
      taskSource: TaskSource,
      schema: Schema,
      taskIndex: Int,
      output: PageOutput
  ): TaskReport = {
    val task = taskSource.loadTask(classOf[PluginTask])
    val allocator = task.getBufferAllocator
    val pageBuilder = new PageBuilder(allocator, schema, output)

    val encoder = payloadEncoder(task.getPayloadEncoding)

    val checkpointId = task.getCheckpoint.get()
    val checkpoint =
      StoredCheckpoint.from(checkpointId, task.getCheckpointBasedir.isPresent)
    val messages = checkpoint match {
      case Success(cp) => cp.content.getMessagesList.asScala
      case _ =>
        throw new DataException(s"unexpected checkpoint state: ${checkpoint}")
    }

    // transaction() starts exactly getNumTasks tasks; guard against a non-positive value.
    val shardCount = math.max(task.getNumTasks, 1)

    messages.iterator.zipWithIndex
      .collect { case (msg, position) if position % shardCount == taskIndex => msg }
      .foreach { msg =>
        pageBuilder.setString(
          pageBuilder.getSchema.getColumn(0),
          encoder(msg.getData.toByteArray)
        )

        // Attributes go through a JSON string to become an Embulk JSON value.
        val json = objectMapper.writeValueAsString(msg.getAttributesMap)
        pageBuilder.setJson(
          pageBuilder.getSchema.getColumn(1),
          jsonParser.parse(json)
        )

        pageBuilder.addRecord()
      }
    pageBuilder.finish()

    Exec.newTaskReport()
  }

  /** Guessing is not supported; always returns an empty config diff. */
  override def guess(config: ConfigSource): ConfigDiff =
    Exec.newConfigDiff()

  /**
   * Maps a `payload_encoding` value to the function that renders a message payload:
   * "string" decodes the bytes as UTF-8, "binary" Base64-encodes them.
   *
   * @throws ConfigException for any other value
   */
  private def payloadEncoder(encoding: String): Array[Byte] => String =
    encoding match {
      case "string" =>
        (data: Array[Byte]) => new String(data, StandardCharsets.UTF_8)
      case "binary" =>
        (data: Array[Byte]) => Base64.getEncoder.encodeToString(data)
      case e => throw new ConfigException(s"unsupported encoding: ${e}")
    }

}
|
@@ -0,0 +1,123 @@
|
|
1
|
+
package org.embulk.input.pubsub.checkpoint
|
2
|
+
|
3
|
+
import java.io.{File, FileInputStream, FileOutputStream}
|
4
|
+
|
5
|
+
import com.embulk.input.pubsub.checkpoint.Checkpoint
|
6
|
+
import com.google.pubsub.v1.PubsubMessage
|
7
|
+
import org.embulk.config.ConfigException
|
8
|
+
|
9
|
+
import scala.collection.mutable
|
10
|
+
import scala.util.{Failure, Success, Try}
|
11
|
+
import scala.jdk.CollectionConverters._
|
12
|
+
|
13
|
+
/**
 * A checkpoint kept in some storage, which may or may not be persistent.
 */
sealed trait StoredCheckpoint {

  /** Key under which the checkpoint can be looked up again. */
  def id: String

  /** The checkpointed messages. */
  def content: Checkpoint

  /** Removes the checkpoint from its storage. */
  def cleanup: Try[Unit]
}
|
21
|
+
|
22
|
+
object StoredCheckpoint {

  /**
   * Stores `messages` as a new checkpoint.
   *
   * @param messages messages to checkpoint
   * @param dir      when defined, the checkpoint is written to a local file under this
   *                 location; otherwise it is kept in memory only
   */
  def create(
      messages: Seq[PubsubMessage],
      dir: Option[String]
  ): Try[StoredCheckpoint] =
    dir.fold[Try[StoredCheckpoint]] {
      val builder = Checkpoint.newBuilder()
      builder.addAllMessages(messages.asJava)
      Success(MemoryStoredStoredCheckpoint.withoutPersistency(builder.build()))
    } { location =>
      LocalFileStoredCheckpoint.withPersistency(location, messages)
    }

  /**
   * Loads an existing checkpoint by id.
   *
   * @param id         checkpoint id
   * @param persistent `true` to read it from the local file system, `false` to read it from
   *                   memory
   */
  def from(id: String, persistent: Boolean): Try[StoredCheckpoint] =
    if (persistent) LocalFileStoredCheckpoint.from(id)
    else MemoryStoredStoredCheckpoint.from(id)
}
|
47
|
+
|
48
|
+
/**
 * A checkpoint that lives only in the companion's in-memory map and has no persistence.
 *
 * @param id      key of the checkpoint in the in-memory map
 * @param content the checkpointed messages
 */
case class MemoryStoredStoredCheckpoint private (id: String, content: Checkpoint)
    extends StoredCheckpoint {

  /** Removes this checkpoint from the in-memory map; fails when no entry exists for `id`. */
  override def cleanup: Try[Unit] =
    if (MemoryStoredStoredCheckpoint.storage.remove(id).isDefined) Success(())
    else Failure(new ConfigException(s"A checkpoint ${id} is not deletable"))
}
|
68
|
+
|
69
|
+
object MemoryStoredStoredCheckpoint {
  // Registry of in-memory checkpoints, keyed by checkpoint id.
  private val storage = mutable.Map[String, MemoryStoredStoredCheckpoint]()

  /** Looks up the checkpoint registered under `key`; a missing key yields a `Failure`. */
  def from(key: String): Try[MemoryStoredStoredCheckpoint] =
    Try(storage.apply(key))

  /**
   * Registers `content` as an in-memory checkpoint and returns it.
   *
   * The id is the string form of `content.hashCode()`, so registering content with the same
   * hash replaces the earlier entry.
   */
  def withoutPersistency(content: Checkpoint): StoredCheckpoint = {
    val entry = MemoryStoredStoredCheckpoint(content.hashCode().toString, content)
    storage.update(entry.id, entry)
    entry
  }

}
|
82
|
+
|
83
|
+
/**
 * A checkpoint stored as a file in the local filesystem.
 *
 * @param id      path of the checkpoint file
 * @param content the checkpointed messages
 */
case class LocalFileStoredCheckpoint private (id: String, content: Checkpoint)
    extends StoredCheckpoint {

  /**
   * Deletes the checkpoint file.
   *
   * Uses `Files.delete`, which throws when the file cannot be removed, so a failed deletion
   * surfaces as a `Failure` instead of being silently reported as success (the boolean result
   * of `File.delete` was previously discarded).
   */
  override def cleanup: Try[Unit] =
    Try(java.nio.file.Files.delete(java.nio.file.Paths.get(id)))
}
|
98
|
+
|
99
|
+
object LocalFileStoredCheckpoint {

  /**
   * Reads a checkpoint back from the file at `path`.
   *
   * The input stream is always closed, whether or not parsing succeeds.
   */
  def from(path: String): Try[LocalFileStoredCheckpoint] =
    Try {
      val in = new FileInputStream(path)
      try Checkpoint.parseFrom(in)
      finally in.close()
    }.map(LocalFileStoredCheckpoint(path, _))

  /**
   * Writes `messages` to a new checkpoint file and returns the stored checkpoint.
   *
   * The file path is `prefix` followed by `checkpoint-<hash of messages>`. `prefix` is
   * concatenated verbatim, so a directory must be given with its trailing separator.
   * The output stream is always closed, even when writing fails.
   */
  def withPersistency(
      prefix: String,
      messages: Seq[PubsubMessage]
  ): Try[LocalFileStoredCheckpoint] = {
    val path = s"${prefix}checkpoint-${messages.hashCode().toString}"
    val content = Checkpoint
      .newBuilder()
      .addAllMessages(messages.asJava)
      .build()

    Try {
      val out = new FileOutputStream(path)
      try content.writeTo(out)
      finally out.close()
    }.map(_ => LocalFileStoredCheckpoint(path, content))
  }
}
|