elasticgraph-warehouse_lambda 1.0.3.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +102 -0
- data/lib/elastic_graph/warehouse_lambda/config.rb +47 -0
- data/lib/elastic_graph/warehouse_lambda/lambda_function.rb +43 -0
- data/lib/elastic_graph/warehouse_lambda/warehouse_dumper.rb +143 -0
- data/lib/elastic_graph/warehouse_lambda.rb +120 -0
- metadata +166 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 1e5870b53e8b751aea6f3fd4b932482da4a2eaac9a1a81cf971ab478958fc2a5
|
|
4
|
+
data.tar.gz: 8270c7bb72f34432d5372661463732b73d3dbf146f48c807223d015ed5bebbad
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7cda9ec664426b19468fa308e33f100d656c802131f68208f93e9ad70eb3da3e60ba7295b59163c153aaa8eb07874272f702a928d96822ad92357827d84e5640
|
|
7
|
+
data.tar.gz: c5d0306aa7f53fbf092e35ba42f9fd6501cb6cb4cee7ec919d846649289a0d72c11a2e07d36774d485a52690ffb6b19441575d930a11ecdd503519219ab8e0b0
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 - 2026 Block, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# ElasticGraph::WarehouseLambda
|
|
2
|
+
|
|
3
|
+
Write ElasticGraph-shaped JSONL files to S3, packaged for AWS Lambda.
|
|
4
|
+
|
|
5
|
+
This gem adapts ElasticGraph's indexing pipeline so that, instead of writing to the datastore,
|
|
6
|
+
it writes batched, gzipped [JSON Lines](https://jsonlines.org/) (JSONL) files to Amazon S3. Each line in the file
|
|
7
|
+
conforms to a specific JSON Schema version for the corresponding object type, with files partitioned by schema version.
|
|
8
|
+
|
|
9
|
+
**Note:** This code does not deduplicate when writing to S3, so the data will contain all events
|
|
10
|
+
and versions published, plus any Lambda retries. Consumers of the S3 bucket are responsible for
|
|
11
|
+
deduplicating records by version when loading data into a warehouse.
|
|
12
|
+
|
|
13
|
+
## Dependency Diagram
|
|
14
|
+
|
|
15
|
+
```mermaid
|
|
16
|
+
graph LR;
|
|
17
|
+
classDef targetGemStyle fill:#FADBD8,stroke:#EC7063,color:#000,stroke-width:2px;
|
|
18
|
+
classDef otherEgGemStyle fill:#A9DFBF,stroke:#2ECC71,color:#000;
|
|
19
|
+
classDef externalGemStyle fill:#E0EFFF,stroke:#70A1D7,color:#2980B9;
|
|
20
|
+
elasticgraph-warehouse_lambda["elasticgraph-warehouse_lambda"];
|
|
21
|
+
class elasticgraph-warehouse_lambda targetGemStyle;
|
|
22
|
+
elasticgraph-indexer_lambda["elasticgraph-indexer_lambda"];
|
|
23
|
+
elasticgraph-warehouse_lambda --> elasticgraph-indexer_lambda;
|
|
24
|
+
class elasticgraph-indexer_lambda otherEgGemStyle;
|
|
25
|
+
elasticgraph-lambda_support["elasticgraph-lambda_support"];
|
|
26
|
+
elasticgraph-warehouse_lambda --> elasticgraph-lambda_support;
|
|
27
|
+
class elasticgraph-lambda_support otherEgGemStyle;
|
|
28
|
+
aws-sdk-s3["aws-sdk-s3"];
|
|
29
|
+
elasticgraph-warehouse_lambda --> aws-sdk-s3;
|
|
30
|
+
class aws-sdk-s3 externalGemStyle;
|
|
31
|
+
ox["ox"];
|
|
32
|
+
elasticgraph-warehouse_lambda --> ox;
|
|
33
|
+
class ox externalGemStyle;
|
|
34
|
+
click aws-sdk-s3 href "https://rubygems.org/gems/aws-sdk-s3" "Open on RubyGems.org" _blank;
|
|
35
|
+
click ox href "https://rubygems.org/gems/ox" "Open on RubyGems.org" _blank;
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## What it does
|
|
39
|
+
|
|
40
|
+
- Consumes ElasticGraph indexing operations and groups them by GraphQL type and JSON schema version
|
|
41
|
+
- Transforms each operation into a flattened JSON document that matches your ElasticGraph schema
|
|
42
|
+
- Writes one gzipped JSONL file per type per JSON schema version per batch to S3, using a fixed key layout with a random UUID file name:
|
|
43
|
+
- `s3://<bucket>/<s3_path_prefix>/<TypeName>/v<json_schema_version>/<YYYY-MM-DD>/<uuid>.jsonl.gz`
|
|
44
|
+
- Emits structured logs for observability (counts, sizes, S3 key, etc.)
|
|
45
|
+
|
|
46
|
+
## When to use it
|
|
47
|
+
|
|
48
|
+
Use this when you need a durable, append-only export of ElasticGraph data suitable for ingestion
|
|
49
|
+
by downstream systems (e.g., data warehouses, lakehouses, or offline analytics pipelines). It's a
|
|
50
|
+
drop-in replacement for the Indexer's datastore router: instead of indexing into the datastore,
|
|
51
|
+
you persist JSONL to S3.
|
|
52
|
+
|
|
53
|
+
This is particularly useful when you want to pair a full-fledged SQL-based data warehouse
|
|
54
|
+
(such as Databricks) with ElasticGraph. Managing the warehouse via ElasticGraph ensures
|
|
55
|
+
that the schema and data in the warehouse exactly match what's exposed from your
|
|
56
|
+
ElasticGraph GraphQL API, so that clients can seamlessly switch between GraphQL and SQL.
|
|
57
|
+
|
|
58
|
+
## Configuration
|
|
59
|
+
|
|
60
|
+
Configuration is sourced from your normal ElasticGraph YAML settings (via ELASTICGRAPH_YAML_CONFIG).
|
|
61
|
+
|
|
62
|
+
```yaml
|
|
63
|
+
warehouse:
|
|
64
|
+
s3_path_prefix: dumped-data/Data001 # Required: full S3 key prefix for organizing exports
|
|
65
|
+
s3_bucket_name: my-bucket-name # Required: the S3 bucket to write JSONL files into
|
|
66
|
+
aws_region: us-west-2 # Optional: AWS region for S3 bucket (defaults to AWS SDK region resolution)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Key format
|
|
70
|
+
|
|
71
|
+
Files are written with the following S3 key format:
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
<s3_path_prefix>/<TypeName>/v<json_schema_version>/<YYYY-MM-DD>/<uuid>.jsonl.gz
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
- **s3_path_prefix**: Configurable in YAML (warehouse.s3_path_prefix). This is the full prefix you control,
|
|
78
|
+
so you can organize your data however you like (e.g., "dumped-data/Data001" or "prod/analytics/v2").
|
|
79
|
+
- **TypeName**: GraphQL type from the ElasticGraph event
|
|
80
|
+
- **json_schema_version**: The JSON Schema version **selected based on the ingested event's requested version**
|
|
81
|
+
(or the closest available version if the exact version isn't available). This ensures data partitioning
|
|
82
|
+
matches the actual schema version used to process each event, making it easier to handle schema evolution
|
|
83
|
+
and version-specific data processing.
|
|
84
|
+
- **YYYY-MM-DD**: UTC date when the batch was processed (aligns with common data warehouse
|
|
85
|
+
partitioning strategies)
|
|
86
|
+
- **uuid**: A random UUID for uniqueness
|
|
87
|
+
|
|
88
|
+
## Runtime and deps
|
|
89
|
+
|
|
90
|
+
- Runs in AWS Lambda via elasticgraph-lambda_support
|
|
91
|
+
- Depends on elasticgraph-indexer_lambda for event preparation and schema artifacts
|
|
92
|
+
- Uses aws-sdk-s3 to write to S3
|
|
93
|
+
|
|
94
|
+
## Observability
|
|
95
|
+
|
|
96
|
+
The Lambda logs structured events:
|
|
97
|
+
- WarehouseLambdaReceivedBatch: counts per type for incoming batch
|
|
98
|
+
- DumpedToWarehouseFile: S3 bucket, S3 key, type, JSON schema version, record_count, json_size, gzip_size
|
|
99
|
+
|
|
100
|
+
## License
|
|
101
|
+
|
|
102
|
+
MIT © Block, Inc.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Copyright 2024 - 2026 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/support/config"
|
|
10
|
+
|
|
11
|
+
module ElasticGraph
|
|
12
|
+
# AWS Lambda integration for exporting ElasticGraph indexing data to S3 as gzipped JSONL files.
|
|
13
|
+
# This allows downstream analytics pipelines, data warehouses, and lakehouses to consume
|
|
14
|
+
# ElasticGraph data without querying the primary datastore.
|
|
15
|
+
class WarehouseLambda
|
|
16
|
+
# Settings for the warehouse lambda, read from the `warehouse` section of the ElasticGraph YAML config.
#
# Describes where in S3 the gzipped JSONL exports are written: the bucket, the key prefix, and
# (optionally) the AWS region to use for the S3 client.
class Config < Support::Config.define(:s3_path_prefix, :s3_bucket_name, :aws_region)
  # No whitespace allowed in any of the string settings below.
  no_whitespace_pattern = /^\S+$/.source

  json_schema(
    at: "warehouse",
    optional: true,
    description: "Configuration for the warehouse lambda used by `elasticgraph-warehouse_lambda`.",
    properties: {
      s3_path_prefix: {
        description: "The S3 path prefix to use when storing data files.",
        type: "string",
        pattern: no_whitespace_pattern,
        examples: ["Data001", "my-prefix"]
      },
      s3_bucket_name: {
        description: "The S3 bucket name to write JSONL files into.",
        type: "string",
        pattern: no_whitespace_pattern,
        examples: ["my-warehouse-bucket", "data-lake-prod"]
      },
      aws_region: {
        description: "Optional AWS region for the S3 bucket. If not specified, uses AWS SDK default region resolution (AWS_REGION env var, instance metadata, etc.).",
        type: ["string", "null"],
        pattern: no_whitespace_pattern,
        examples: ["us-west-2", "eu-central-1"],
        default: nil
      }
    },
    required: %w[s3_path_prefix s3_bucket_name]
  )
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 2024 - 2026 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/lambda_support/lambda_function"
|
|
10
|
+
require "json"
|
|
11
|
+
|
|
12
|
+
module ElasticGraph
|
|
13
|
+
class WarehouseLambda
|
|
14
|
+
# Lambda entry point that feeds SQS events through the warehouse lambda's processor.
#
# @private
class LambdaFunction
  prepend LambdaSupport::LambdaFunction

  # @dynamic sqs_processor
  attr_reader :sqs_processor

  # Loads the heavier dependencies lazily, builds the warehouse lambda from the environment,
  # and wires its processor and logger into an SQS processor.
  def initialize
    require "elastic_graph/warehouse_lambda"
    require "elastic_graph/indexer_lambda/sqs_processor"

    warehouse_lambda = WarehouseLambda.warehouse_lambda_from_env

    # The env var holds a JSON array of ARNs; when it is unset, an empty array is used.
    arns_json = ENV.fetch("IGNORE_SQS_LATENCY_TIMESTAMPS_FROM_ARNS", "[]")
    ignored_arns = ::JSON.parse(arns_json).to_set

    @sqs_processor = IndexerLambda::SqsProcessor.new(
      warehouse_lambda.processor,
      ignore_sqs_latency_timestamps_from_arns: ignored_arns,
      logger: warehouse_lambda.logger
    )
  end

  # Hands the incoming lambda event to the SQS processor. `context` is accepted but not used.
  def handle_request(event:, context:)
    sqs_processor.process(event)
  end
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Lambda handler for `elasticgraph-warehouse_lambda`.
|
|
43
|
+
DumpWarehouseData = ElasticGraph::WarehouseLambda::LambdaFunction.new
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# Copyright 2024 - 2026 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/constants"
|
|
10
|
+
require "elastic_graph/indexer/datastore_indexing_router"
|
|
11
|
+
require "elastic_graph/indexer/operation/result"
|
|
12
|
+
require "json"
|
|
13
|
+
require "securerandom"
|
|
14
|
+
require "time"
|
|
15
|
+
require "zlib"
|
|
16
|
+
|
|
17
|
+
module ElasticGraph
|
|
18
|
+
class WarehouseLambda
|
|
19
|
+
# Responsible for dumping data into a data warehouse. Implements the same interface as `DatastoreIndexingRouter` from
|
|
20
|
+
# `elasticgraph-indexer` so that it can be used in place of the standard datastore indexing router.
|
|
21
|
+
class WarehouseDumper
|
|
22
|
+
# @return [String] message type for logging when a batch is received
|
|
23
|
+
LOG_MSG_RECEIVED_BATCH = "WarehouseLambdaReceivedBatch"
|
|
24
|
+
|
|
25
|
+
# @return [String] message type for logging when a file is dumped to S3
|
|
26
|
+
LOG_MSG_DUMPED_FILE = "DumpedToWarehouseFile"
|
|
27
|
+
|
|
28
|
+
# Stores the collaborators used to dump data to S3.
#
# @param logger [Logger] receives the structured log events emitted while dumping
# @param s3_client [Aws::S3::Client] client used to upload files (must respond to `put_object`)
# @param s3_bucket_name [String] bucket the files are written into
# @param s3_file_prefix [String] leading segment of every generated S3 key
# @param clock [#now] source of the current time, used for the date segment of the S3 key
def initialize(logger:, s3_client:, s3_bucket_name:, s3_file_prefix:, clock:)
  # S3 destination.
  @s3_client = s3_client
  @s3_bucket_name = s3_bucket_name
  @s3_file_prefix = s3_file_prefix

  # Supporting collaborators.
  @logger = logger
  @clock = clock
end
|
|
35
|
+
|
|
36
|
+
# Processes a batch of indexing operations by dumping them to S3 as gzipped JSONL files.
# Operations are grouped by GraphQL type and JSON schema version, and each group that yields at
# least one record is written to its own S3 object. Groups that yield no records are skipped.
#
# @param operations [Array<Operation>] the indexing operations to process
# @param refresh [Boolean] ignored (included for interface compatibility with DatastoreIndexingRouter)
# @return [BulkResult] result reporting every given operation as successful under the "warehouse" key
def bulk(operations, refresh: false)
  operations_by_type_and_json_schema_version = operations.group_by do |op|
    [op.event.fetch("type"), op.event.fetch(JSON_SCHEMA_VERSION_KEY)]
  end

  @logger.info({
    "message_type" => LOG_MSG_RECEIVED_BATCH,
    # Tally straight from the operations. Collapsing the `[type, version]` group keys down to `type`
    # would let one version's count overwrite another's when a type arrives with several versions.
    "record_counts_by_type" => operations.map { |op| op.event.fetch("type") }.tally
  })

  operations_by_type_and_json_schema_version.each do |(type, json_schema_version), grouped_operations|
    # Operations coming from the indexer are always Update operations for warehouse dumping
    update_operations = grouped_operations # : ::Array[::ElasticGraph::Indexer::Operation::Update]
    jsonl_data = build_jsonl_file_from(update_operations)

    # Skip S3 upload if all operations were filtered out (no data to write)
    next if jsonl_data.empty?

    gzip_data = compress(jsonl_data)
    s3_key = generate_s3_key_for(type, json_schema_version)

    # Use if_none_match: "*" to prevent overwrites (defense-in-depth: the random UUID in the key
    # already makes a collision vanishingly unlikely).
    @s3_client.put_object(
      bucket: @s3_bucket_name,
      key: s3_key,
      body: gzip_data,
      checksum_algorithm: :sha256,
      if_none_match: "*"
    )

    @logger.info({
      "message_type" => LOG_MSG_DUMPED_FILE,
      "s3_bucket" => @s3_bucket_name,
      "s3_key" => s3_key,
      "type" => type,
      JSON_SCHEMA_VERSION_KEY => json_schema_version,
      # Report the lines actually written. Operations whose update target is a different type are
      # dropped while building the file, so the group size can overstate the file's contents.
      "record_count" => jsonl_data.each_line.count,
      "json_size" => jsonl_data.bytesize,
      "gzip_size" => gzip_data.bytesize
    })
  end

  ops_and_results = operations.map do |op|
    [op, ::ElasticGraph::Indexer::Operation::Result.success_of(op)]
  end # : ::Array[[::ElasticGraph::Indexer::_Operation, ::ElasticGraph::Indexer::Operation::Result]]

  ::ElasticGraph::Indexer::DatastoreIndexingRouter::BulkResult.new({"warehouse" => ops_and_results})
end
|
|
88
|
+
|
|
89
|
+
# Reports the event versions already present for the given operations.
# The warehouse keeps no version state, so the answer is always an empty hash.
#
# @param operations [Array<Operation>] the operations to check (unused)
# @return [Hash] always empty
def source_event_versions_in_index(operations)
  no_known_versions = {}
  no_known_versions
end
|
|
97
|
+
|
|
98
|
+
private
|
|
99
|
+
|
|
100
|
+
# Builds the S3 key for one dumped file:
# `<prefix>/<type>/v<json_schema_version>/<YYYY-MM-DD>/<uuid>.jsonl.gz`.
#
# The date segment is the current UTC date according to the clock; the file name is a fresh random UUID.
def generate_s3_key_for(type, json_schema_version)
  utc_day = @clock.now.utc.strftime("%Y-%m-%d")
  file_name = "#{::SecureRandom.uuid}.jsonl.gz"

  key_segments = [@s3_file_prefix, type, "v#{json_schema_version}", utc_day, file_name]
  key_segments.join("/")
end
|
|
112
|
+
|
|
113
|
+
# Renders the given operations as JSONL: one JSON document per line, joined by "\n" with no trailing newline.
#
# Operations whose update target type differs from the event type (derived indices) are left out.
# Each document is the script's "data" params with "id" and "__eg_version" merged in.
# Returns an empty string when no operation qualifies.
def build_jsonl_file_from(operations)
  json_lines = operations.each_with_object([]) do |op, lines|
    # Only include operations where the update target matches the event type (excludes derived indices)
    next unless op.update_target.type == op.event.fetch("type")

    script_params = op.to_datastore_bulk[1].fetch(:script).fetch(:params)
    record = script_params.fetch("data").merge({
      "id" => script_params.fetch("id"),
      "__eg_version" => script_params.fetch("version")
    })

    lines << ::JSON.generate(record)
  end

  json_lines.join("\n")
end
|
|
129
|
+
|
|
130
|
+
# Gzips the given JSONL payload in memory.
#
# Uses `Zlib.gzip` instead of a `GzipWriter` wrapped around a `StringIO`: it produces the same kind of
# gzip stream with the same compression settings, and avoids depending on `stringio`, which this
# file never requires.
#
# @param jsonl_data [String] the uncompressed JSONL content
# @return [String] the gzip-compressed bytes
def compress(jsonl_data)
  ::Zlib.gzip(jsonl_data, level: ::Zlib::DEFAULT_COMPRESSION, strategy: ::Zlib::DEFAULT_STRATEGY)
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Copyright 2024 - 2026 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/datastore_core"
|
|
10
|
+
require "elastic_graph/indexer/config"
|
|
11
|
+
require "elastic_graph/lambda_support"
|
|
12
|
+
require "elastic_graph/support/from_yaml_file"
|
|
13
|
+
require "elastic_graph/warehouse_lambda/config"
|
|
14
|
+
|
|
15
|
+
module ElasticGraph
|
|
16
|
+
# Wraps an {Indexer} to dump data to S3 instead of indexing to a datastore.
|
|
17
|
+
# This is a stateful wrapper class (unlike {IndexerLambda} and {GraphQLLambda},
|
|
18
|
+
# which are namespace modules), as it manages the relationship between the
|
|
19
|
+
# indexer, S3 client, and warehouse dumper.
|
|
20
|
+
#
|
|
21
|
+
# @private
|
|
22
|
+
class WarehouseLambda
|
|
23
|
+
extend Support::FromYamlFile
|
|
24
|
+
|
|
25
|
+
# Builds an `ElasticGraph::WarehouseLambda` instance from our lambda ENV vars,
# delegating the construction to `LambdaSupport.build_from_env`.
def self.warehouse_lambda_from_env
  LambdaSupport.build_from_env(WarehouseLambda)
end
|
|
29
|
+
|
|
30
|
+
# @dynamic config, indexer_config, datastore_core, logger, clock, indexer

# @return [Config] warehouse configuration
attr_reader :config

# @return [Indexer::Config] indexer configuration
attr_reader :indexer_config

# @return [DatastoreCore] datastore core for accessing schema artifacts
attr_reader :datastore_core

# @return [Logger] logger instance from datastore core
attr_reader :logger

# @return [Module] clock module for time generation
attr_reader :clock
|
|
37
|
+
|
|
38
|
+
# Builds an `ElasticGraph::WarehouseLambda` instance from parsed YAML configuration.
#
# The warehouse config is built with `Config.from_parsed_yaml!`; the indexer config falls back to
# `Indexer::Config.new` when `Indexer::Config.from_parsed_yaml` returns nothing.
#
# @param parsed_yaml [Hash] parsed YAML configuration
# @yield [Datastore::Client] optional block to customize the datastore client
# @return [WarehouseLambda] configured warehouse lambda instance
def self.from_parsed_yaml(parsed_yaml, &datastore_client_customization_block)
  warehouse_config = Config.from_parsed_yaml!(parsed_yaml)
  indexer_config = Indexer::Config.from_parsed_yaml(parsed_yaml) || Indexer::Config.new
  datastore_core = DatastoreCore.from_parsed_yaml(parsed_yaml, &datastore_client_customization_block)

  new(config: warehouse_config, indexer_config: indexer_config, datastore_core: datastore_core)
end
|
|
50
|
+
|
|
51
|
+
# Sets up a WarehouseLambda from its configuration and collaborators.
# The logger is taken from the given datastore core.
#
# @param config [Config] warehouse configuration
# @param indexer_config [Indexer::Config] indexer configuration
# @param datastore_core [DatastoreCore] datastore core for accessing schema artifacts
# @param clock [Module] clock module for time generation (defaults to {::Time})
# @param s3_client [Aws::S3::Client, nil] optional S3 client (for testing); when omitted, one is built lazily by {#s3_client}
def initialize(config:, indexer_config:, datastore_core:, clock: ::Time, s3_client: nil)
  # Configuration.
  @config = config
  @indexer_config = indexer_config

  # Collaborators.
  @datastore_core = datastore_core
  @logger = datastore_core.logger
  @clock = clock
  @s3_client = s3_client
end
|
|
66
|
+
|
|
67
|
+
# Exposes the event processor of the underlying indexer (see {#indexer}).
#
# @return [Processor] the processor that handles incoming events
def processor
  indexer.processor
end
|
|
73
|
+
|
|
74
|
+
# Returns the indexer, building and memoizing it on first access.
# The indexer is given the warehouse dumper as its datastore router.
#
# @return [Indexer] the indexer that processes events
def indexer
  return @indexer if @indexer

  require "elastic_graph/indexer"

  @indexer = Indexer.new(
    config: indexer_config,
    datastore_core: datastore_core,
    datastore_router: warehouse_dumper,
    clock: clock
  )
end
|
|
88
|
+
|
|
89
|
+
# Returns the warehouse dumper, building and memoizing it on first access.
# The dumper is configured with the S3 bucket name and path prefix from the warehouse config.
#
# @return [WarehouseDumper] the dumper that writes data to S3
def warehouse_dumper
  return @warehouse_dumper if @warehouse_dumper

  require "elastic_graph/warehouse_lambda/warehouse_dumper"

  @warehouse_dumper = WarehouseDumper.new(
    logger: logger,
    s3_client: s3_client,
    s3_bucket_name: config.s3_bucket_name,
    s3_file_prefix: config.s3_path_prefix,
    clock: clock
  )
end
|
|
104
|
+
|
|
105
|
+
# Returns the S3 client, building and memoizing it on first access unless one was injected.
# The configured `aws_region` is passed to the client when present; otherwise the client is
# constructed with no arguments.
#
# @return [Aws::S3::Client] the S3 client for uploading data
def s3_client
  return @s3_client if @s3_client

  require "aws-sdk-s3"

  region = config.aws_region
  @s3_client = region ? ::Aws::S3::Client.new(region: region) : ::Aws::S3::Client.new
end
|
|
119
|
+
end
|
|
120
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: elasticgraph-warehouse_lambda
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 1.0.3.rc1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Myron Marston
|
|
8
|
+
- Josh Wilson
|
|
9
|
+
- Block Engineering
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: elasticgraph-indexer_lambda
|
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
|
17
|
+
requirements:
|
|
18
|
+
- - '='
|
|
19
|
+
- !ruby/object:Gem::Version
|
|
20
|
+
version: 1.0.3.rc1
|
|
21
|
+
type: :runtime
|
|
22
|
+
prerelease: false
|
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
24
|
+
requirements:
|
|
25
|
+
- - '='
|
|
26
|
+
- !ruby/object:Gem::Version
|
|
27
|
+
version: 1.0.3.rc1
|
|
28
|
+
- !ruby/object:Gem::Dependency
|
|
29
|
+
name: elasticgraph-lambda_support
|
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
|
31
|
+
requirements:
|
|
32
|
+
- - '='
|
|
33
|
+
- !ruby/object:Gem::Version
|
|
34
|
+
version: 1.0.3.rc1
|
|
35
|
+
type: :runtime
|
|
36
|
+
prerelease: false
|
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
38
|
+
requirements:
|
|
39
|
+
- - '='
|
|
40
|
+
- !ruby/object:Gem::Version
|
|
41
|
+
version: 1.0.3.rc1
|
|
42
|
+
- !ruby/object:Gem::Dependency
|
|
43
|
+
name: aws-sdk-s3
|
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
|
45
|
+
requirements:
|
|
46
|
+
- - "~>"
|
|
47
|
+
- !ruby/object:Gem::Version
|
|
48
|
+
version: '1.212'
|
|
49
|
+
type: :runtime
|
|
50
|
+
prerelease: false
|
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
52
|
+
requirements:
|
|
53
|
+
- - "~>"
|
|
54
|
+
- !ruby/object:Gem::Version
|
|
55
|
+
version: '1.212'
|
|
56
|
+
- !ruby/object:Gem::Dependency
|
|
57
|
+
name: ox
|
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
|
59
|
+
requirements:
|
|
60
|
+
- - "~>"
|
|
61
|
+
- !ruby/object:Gem::Version
|
|
62
|
+
version: '2.14'
|
|
63
|
+
- - ">="
|
|
64
|
+
- !ruby/object:Gem::Version
|
|
65
|
+
version: 2.14.23
|
|
66
|
+
type: :runtime
|
|
67
|
+
prerelease: false
|
|
68
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
69
|
+
requirements:
|
|
70
|
+
- - "~>"
|
|
71
|
+
- !ruby/object:Gem::Version
|
|
72
|
+
version: '2.14'
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: 2.14.23
|
|
76
|
+
- !ruby/object:Gem::Dependency
|
|
77
|
+
name: aws_lambda_ric
|
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '3.1'
|
|
83
|
+
- - ">="
|
|
84
|
+
- !ruby/object:Gem::Version
|
|
85
|
+
version: 3.1.3
|
|
86
|
+
type: :development
|
|
87
|
+
prerelease: false
|
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
89
|
+
requirements:
|
|
90
|
+
- - "~>"
|
|
91
|
+
- !ruby/object:Gem::Version
|
|
92
|
+
version: '3.1'
|
|
93
|
+
- - ">="
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: 3.1.3
|
|
96
|
+
- !ruby/object:Gem::Dependency
|
|
97
|
+
name: elasticgraph-elasticsearch
|
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
|
99
|
+
requirements:
|
|
100
|
+
- - '='
|
|
101
|
+
- !ruby/object:Gem::Version
|
|
102
|
+
version: 1.0.3.rc1
|
|
103
|
+
type: :development
|
|
104
|
+
prerelease: false
|
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
106
|
+
requirements:
|
|
107
|
+
- - '='
|
|
108
|
+
- !ruby/object:Gem::Version
|
|
109
|
+
version: 1.0.3.rc1
|
|
110
|
+
- !ruby/object:Gem::Dependency
|
|
111
|
+
name: elasticgraph-opensearch
|
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
|
113
|
+
requirements:
|
|
114
|
+
- - '='
|
|
115
|
+
- !ruby/object:Gem::Version
|
|
116
|
+
version: 1.0.3.rc1
|
|
117
|
+
type: :development
|
|
118
|
+
prerelease: false
|
|
119
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
120
|
+
requirements:
|
|
121
|
+
- - '='
|
|
122
|
+
- !ruby/object:Gem::Version
|
|
123
|
+
version: 1.0.3.rc1
|
|
124
|
+
email:
|
|
125
|
+
- joshuaw@squareup.com
|
|
126
|
+
executables: []
|
|
127
|
+
extensions: []
|
|
128
|
+
extra_rdoc_files: []
|
|
129
|
+
files:
|
|
130
|
+
- LICENSE.txt
|
|
131
|
+
- README.md
|
|
132
|
+
- lib/elastic_graph/warehouse_lambda.rb
|
|
133
|
+
- lib/elastic_graph/warehouse_lambda/config.rb
|
|
134
|
+
- lib/elastic_graph/warehouse_lambda/lambda_function.rb
|
|
135
|
+
- lib/elastic_graph/warehouse_lambda/warehouse_dumper.rb
|
|
136
|
+
homepage: https://block.github.io/elasticgraph/
|
|
137
|
+
licenses:
|
|
138
|
+
- MIT
|
|
139
|
+
metadata:
|
|
140
|
+
bug_tracker_uri: https://github.com/block/elasticgraph/issues
|
|
141
|
+
changelog_uri: https://github.com/block/elasticgraph/releases/tag/v1.0.3.rc1
|
|
142
|
+
documentation_uri: https://block.github.io/elasticgraph/api-docs/v1.0.3.rc1/
|
|
143
|
+
homepage_uri: https://block.github.io/elasticgraph/
|
|
144
|
+
source_code_uri: https://github.com/block/elasticgraph/tree/v1.0.3.rc1/elasticgraph-warehouse_lambda
|
|
145
|
+
gem_category: lambda
|
|
146
|
+
rdoc_options: []
|
|
147
|
+
require_paths:
|
|
148
|
+
- lib
|
|
149
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
150
|
+
requirements:
|
|
151
|
+
- - ">="
|
|
152
|
+
- !ruby/object:Gem::Version
|
|
153
|
+
version: '3.4'
|
|
154
|
+
- - "<"
|
|
155
|
+
- !ruby/object:Gem::Version
|
|
156
|
+
version: '4.1'
|
|
157
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
158
|
+
requirements:
|
|
159
|
+
- - ">="
|
|
160
|
+
- !ruby/object:Gem::Version
|
|
161
|
+
version: '0'
|
|
162
|
+
requirements: []
|
|
163
|
+
rubygems_version: 4.0.3
|
|
164
|
+
specification_version: 4
|
|
165
|
+
summary: ElasticGraph lambda for ingesting data into a warehouse.
|
|
166
|
+
test_files: []
|