elasticgraph-datastore_core 0.18.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-datastore_core.gemspec +21 -0
- data/lib/elastic_graph/datastore_core/config.rb +58 -0
- data/lib/elastic_graph/datastore_core/configuration/client_faraday_adapter.rb +38 -0
- data/lib/elastic_graph/datastore_core/configuration/cluster_definition.rb +52 -0
- data/lib/elastic_graph/datastore_core/configuration/index_definition.rb +110 -0
- data/lib/elastic_graph/datastore_core/index_config_normalizer.rb +79 -0
- data/lib/elastic_graph/datastore_core/index_definition/base.rb +162 -0
- data/lib/elastic_graph/datastore_core/index_definition/index.rb +64 -0
- data/lib/elastic_graph/datastore_core/index_definition/rollover_index.rb +48 -0
- data/lib/elastic_graph/datastore_core/index_definition/rollover_index_template.rb +232 -0
- data/lib/elastic_graph/datastore_core/index_definition.rb +51 -0
- data/lib/elastic_graph/datastore_core.rb +100 -0
- metadata +404 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 7bd50dac79cefe5466066b048ebb298df524a33a904082b0871af9429a7521ed
+  data.tar.gz: 666da201ff6ef33c81c2d7b4926068913188f735ec1f9777ed68255c3433bc2a
+SHA512:
+  metadata.gz: 9ec51fffb49a31152bfa0718bfb16d0063c964ca24128710639d287dac4d10ccc4ea0fbe969caae6f7c39df28ddb80b3a2bc225c2c7af1c4bb7d6f5baf5c1459
+  data.tar.gz: 671573cb43b2880450bac584ba4edd21154188a2de245bd195569cc166a0e323038caba947cc7d74bbf0b403b78952b37a800bea81fe602f125188dffa529275
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2024 Block, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
data/README.md
ADDED
data/elasticgraph-datastore_core.gemspec
ADDED
@@ -0,0 +1,21 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require_relative "../gemspec_helper"
+
+ElasticGraphGemspecHelper.define_elasticgraph_gem(gemspec_file: __FILE__, category: :core) do |spec, eg_version|
+  spec.summary = "ElasticGraph gem containing the core datastore support types and logic."
+
+  spec.add_dependency "elasticgraph-schema_artifacts", eg_version
+  spec.add_dependency "elasticgraph-support", eg_version
+
+  spec.add_development_dependency "elasticgraph-admin", eg_version
+  spec.add_development_dependency "elasticgraph-elasticsearch", eg_version
+  spec.add_development_dependency "elasticgraph-opensearch", eg_version
+  spec.add_development_dependency "elasticgraph-schema_definition", eg_version
+end
data/lib/elastic_graph/datastore_core/config.rb
ADDED
@@ -0,0 +1,58 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/datastore_core/configuration/client_faraday_adapter"
+require "elastic_graph/datastore_core/configuration/cluster_definition"
+require "elastic_graph/datastore_core/configuration/index_definition"
+require "elastic_graph/error"
+
+module ElasticGraph
+  class DatastoreCore
+    # Defines the configuration related to datastores.
+    class Config < ::Data.define(
+      # Configuration of the faraday adapter to use with the datastore client.
+      :client_faraday_adapter,
+      # Map of datastore cluster definitions, keyed by cluster name. The names will be referenced within
+      # `index_definitions` by `query_cluster` and `index_into_clusters` to identify
+      # datastore clusters. Each definition has a `url` and `settings`. `settings` contains datastore
+      # settings in the flattened name form, e.g. `"cluster.max_shards_per_node": 2000`.
+      :clusters,
+      # Map of index definition names to `IndexDefinition` objects containing customizations
+      # for the named index definitions for this environment.
+      :index_definitions,
+      # Determines if we log requests/responses to/from the datastore.
+      # Defaults to `false`.
+      :log_traffic,
+      # Passed down to the datastore client, controls the number of times ElasticGraph attempts a call against
+      # the datastore before failing. Retrying a handful of times is generally advantageous, since some sporadic
+      # failures are expected during the course of operation, and better to retry than fail the entire call.
+      # Defaults to 3.
+      :max_client_retries
+    )
+      # Helper method to build an instance from parsed YAML config.
+      def self.from_parsed_yaml(parsed_yaml)
+        parsed_yaml = parsed_yaml.fetch("datastore")
+        extra_keys = parsed_yaml.keys - EXPECTED_KEYS
+
+        unless extra_keys.empty?
+          raise ConfigError, "Unknown `datastore` config settings: #{extra_keys.join(", ")}"
+        end
+
+        new(
+          client_faraday_adapter: Configuration::ClientFaradayAdapter.from_parsed_yaml(parsed_yaml),
+          clusters: Configuration::ClusterDefinition.definitions_by_name_hash_from(parsed_yaml.fetch("clusters")),
+          index_definitions: Configuration::IndexDefinition.definitions_by_name_hash_from(parsed_yaml.fetch("index_definitions")),
+          log_traffic: parsed_yaml.fetch("log_traffic", false),
+          max_client_retries: parsed_yaml.fetch("max_client_retries", 3)
+        )
+      end
+
+      EXPECTED_KEYS = members.map(&:to_s)
+    end
+  end
+end
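`Config.from_parsed_yaml` consumes the `datastore` section of a parsed settings file and rejects unknown keys. The sketch below shows roughly what that parsed structure looks like when built by hand; the cluster name, index name, and setting values are illustrative, and the `elasticsearch` backend assumes the elasticgraph-elasticsearch gem is installed since the cluster definition requires its client lazily.

    require "elastic_graph/datastore_core/config"

    # Hypothetical parsed settings (what YAML.safe_load of a settings file might return).
    parsed_yaml = {
      "datastore" => {
        "client_faraday_adapter" => {"name" => "httpx"},
        "clusters" => {
          "main" => {
            "backend" => "elasticsearch",
            "url" => "http://localhost:9200",
            "settings" => {"cluster.max_shards_per_node" => 2000}
          }
        },
        "index_definitions" => {
          "widgets" => {
            "query_cluster" => "main",
            "index_into_clusters" => ["main"],
            "ignore_routing_values" => [],
            "setting_overrides" => {},
            "setting_overrides_by_timestamp" => {},
            "custom_timestamp_ranges" => []
          }
        },
        "log_traffic" => false,
        "max_client_retries" => 3
      }
    }

    config = ElasticGraph::DatastoreCore::Config.from_parsed_yaml(parsed_yaml)
    config.clusters.fetch("main").url # => "http://localhost:9200"
    config.index_definitions.keys     # => ["widgets"]

An unrecognized key under `datastore` (or under `clusters` / `client_faraday_adapter`) raises `ConfigError`, which surfaces typos in environment settings early.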
data/lib/elastic_graph/datastore_core/configuration/client_faraday_adapter.rb
ADDED
@@ -0,0 +1,38 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+module ElasticGraph
+  class DatastoreCore
+    module Configuration
+      class ClientFaradayAdapter < ::Data.define(
+        # The faraday adapter to use with the datastore client, such as `httpx` or `typhoeus`.
+        # For more info, see:
+        # https://github.com/elastic/elasticsearch-ruby/commit/a7bbdbf2a96168c1b33dca46ee160d2d4d75ada0
+        :name,
+        # A Ruby library to require which provides the named adapter (optional).
+        :require
+      )
+        def self.from_parsed_yaml(parsed_yaml)
+          parsed_yaml = parsed_yaml.fetch("client_faraday_adapter") || {}
+          extra_keys = parsed_yaml.keys - EXPECTED_KEYS
+
+          unless extra_keys.empty?
+            raise ConfigError, "Unknown `datastore.client_faraday_adapter` config settings: #{extra_keys.join(", ")}"
+          end
+
+          new(
+            name: parsed_yaml["name"]&.to_sym,
+            require: parsed_yaml["require"]
+          )
+        end
+
+        EXPECTED_KEYS = members.map(&:to_s)
+      end
+    end
+  end
+end
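A minimal sketch of building this value directly; the adapter and library names are illustrative. `from_parsed_yaml` symbolizes the adapter name and leaves the library name as a string to be `require`d later.

    require "elastic_graph/datastore_core/configuration/client_faraday_adapter"

    adapter = ElasticGraph::DatastoreCore::Configuration::ClientFaradayAdapter.from_parsed_yaml({
      "client_faraday_adapter" => {"name" => "typhoeus", "require" => "typhoeus"}
    })

    adapter.name    # => :typhoeus
    adapter.require # => "typhoeus"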
data/lib/elastic_graph/datastore_core/configuration/cluster_definition.rb
ADDED
@@ -0,0 +1,52 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/error"
+
+module ElasticGraph
+  class DatastoreCore
+    module Configuration
+      class ClusterDefinition < ::Data.define(:url, :backend_client_class, :settings)
+        def self.from_hash(hash)
+          extra_keys = hash.keys - EXPECTED_KEYS
+
+          unless extra_keys.empty?
+            raise ConfigError, "Unknown `datastore.clusters` config settings: #{extra_keys.join(", ")}"
+          end
+
+          backend_name = hash["backend"]
+          backend_client_class =
+            case backend_name
+            when "elasticsearch"
+              require "elastic_graph/elasticsearch/client"
+              Elasticsearch::Client
+            when "opensearch"
+              require "elastic_graph/opensearch/client"
+              OpenSearch::Client
+            else
+              raise ConfigError, "Unknown `datastore.clusters` backend: `#{backend_name}`. Valid backends are `elasticsearch` and `opensearch`."
+            end
+
+          new(
+            url: hash.fetch("url"),
+            backend_client_class: backend_client_class,
+            settings: hash.fetch("settings")
+          )
+        end
+
+        def self.definitions_by_name_hash_from(cluster_def_hash_by_name)
+          cluster_def_hash_by_name.transform_values do |cluster_def_hash|
+            from_hash(cluster_def_hash)
+          end
+        end
+
+        EXPECTED_KEYS = members.map(&:to_s) - ["backend_client_class"] + ["backend"]
+      end
+    end
+  end
+end
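Each entry under `datastore.clusters` picks a backend, a URL, and flattened cluster settings. A sketch follows; the URL and setting are illustrative, and the chosen backend gem (elasticgraph-elasticsearch or elasticgraph-opensearch) is assumed to be installed, since `from_hash` requires its client lazily.

    require "elastic_graph/datastore_core/configuration/cluster_definition"

    cluster = ElasticGraph::DatastoreCore::Configuration::ClusterDefinition.from_hash({
      "backend" => "opensearch",
      "url" => "https://search.example.com:9200",
      "settings" => {"cluster.max_shards_per_node" => 2000}
    })

    cluster.backend_client_class # => the OpenSearch client class from elasticgraph-opensearch
    cluster.settings             # => {"cluster.max_shards_per_node" => 2000}

A misspelled backend, or any key other than `backend`, `url`, and `settings`, raises `ConfigError` rather than being silently ignored.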
data/lib/elastic_graph/datastore_core/configuration/index_definition.rb
ADDED
@@ -0,0 +1,110 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/support/time_set"
+require "elastic_graph/error"
+require "time"
+
+module ElasticGraph
+  class DatastoreCore
+    module Configuration
+      # Defines environment-specific customizations for an index definition.
+      #
+      # - ignore_routing_values: routing values for which we will ignore routing as configured on the index.
+      #   This is intended to be used when a single routing value contains such a large portion of the dataset that it creates lopsided shards.
+      #   By including that routing value in this config setting, it'll spread that value's data across all shards instead of concentrating it on a single shard.
+      # - query_cluster: named search cluster to be used for queries on this index.
+      # - index_into_clusters: named search clusters to index data into.
+      # - setting_overrides: overrides for index (or index template) settings.
+      # - setting_overrides_by_timestamp: overrides for index template settings for specific dates,
+      #   allowing us to have different settings than the template for some timestamps.
+      # - custom_timestamp_ranges: defines indices for a custom timestamp range (rather than relying
+      #   on the configured rollover frequency).
+      # - use_updates_for_indexing: when `true`, opts the index into using the `update` API instead of the `index` API for indexing.
+      #   (Defaults to `true`.)
+      class IndexDefinition < ::Data.define(
+        :ignore_routing_values,
+        :query_cluster,
+        :index_into_clusters,
+        :setting_overrides,
+        :setting_overrides_by_timestamp,
+        :custom_timestamp_ranges,
+        :use_updates_for_indexing
+      )
+        def initialize(ignore_routing_values:, **rest)
+          __skip__ = super(ignore_routing_values: ignore_routing_values.to_set, **rest)
+
+          # Verify the custom ranges are disjoint.
+          # Yeah, this is O(N^2), which isn't great, but we expect a _very_ small number of custom
+          # ranges (0-2) so this should be ok.
+          return if custom_timestamp_ranges
+            .map(&:time_set)
+            .combination(2)
+            .none? do |s1_s2|
+              s1, s2 = s1_s2
+              s1.intersect?(s2)
+            end

+          raise ConfigError, "Your configured `custom_timestamp_ranges` are not disjoint, as required."
+        end
+
+        def without_env_overrides
+          with(setting_overrides: {}, setting_overrides_by_timestamp: {}, custom_timestamp_ranges: [])
+        end
+
+        def custom_timestamp_range_for(timestamp)
+          custom_timestamp_ranges.find do |range|
+            range.time_set.member?(timestamp)
+          end
+        end
+
+        def self.definitions_by_name_hash_from(index_def_hash_by_name)
+          index_def_hash_by_name.transform_values do |index_def_hash|
+            __skip__ = from(**index_def_hash.transform_keys(&:to_sym))
+          end
+        end
+
+        def self.from(custom_timestamp_ranges:, use_updates_for_indexing: true, **rest)
+          __skip__ = new(
+            custom_timestamp_ranges: CustomTimestampRange.ranges_from(custom_timestamp_ranges),
+            use_updates_for_indexing: use_updates_for_indexing,
+            **rest
+          )
+        end
+
+        # Represents an index definition that is based on a custom timestamp range.
+        class CustomTimestampRange < ::Data.define(:index_name_suffix, :setting_overrides, :time_set)
+          def initialize(index_name_suffix:, setting_overrides:, time_set:)
+            super
+
+            if time_set.empty?
+              raise ConfigError, "Custom timestamp range with suffix `#{index_name_suffix}` is invalid: no timestamps exist in it."
+            end
+          end
+
+          def self.ranges_from(range_hashes)
+            range_hashes.map do |range_hash|
+              __skip__ = from(**range_hash.transform_keys(&:to_sym))
+            end
+          end
+
+          private_class_method def self.from(index_name_suffix:, setting_overrides:, **predicates_hash)
+            if predicates_hash.empty?
+              raise ConfigSettingNotSetError, "Custom timestamp range with suffix `#{index_name_suffix}` lacks boundary definitions."
+            end
+
+            range_options = predicates_hash.transform_values { |iso8601_string| ::Time.iso8601(iso8601_string) }
+            time_set = Support::TimeSet.of_range(**range_options)
+
+            new(index_name_suffix: index_name_suffix, setting_overrides: setting_overrides, time_set: time_set)
+          end
+        end
+      end
+    end
+  end
+end
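Putting the pieces together, a per-environment index definition might ignore routing for one oversized tenant and carve out a dedicated index for very old data. The sketch below is illustrative: the names and values are made up, and the `lt` boundary key is assumed here to be one of the range predicates accepted by `Support::TimeSet.of_range`.

    require "elastic_graph/datastore_core/configuration/index_definition"

    index_defs = ElasticGraph::DatastoreCore::Configuration::IndexDefinition.definitions_by_name_hash_from({
      "widgets" => {
        "query_cluster" => "main",
        "index_into_clusters" => ["main"],
        "ignore_routing_values" => ["BIG_TENANT"],
        "setting_overrides" => {},
        "setting_overrides_by_timestamp" => {},
        "custom_timestamp_ranges" => [
          {
            "index_name_suffix" => "before_2020",
            "lt" => "2020-01-01T00:00:00Z",
            "setting_overrides" => {"number_of_shards" => 1}
          }
        ]
      }
    })

    widgets = index_defs.fetch("widgets")
    widgets.ignore_routing_values # => #<Set: {"BIG_TENANT"}>
    widgets.custom_timestamp_range_for(::Time.iso8601("2019-06-01T00:00:00Z"))&.index_name_suffix # => "before_2020"

Overlapping custom ranges, ranges with no boundary predicates, and ranges containing no timestamps are all rejected at construction time via the checks shown above.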
data/lib/elastic_graph/datastore_core/index_config_normalizer.rb
ADDED
@@ -0,0 +1,79 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+module ElasticGraph
+  class DatastoreCore
+    module IndexConfigNormalizer
+      # These are settings that the datastore exposes when you fetch an index, but that you can
+      # never set. We need to ignore them when figuring out what settings to update.
+      #
+      # Note: `index.routing.allocation.include._tier_preference` is not a read-only setting, but
+      # we want to treat it as one, because (1) Elasticsearch 7.10+ sets it and (2) we do not want
+      # to ever write it at this time.
+      #
+      # Note: `index.history.uuid` is a weird setting that sometimes shows up in managed AWS OpenSearch
+      # clusters, but only on _some_ indices. It's not documented and we don't want to mess with it here,
+      # so we want to treat it as a read only setting.
+      READ_ONLY_SETTINGS = %w[
+        index.creation_date
+        index.history.uuid
+        index.provided_name
+        index.replication.type
+        index.routing.allocation.include._tier_preference
+        index.uuid
+        index.version.created
+        index.version.upgraded
+      ]
+
+      # Normalizes the provided index configuration so that it is in a stable form that we can compare to what
+      # the datastore returns when we query it for the configuration of an index. This includes:
+      #
+      # - Dropping read-only settings that we never interact with but that the datastore automatically sets on an index.
+      #   Omitting them makes it easier for us to compare our desired configuration to what is in the datastore.
+      # - Converting setting values to a normalized string form. The datastore oddly returns setting values as strings
+      #   (e.g. `"false"` or `"7"` instead of `false` or `7`), so this matches that behavior.
+      # - Drops `type: object` from a mapping when there are `properties` because the datastore omits it in that
+      #   situation, treating it as the default type.
+      def self.normalize(index_config)
+        if (settings = index_config["settings"])
+          index_config = index_config.merge("settings" => normalize_settings(settings))
+        end
+
+        if (mappings = index_config["mappings"])
+          index_config = index_config.merge("mappings" => normalize_mappings(mappings))
+        end
+
+        index_config
+      end
+
+      def self.normalize_mappings(mappings)
+        return mappings unless (properties = mappings["properties"])
+
+        mappings = mappings.except("type") if mappings["type"] == "object"
+        mappings.merge("properties" => properties.transform_values { |prop| normalize_mappings(prop) })
+      end
+
+      def self.normalize_settings(settings)
+        settings
+          .except(*READ_ONLY_SETTINGS)
+          .to_h { |name, value| [name, normalize_setting_value(value)] }
+      end
+
+      private_class_method def self.normalize_setting_value(value)
+        case value
+        when nil
+          nil
+        when ::Array
+          value.map { |v| normalize_setting_value(v) }
+        else
+          value.to_s
+        end
+      end
+    end
+  end
+end
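A sketch of what `normalize` does to an index configuration, derived from the rules above; the setting values are illustrative.

    require "elastic_graph/datastore_core/index_config_normalizer"

    normalized = ElasticGraph::DatastoreCore::IndexConfigNormalizer.normalize({
      "settings" => {
        "index.number_of_replicas" => 1,          # stringified to "1"
        "index.creation_date" => "1700000000000", # read-only: dropped
        "index.uuid" => "abc123"                  # read-only: dropped
      },
      "mappings" => {
        "type" => "object",                       # dropped, since `properties` is present
        "properties" => {"id" => {"type" => "keyword"}}
      }
    })

    normalized["settings"] # => {"index.number_of_replicas" => "1"}
    normalized["mappings"] # => {"properties" => {"id" => {"type" => "keyword"}}}

Comparing configurations in this normalized form keeps no-op updates from being issued just because the datastore echoes values back in a slightly different shape.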
data/lib/elastic_graph/datastore_core/index_definition/base.rb
ADDED
@@ -0,0 +1,162 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/datastore_core/index_config_normalizer"
+require "elastic_graph/error"
+require "elastic_graph/support/hash_util"
+
+module ElasticGraph
+  class DatastoreCore
+    module IndexDefinition
+      # This module contains common implementation logic for both the rollover and non-rollover
+      # implementations of the common IndexDefinition type.
+      module Base
+        # Returns any setting overrides for this index from the environment-specific config file,
+        # after flattening it so that it can be directly used in a create index request.
+        def flattened_env_setting_overrides
+          @flattened_env_setting_overrides ||= Support::HashUtil.flatten_and_stringify_keys(
+            env_index_config.setting_overrides,
+            prefix: "index"
+          )
+        end
+
+        # Gets the routing value for the given `prepared_record`. Notably, `prepared_record` must be previously
+        # prepared with an `Indexer::RecordPreparer` in order to ensure that it uses internal index
+        # field names (to align with `route_with_path`/`route_with` which also use the internal name) rather
+        # than the public field name (which can differ).
+        def routing_value_for_prepared_record(prepared_record, route_with_path: route_with, id_path: "id")
+          return nil unless has_custom_routing?
+
+          unless route_with_path
+            raise ConfigError, "`#{self}` uses custom routing, but `route_with_path` is misconfigured (was `nil`)"
+          end
+
+          config_routing_value = Support::HashUtil.fetch_value_at_path(prepared_record, route_with_path).to_s
+          return config_routing_value unless ignored_values_for_routing.include?(config_routing_value)
+
+          Support::HashUtil.fetch_value_at_path(prepared_record, id_path).to_s
+        end
+
+        def has_custom_routing?
+          route_with != "id"
+        end
+
+        # Indicates if a search on this index definition may hit incomplete documents. An incomplete document
+        # can occur when multiple event types flow into the same index. An index that has only one source type
+        # can never have incomplete documents, but an index that has 2 or more sources can have incomplete
+        # documents when the "primary" event type hasn't yet been received for a document.
+        #
+        # This case is notable because we need to apply automatic filtering in order to hide documents that are
+        # not yet complete.
+        #
+        # Note: determining this value sometimes requires that we query the datastore for the record of all
+        # sources that an index has ever had. This value changes very, very rarely, and we don't want to slow
+        # down every GraphQL query by adding the extra query against the datastore, so we cache the value here.
+        def searches_could_hit_incomplete_docs?
+          return @searches_could_hit_incomplete_docs if defined?(@searches_could_hit_incomplete_docs)
+
+          if current_sources.size > 1
+            # We know that incomplete docs are possible, without needing to check sources recorded in `_meta`.
+            @searches_could_hit_incomplete_docs = true
+          else
+            # While our current configuration can't produce incomplete documents, some may already exist in the index
+            # if we previously had some `sourced_from` fields (but no longer have them). Here we check for the sources
+            # we've recorded in `_meta` to account for that.
+            client = datastore_clients_by_name.fetch(cluster_to_query)
+            recorded_sources = mappings_in_datastore(client).dig("_meta", "ElasticGraph", "sources") || []
+            sources = recorded_sources.union(current_sources.to_a)
+
+            @searches_could_hit_incomplete_docs = sources.size > 1
+          end
+        end
+
+        def cluster_to_query
+          env_index_config.query_cluster
+        end
+
+        def clusters_to_index_into
+          env_index_config.index_into_clusters.tap do |clusters_to_index_into|
+            raise ConfigError, "No `index_into_clusters` defined for #{self} in env_index_config" unless clusters_to_index_into
+          end
+        end
+
+        def use_updates_for_indexing?
+          env_index_config.use_updates_for_indexing
+        end
+
+        def ignored_values_for_routing
+          env_index_config.ignore_routing_values
+        end
+
+        # Returns a list of all defined datastore clusters this index resides within.
+        def all_accessible_cluster_names
+          @all_accessible_cluster_names ||=
+            # Using `_` because steep doesn't understand that `compact` removes nils.
+            (clusters_to_index_into + [_ = cluster_to_query]).compact.uniq.select do |name|
+              defined_clusters.include?(name)
+            end
+        end
+
+        def accessible_cluster_names_to_index_into
+          @accessible_cluster_names_to_index_into ||= clusters_to_index_into.select do |name|
+            defined_clusters.include?(name)
+          end
+        end
+
+        # Indicates whether or not the index is accessible from GraphQL queries, by virtue of
+        # the `cluster_to_query` being a defined cluster or not. This will be used to
+        # hide GraphQL schema elements that can't be queried when our config omits the means
+        # to query an index (e.g. due to lacking a configured URL).
+        def accessible_from_queries?
+          return false unless (cluster = cluster_to_query)
+          defined_clusters.include?(cluster)
+        end
+
+        # Returns a list of indices related to this template in the datastore cluster this
+        # index definition is configured to query. Note that for performance reasons, this method
+        # memoizes the result of querying the datastore for its current list of indices, and as
+        # a result the return value may be out of date. If it is absolutely essential that you get
+        # an up-to-date list of related indices, use `related_rollover_indices(datastore_client)` instead of
+        # this method.
+        #
+        # Note, however, that indices generally change *very* rarely (say, monthly or yearly) and as such
+        # this will very rarely be out of date, even with the memoization.
+        def known_related_query_rollover_indices
+          @known_related_query_rollover_indices ||= cluster_to_query&.then do |name|
+            # For query purposes, we only want indices that exist. If we return a query that is defined in our configuration
+            # but does not exist, and that gets used in a search index expression (even for the purposes of excluding it!),
+            # the datastore will return an error.
+            related_rollover_indices(datastore_clients_by_name.fetch(name), only_if_exists: true)
+          end || []
+        end
+
+        # Returns a set of all of the field paths to subfields of the special `LIST_COUNTS_FIELD`
+        # that contains the element counts of all list fields. The returned set is filtered based
+        # on the provided `source` to only contain the paths of fields that are populated by the
+        # given source.
+        def list_counts_field_paths_for_source(source)
+          @list_counts_field_paths_for_source ||= {} # : ::Hash[::String, ::Set[::String]]
+          @list_counts_field_paths_for_source[source] ||= identify_list_counts_field_paths_for_source(source)
+        end
+
+        def to_s
+          "#<#{self.class.name} #{name}>"
+        end
+        alias_method :inspect, :to_s
+
+        private
+
+        def identify_list_counts_field_paths_for_source(source)
+          fields_by_path.filter_map do |path, field|
+            path if field.source == source && path.split(".").include?(LIST_COUNTS_FIELD)
+          end.to_set
+        end
+      end
+    end
+  end
+end
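The interplay between `ignore_routing_values` and `routing_value_for_prepared_record` above is what spreads an oversized routing value across shards: normally the routing value is read from the record at `route_with_path`, but values the environment has flagged are routed by the document's `id` instead. A standalone sketch of just that fallback rule follows (hypothetical field names, and flat key lookups rather than the nested-path lookups the module actually performs):

    # Mirrors the fallback in `routing_value_for_prepared_record`, for illustration only.
    def routing_value_for(record, route_with_path:, ignored_values:)
      candidate = record.fetch(route_with_path).to_s
      return candidate unless ignored_values.include?(candidate)

      # A flagged (lopsided) value falls back to routing by document id.
      record.fetch("id").to_s
    end

    routing_value_for({"tenant_id" => "small_co", "id" => "w1"},
      route_with_path: "tenant_id", ignored_values: ["BIG_TENANT"]) # => "small_co"
    routing_value_for({"tenant_id" => "BIG_TENANT", "id" => "w2"},
      route_with_path: "tenant_id", ignored_values: ["BIG_TENANT"]) # => "w2"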
data/lib/elastic_graph/datastore_core/index_definition/index.rb
ADDED
@@ -0,0 +1,64 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/datastore_core/index_config_normalizer"
+require "elastic_graph/datastore_core/index_definition/base"
+require "elastic_graph/support/memoizable_data"
+
+module ElasticGraph
+  class DatastoreCore
+    module IndexDefinition
+      class Index < Support::MemoizableData.define(
+        :name, :route_with, :default_sort_clauses, :current_sources, :fields_by_path,
+        :env_index_config, :defined_clusters, :datastore_clients_by_name
+      )
+        # `Data.define` provides all these methods:
+        # @dynamic name, route_with, default_sort_clauses, current_sources, fields_by_path, env_index_config, defined_clusters, datastore_clients_by_name, initialize
+
+        # `include IndexDefinition::Base` provides all these methods. Steep should be able to detect it
+        # but can't for some reason so we have to declare them with `@dynamic`.
+        # @dynamic flattened_env_setting_overrides, routing_value_for_prepared_record, has_custom_routing?, cluster_to_query, use_updates_for_indexing?
+        # @dynamic clusters_to_index_into, all_accessible_cluster_names, ignored_values_for_routing, searches_could_hit_incomplete_docs?
+        # @dynamic accessible_cluster_names_to_index_into, accessible_from_queries?, known_related_query_rollover_indices, list_counts_field_paths_for_source
+        include IndexDefinition::Base
+
+        def mappings_in_datastore(datastore_client)
+          IndexConfigNormalizer.normalize_mappings(datastore_client.get_index(name)["mappings"] || {})
+        end
+
+        # `ignore_unavailable: true` is needed to prevent errors when we delete non-existing non-rollover indices
+        def delete_from_datastore(datastore_client)
+          datastore_client.delete_indices(name)
+        end
+
+        # Indicates if this is a rollover index definition.
+        #
+        # Use of this is considered a mild code smell. When feasible, it's generally better to
+        # implement a new polymorphic API on the IndexDefinition interface, rather
+        # than branching on the value of this predicate.
+        def rollover_index_template?
+          false
+        end
+
+        def index_expression_for_search
+          name
+        end
+
+        # Returns an index name to use for write operations.
+        def index_name_for_writes(record, timestamp_field_path: nil)
+          name
+        end
+
+        # A concrete index has no related indices (really only rollover indices do).
+        def related_rollover_indices(datastore_client, only_if_exists: false)
+          []
+        end
+      end
+    end
+  end
+end
data/lib/elastic_graph/datastore_core/index_definition/rollover_index.rb
ADDED
@@ -0,0 +1,48 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "delegate"
+require "elastic_graph/datastore_core/index_definition/index"
+
+module ElasticGraph
+  class DatastoreCore
+    module IndexDefinition
+      # Represents a concrete index for a specific time range, derived from a RolloverIndexTemplate.
+      class RolloverIndex < DelegateClass(Index)
+        # @dynamic time_set
+        attr_reader :time_set
+
+        def initialize(index, time_set)
+          super(index)
+          @time_set = time_set
+        end
+
+        # We need to override `==` so that two `RolloverIndex` objects that wrap the same `Index` object are
+        # considered equal. Oddly enough, the `DelegateClass` implementation of `==` returns `true` if `other`
+        # is the wrapped object, but not if it's another instance of the same `DelegateClass` wrapping the same
+        # instance.
+        #
+        # https://github.com/ruby/ruby/blob/v3_0_3/lib/delegate.rb#L156-L159
+        #
+        # We need this because we want two `RolloverIndex` instances that wrap the same
+        # underlying `Index` instance to be considered equal (something a test relies upon,
+        # but also generally useful and expected).
+        def ==(other)
+          if RolloverIndex === other
+            __getobj__ == other.__getobj__ && time_set == other.time_set
+          else
+            # :nocov: -- this method isn't explicitly covered by tests (not worth writing a test just to cover this line).
+            super
+            # :nocov:
+          end
+        end
+        alias_method :eql?, :==
+      end
+    end
+  end
+end
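The `==` override addresses a quirk of Ruby's `DelegateClass`: a wrapper compares equal to the object it wraps, but two wrappers around the same object do not compare equal by default. A minimal standalone sketch of that default behavior (plain Ruby stdlib, unrelated to ElasticGraph's types):

    require "delegate"

    Point = Struct.new(:x, :y)

    class WrappedPoint < DelegateClass(Point)
    end

    point = Point.new(1, 2)
    a = WrappedPoint.new(point)
    b = WrappedPoint.new(point)

    a == point # => true  (DelegateClass delegates the comparison to the wrapped object)
    a == b     # => false (two wrappers of the same object are not equal by default)

The override above restores the intuitive behavior for `RolloverIndex` by comparing the wrapped `Index` objects (and the `time_set`s) directly.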