elasticgraph-datastore_core 0.18.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-datastore_core.gemspec +21 -0
- data/lib/elastic_graph/datastore_core/config.rb +58 -0
- data/lib/elastic_graph/datastore_core/configuration/client_faraday_adapter.rb +38 -0
- data/lib/elastic_graph/datastore_core/configuration/cluster_definition.rb +52 -0
- data/lib/elastic_graph/datastore_core/configuration/index_definition.rb +110 -0
- data/lib/elastic_graph/datastore_core/index_config_normalizer.rb +79 -0
- data/lib/elastic_graph/datastore_core/index_definition/base.rb +162 -0
- data/lib/elastic_graph/datastore_core/index_definition/index.rb +64 -0
- data/lib/elastic_graph/datastore_core/index_definition/rollover_index.rb +48 -0
- data/lib/elastic_graph/datastore_core/index_definition/rollover_index_template.rb +232 -0
- data/lib/elastic_graph/datastore_core/index_definition.rb +51 -0
- data/lib/elastic_graph/datastore_core.rb +100 -0
- metadata +404 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7bd50dac79cefe5466066b048ebb298df524a33a904082b0871af9429a7521ed
|
4
|
+
data.tar.gz: 666da201ff6ef33c81c2d7b4926068913188f735ec1f9777ed68255c3433bc2a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9ec51fffb49a31152bfa0718bfb16d0063c964ca24128710639d287dac4d10ccc4ea0fbe969caae6f7c39df28ddb80b3a2bc225c2c7af1c4bb7d6f5baf5c1459
|
7
|
+
data.tar.gz: 671573cb43b2880450bac584ba4edd21154188a2de245bd195569cc166a0e323038caba947cc7d74bbf0b403b78952b37a800bea81fe602f125188dffa529275
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2024 Block, Inc.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require_relative "../gemspec_helper"
|
10
|
+
|
11
|
+
ElasticGraphGemspecHelper.define_elasticgraph_gem(gemspec_file: __FILE__, category: :core) do |spec, eg_version|
|
12
|
+
spec.summary = "ElasticGraph gem containing the core datastore support types and logic."
|
13
|
+
|
14
|
+
spec.add_dependency "elasticgraph-schema_artifacts", eg_version
|
15
|
+
spec.add_dependency "elasticgraph-support", eg_version
|
16
|
+
|
17
|
+
spec.add_development_dependency "elasticgraph-admin", eg_version
|
18
|
+
spec.add_development_dependency "elasticgraph-elasticsearch", eg_version
|
19
|
+
spec.add_development_dependency "elasticgraph-opensearch", eg_version
|
20
|
+
spec.add_development_dependency "elasticgraph-schema_definition", eg_version
|
21
|
+
end
|
@@ -0,0 +1,58 @@
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/datastore_core/configuration/client_faraday_adapter"
require "elastic_graph/datastore_core/configuration/cluster_definition"
require "elastic_graph/datastore_core/configuration/index_definition"
require "elastic_graph/error"

module ElasticGraph
  class DatastoreCore
    # Defines the configuration related to datastores.
    class Config < ::Data.define(
      # Configuration of the faraday adapter to use with the datastore client.
      :client_faraday_adapter,
      # Map of datastore cluster definitions, keyed by cluster name. The names will be referenced within
      # `index_definitions` by `query_cluster` and `index_into_clusters` to identify
      # datastore clusters. Each definition has a `url` and `settings`. `settings` contains datastore
      # settings in the flattened name form, e.g. `"cluster.max_shards_per_node": 2000`.
      :clusters,
      # Map of index definition names to `IndexDefinition` objects containing customizations
      # for the named index definitions for this environment.
      :index_definitions,
      # Determines if we log requests/responses to/from the datastore.
      # Defaults to `false`.
      :log_traffic,
      # Passed down to the datastore client, controls the number of times ElasticGraph attempts a call against
      # the datastore before failing. Retrying a handful of times is generally advantageous, since some sporadic
      # failures are expected during the course of operation, and better to retry than fail the entire call.
      # Defaults to 3.
      :max_client_retries
    )
      # Builds a `Config` instance from the `datastore` section of the parsed YAML
      # configuration. Raises `ConfigError` when the section contains settings we
      # do not recognize, to surface typos rather than silently ignoring them.
      def self.from_parsed_yaml(parsed_yaml)
        datastore_yaml = parsed_yaml.fetch("datastore")
        unrecognized_keys = datastore_yaml.keys - EXPECTED_KEYS

        unless unrecognized_keys.empty?
          raise ConfigError, "Unknown `datastore` config settings: #{unrecognized_keys.join(", ")}"
        end

        new(
          client_faraday_adapter: Configuration::ClientFaradayAdapter.from_parsed_yaml(datastore_yaml),
          clusters: Configuration::ClusterDefinition.definitions_by_name_hash_from(datastore_yaml.fetch("clusters")),
          index_definitions: Configuration::IndexDefinition.definitions_by_name_hash_from(datastore_yaml.fetch("index_definitions")),
          log_traffic: datastore_yaml.fetch("log_traffic", false),
          max_client_retries: datastore_yaml.fetch("max_client_retries", 3)
        )
      end

      # The setting names we accept under `datastore` — derived from the Data members
      # so the validation stays in sync with the struct definition automatically.
      EXPECTED_KEYS = members.map(&:to_s)
    end
  end
end
@@ -0,0 +1,38 @@
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

module ElasticGraph
  class DatastoreCore
    module Configuration
      class ClientFaradayAdapter < ::Data.define(
        # The faraday adapter to use with the datastore client, such as `httpx` or `typhoeus`.
        # For more info, see:
        # https://github.com/elastic/elasticsearch-ruby/commit/a7bbdbf2a96168c1b33dca46ee160d2d4d75ada0
        :name,
        # A Ruby library to require which provides the named adapter (optional).
        :require
      )
        # Builds an instance from the `client_faraday_adapter` section of parsed YAML
        # config. A missing or explicitly-null section is treated as empty. Raises
        # `ConfigError` when unrecognized settings are present.
        def self.from_parsed_yaml(parsed_yaml)
          adapter_yaml = parsed_yaml.fetch("client_faraday_adapter") || {}
          unrecognized_keys = adapter_yaml.keys - EXPECTED_KEYS

          unless unrecognized_keys.empty?
            raise ConfigError, "Unknown `datastore.client_faraday_adapter` config settings: #{unrecognized_keys.join(", ")}"
          end

          new(
            # The adapter name is symbolized since that's the form faraday expects.
            name: adapter_yaml["name"]&.to_sym,
            require: adapter_yaml["require"]
          )
        end

        # The setting names we accept — derived from the Data members so the
        # validation stays in sync with the struct definition automatically.
        EXPECTED_KEYS = members.map(&:to_s)
      end
    end
  end
end
@@ -0,0 +1,52 @@
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/error"

module ElasticGraph
  class DatastoreCore
    module Configuration
      class ClusterDefinition < ::Data.define(:url, :backend_client_class, :settings)
        # Builds a `ClusterDefinition` from a single cluster config hash.
        # Validates the keys and resolves the named `backend` to a concrete client
        # class, requiring the backing library lazily so only the backend actually
        # in use needs to be installed.
        def self.from_hash(hash)
          unrecognized_keys = hash.keys - EXPECTED_KEYS

          unless unrecognized_keys.empty?
            raise ConfigError, "Unknown `datastore.clusters` config settings: #{unrecognized_keys.join(", ")}"
          end

          backend_name = hash["backend"]
          backend_client_class =
            case backend_name
            when "elasticsearch"
              require "elastic_graph/elasticsearch/client"
              Elasticsearch::Client
            when "opensearch"
              require "elastic_graph/opensearch/client"
              OpenSearch::Client
            else
              raise ConfigError, "Unknown `datastore.clusters` backend: `#{backend_name}`. Valid backends are `elasticsearch` and `opensearch`."
            end

          new(
            url: hash.fetch("url"),
            backend_client_class: backend_client_class,
            settings: hash.fetch("settings")
          )
        end

        # Converts a hash of `name => cluster config hash` into `name => ClusterDefinition`.
        def self.definitions_by_name_hash_from(cluster_def_hash_by_name)
          cluster_def_hash_by_name.transform_values { |cluster_def_hash| from_hash(cluster_def_hash) }
        end

        # Config exposes `backend` (a name) rather than `backend_client_class`
        # (the resolved class), so swap the two when deriving the allowed keys.
        EXPECTED_KEYS = members.map(&:to_s) - ["backend_client_class"] + ["backend"]
      end
    end
  end
end
@@ -0,0 +1,110 @@
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/support/time_set"
require "elastic_graph/error"
require "time"

module ElasticGraph
  class DatastoreCore
    module Configuration
      # Defines environment-specific customizations for an index definition.
      #
      # - ignore_routing_values: routing values for which we will ignore routing as configured on the index.
      #   This is intended to be used when a single routing value contains such a large portion of the dataset that it creates lopsided shards.
      #   By including that routing value in this config setting, it'll spread that value's data across all shards instead of concentrating it on a single shard.
      # - query_cluster: named search cluster to be used for queries on this index.
      # - index_into_cluster: named search clusters to index data into.
      # - setting_overrides: overrides for index (or index template) settings.
      # - setting_overrides_by_timestamp: overrides for index template settings for specific dates,
      #   allowing us to have different settings than the template for some timestamp.
      # - custom_timestamp_ranges: defines indices for a custom timestamp range (rather than relying
      #   on the configured rollover frequency).
      # - use_updates_for_indexing: when `true`, opts the index into using the `update` API instead of the `index` API for indexing.
      #   (Defaults to `true`).
      class IndexDefinition < ::Data.define(
        :ignore_routing_values,
        :query_cluster,
        :index_into_clusters,
        :setting_overrides,
        :setting_overrides_by_timestamp,
        :custom_timestamp_ranges,
        :use_updates_for_indexing
      )
        def initialize(ignore_routing_values:, **rest)
          __skip__ = super(ignore_routing_values: ignore_routing_values.to_set, **rest)

          # Verify the custom ranges are disjoint. This pairwise check is O(N^2),
          # but we expect a _very_ small number of custom ranges (0-2) so that's fine.
          has_overlap = custom_timestamp_ranges
            .map(&:time_set)
            .combination(2)
            .any? { |(set1, set2)| set1.intersect?(set2) }

          raise ConfigError, "Your configured `custom_timestamp_ranges` are not disjoint, as required." if has_overlap
        end

        # Returns a copy with all environment-specific overrides cleared.
        def without_env_overrides
          with(setting_overrides: {}, setting_overrides_by_timestamp: {}, custom_timestamp_ranges: [])
        end

        # Finds the custom timestamp range (if any) containing the given timestamp.
        def custom_timestamp_range_for(timestamp)
          custom_timestamp_ranges.find { |range| range.time_set.member?(timestamp) }
        end

        # Converts a hash of `name => index def config hash` into `name => IndexDefinition`.
        def self.definitions_by_name_hash_from(index_def_hash_by_name)
          index_def_hash_by_name.transform_values do |index_def_hash|
            __skip__ = from(**index_def_hash.transform_keys(&:to_sym))
          end
        end

        def self.from(custom_timestamp_ranges:, use_updates_for_indexing: true, **rest)
          __skip__ = new(
            custom_timestamp_ranges: CustomTimestampRange.ranges_from(custom_timestamp_ranges),
            use_updates_for_indexing: use_updates_for_indexing,
            **rest
          )
        end

        # Represents an index definition that is based on a custom timestamp range.
        class CustomTimestampRange < ::Data.define(:index_name_suffix, :setting_overrides, :time_set)
          def initialize(index_name_suffix:, setting_overrides:, time_set:)
            super

            if time_set.empty?
              raise ConfigError, "Custom timestamp range with suffix `#{index_name_suffix}` is invalid: no timestamps exist in it."
            end
          end

          # Builds a list of `CustomTimestampRange`s from an array of config hashes.
          def self.ranges_from(range_hashes)
            range_hashes.map do |range_hash|
              __skip__ = from(**range_hash.transform_keys(&:to_sym))
            end
          end

          private_class_method def self.from(index_name_suffix:, setting_overrides:, **predicates_hash)
            if predicates_hash.empty?
              raise ConfigSettingNotSetError, "Custom timestamp range with suffix `#{index_name_suffix}` lacks boundary definitions."
            end

            # The remaining keys are range boundary predicates (e.g. `lt`, `gte`)
            # whose values are ISO8601 timestamp strings.
            range_options = predicates_hash.transform_values { |iso8601_string| ::Time.iso8601(iso8601_string) }
            time_set = Support::TimeSet.of_range(**range_options)

            new(index_name_suffix: index_name_suffix, setting_overrides: setting_overrides, time_set: time_set)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,79 @@
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

module ElasticGraph
  class DatastoreCore
    module IndexConfigNormalizer
      # These are settings that the datastore exposes when you fetch an index, but that you can
      # never set. We need to ignore them when figuring out what settings to update.
      #
      # Note: `index.routing.allocation.include._tier_preference` is not a read-only setting, but
      # we want to treat it as one, because (1) Elasticsearch 7.10+ sets it and (2) we do not want
      # to ever write it at this time.
      #
      # Note: `index.history.uuid` is a weird setting that sometimes shows up in managed AWS OpenSearch
      # clusters, but only on _some_ indices. It's not documented and we don't want to mess with it here,
      # so we want to treat it as a read only setting.
      #
      # Frozen so this module-level constant cannot be mutated by callers (the strings
      # inside are already frozen via the `frozen_string_literal` magic comment).
      READ_ONLY_SETTINGS = %w[
        index.creation_date
        index.history.uuid
        index.provided_name
        index.replication.type
        index.routing.allocation.include._tier_preference
        index.uuid
        index.version.created
        index.version.upgraded
      ].freeze

      # Normalizes the provided index configuration so that it is in a stable form that we can compare to what
      # the datastore returns when we query it for the configuration of an index. This includes:
      #
      # - Dropping read-only settings that we never interact with but that the datastore automatically sets on an index.
      #   Omitting them makes it easier for us to compare our desired configuration to what is in the datastore.
      # - Converting setting values to a normalized string form. The datastore oddly returns setting values as strings
      #   (e.g. `"false"` or `"7"` instead of `false` or `7`), so this matches that behavior.
      # - Drops `type: object` from a mapping when there are `properties` because the datastore omits it in that
      #   situation, treating it as the default type.
      #
      # Returns a new hash; the input `index_config` is not mutated.
      def self.normalize(index_config)
        if (settings = index_config["settings"])
          index_config = index_config.merge("settings" => normalize_settings(settings))
        end

        if (mappings = index_config["mappings"])
          index_config = index_config.merge("mappings" => normalize_mappings(mappings))
        end

        index_config
      end

      # Recursively normalizes a mapping: drops a redundant `type: object` wherever
      # `properties` are present (matching the datastore's own representation).
      # Mappings without `properties` are returned unchanged.
      def self.normalize_mappings(mappings)
        return mappings unless (properties = mappings["properties"])

        mappings = mappings.except("type") if mappings["type"] == "object"
        mappings.merge("properties" => properties.transform_values { |prop| normalize_mappings(prop) })
      end

      # Drops read-only settings and stringifies the remaining setting values so they
      # compare cleanly against what the datastore returns.
      def self.normalize_settings(settings)
        settings
          .except(*READ_ONLY_SETTINGS)
          .to_h { |name, value| [name, normalize_setting_value(value)] }
      end

      # Converts a single setting value to the string form the datastore uses:
      # `nil` stays `nil`, arrays are normalized element-wise, everything else
      # becomes its `to_s` representation.
      private_class_method def self.normalize_setting_value(value)
        case value
        when nil
          nil
        when ::Array
          value.map { |v| normalize_setting_value(v) }
        else
          value.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,162 @@
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/datastore_core/index_config_normalizer"
require "elastic_graph/error"
require "elastic_graph/support/hash_util"

module ElasticGraph
  class DatastoreCore
    module IndexDefinition
      # This module contains common implementation logic for both the rollover and non-rollover
      # implementations of the common IndexDefinition type.
      module Base
        # Returns any setting overrides for this index from the environment-specific config file,
        # after flattening it so that it can be directly used in a create index request.
        def flattened_env_setting_overrides
          @flattened_env_setting_overrides ||= Support::HashUtil.flatten_and_stringify_keys(
            env_index_config.setting_overrides,
            prefix: "index"
          )
        end

        # Gets the routing value for the given `prepared_record`. Notably, `prepared_record` must be previously
        # prepared with an `Indexer::RecordPreparer` in order to ensure that it uses internal index
        # field names (to align with `route_with_path`/`route_with` which also use the internal name) rather
        # than the public field name (which can differ).
        def routing_value_for_prepared_record(prepared_record, route_with_path: route_with, id_path: "id")
          return nil unless has_custom_routing?

          unless route_with_path
            raise ConfigError, "`#{self}` uses custom routing, but `route_with_path` is misconfigured (was `nil`)"
          end

          config_routing_value = Support::HashUtil.fetch_value_at_path(prepared_record, route_with_path).to_s

          # When the configured routing value is one we've been told to ignore (to avoid
          # lopsided shards), fall back to routing by the document's id instead.
          if ignored_values_for_routing.include?(config_routing_value)
            Support::HashUtil.fetch_value_at_path(prepared_record, id_path).to_s
          else
            config_routing_value
          end
        end

        def has_custom_routing?
          route_with != "id"
        end

        # Indicates if a search on this index definition may hit incomplete documents. An incomplete document
        # can occur when multiple event types flow into the same index. An index that has only one source type
        # can never have incomplete documents, but an index that has 2 or more sources can have incomplete
        # documents when the "primary" event type hasn't yet been received for a document.
        #
        # This case is notable because we need to apply automatic filtering in order to hide documents that are
        # not yet complete.
        #
        # Note: determining this value sometimes requires that we query the datastore for the record of all
        # sources that an index has ever had. This value changes very, very rarely, and we don't want to slow
        # down every GraphQL query by adding the extra query against the datastore, so we cache the value here.
        def searches_could_hit_incomplete_docs?
          return @searches_could_hit_incomplete_docs if defined?(@searches_could_hit_incomplete_docs)

          @searches_could_hit_incomplete_docs =
            if current_sources.size > 1
              # We know that incomplete docs are possible, without needing to check sources recorded in `_meta`.
              true
            else
              # While our current configuration can't produce incomplete documents, some may already exist in the index
              # if we previously had some `sourced_from` fields (but no longer have them). Here we check for the sources
              # we've recorded in `_meta` to account for that.
              client = datastore_clients_by_name.fetch(cluster_to_query)
              recorded_sources = mappings_in_datastore(client).dig("_meta", "ElasticGraph", "sources") || []
              recorded_sources.union(current_sources.to_a).size > 1
            end
        end

        def cluster_to_query
          env_index_config.query_cluster
        end

        def clusters_to_index_into
          env_index_config.index_into_clusters.tap do |clusters_to_index_into|
            raise ConfigError, "No `index_into_clusters` defined for #{self} in env_index_config" unless clusters_to_index_into
          end
        end

        def use_updates_for_indexing?
          env_index_config.use_updates_for_indexing
        end

        def ignored_values_for_routing
          env_index_config.ignore_routing_values
        end

        # Returns a list of all defined datastore clusters this index resides within.
        def all_accessible_cluster_names
          @all_accessible_cluster_names ||=
            # Using `_` because steep doesn't understand that `compact` removes nils.
            (clusters_to_index_into + [_ = cluster_to_query]).compact.uniq.select do |cluster_name|
              defined_clusters.include?(cluster_name)
            end
        end

        def accessible_cluster_names_to_index_into
          @accessible_cluster_names_to_index_into ||= clusters_to_index_into.select do |cluster_name|
            defined_clusters.include?(cluster_name)
          end
        end

        # Indicates whether not the index is be accessible from GraphQL queries, by virtue of
        # the `cluster_to_query` being a defined cluster or not. This will be used to
        # hide GraphQL schema elements that can't be queried when our config omits the means
        # to query an index (e.g. due to lacking a configured URL).
        def accessible_from_queries?
          return false unless (cluster = cluster_to_query)
          defined_clusters.include?(cluster)
        end

        # Returns a list of indices related to this template in the datastore cluster this
        # index definition is configured to query. Note that for performance reasons, this method
        # memoizes the result of querying the datastore for its current list of indices, and as
        # a result the return value may be out of date. If it is absolutely essential that you get
        # an up-to-date list of related indices, use `related_rollover_indices(datastore_client`) instead of
        # this method.
        #
        # Note, however, that indices generally change *very* rarely (say, monthly or yearly) and as such
        # this will very rarely be out of date, even with the memoization.
        def known_related_query_rollover_indices
          @known_related_query_rollover_indices ||= cluster_to_query&.then do |cluster_name|
            # For query purposes, we only want indices that exist. If we return a query that is defined in our configuration
            # but does not exist, and that gets used in a search index expression (even for the purposes of excluding it!),
            # the datastore will return an error.
            related_rollover_indices(datastore_clients_by_name.fetch(cluster_name), only_if_exists: true)
          end || []
        end

        # Returns a set of all of the field paths to subfields of the special `LIST_COUNTS_FIELD`
        # that contains the element counts of all list fields. The returned set is filtered based
        # on the provided `source` to only contain the paths of fields that are populated by the
        # given source.
        def list_counts_field_paths_for_source(source)
          @list_counts_field_paths_for_source ||= {} # : ::Hash[::String, ::Set[::String]]
          @list_counts_field_paths_for_source[source] ||= identify_list_counts_field_paths_for_source(source)
        end

        def to_s
          "#<#{self.class.name} #{name}>"
        end
        alias_method :inspect, :to_s

        private

        # Scans `fields_by_path` for `LIST_COUNTS_FIELD` subfields populated by `source`.
        def identify_list_counts_field_paths_for_source(source)
          fields_by_path.filter_map do |path, field|
            path if field.source == source && path.split(".").include?(LIST_COUNTS_FIELD)
          end.to_set
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/datastore_core/index_config_normalizer"
require "elastic_graph/datastore_core/index_definition/base"
require "elastic_graph/support/memoizable_data"

module ElasticGraph
  class DatastoreCore
    module IndexDefinition
      # A concrete (non-rollover) index definition.
      class Index < Support::MemoizableData.define(
        :name, :route_with, :default_sort_clauses, :current_sources, :fields_by_path,
        :env_index_config, :defined_clusters, :datastore_clients_by_name
      )
        # `Data.define` provides all these methods:
        # @dynamic name, route_with, default_sort_clauses, current_sources, fields_by_path, env_index_config, defined_clusters, datastore_clients_by_name, initialize

        # `include IndexDefinition::Base` provides all these methods. Steep should be able to detect it
        # but can't for some reason so we have to declare them with `@dynamic`.
        # @dynamic flattened_env_setting_overrides, routing_value_for_prepared_record, has_custom_routing?, cluster_to_query, use_updates_for_indexing?
        # @dynamic clusters_to_index_into, all_accessible_cluster_names, ignored_values_for_routing, searches_could_hit_incomplete_docs?
        # @dynamic accessible_cluster_names_to_index_into, accessible_from_queries?, known_related_query_rollover_indices, list_counts_field_paths_for_source
        include IndexDefinition::Base

        # Fetches this index's current mappings from the datastore, normalized for comparison.
        def mappings_in_datastore(datastore_client)
          raw_mappings = datastore_client.get_index(name)["mappings"] || {}
          IndexConfigNormalizer.normalize_mappings(raw_mappings)
        end

        # `ignore_unavailable: true` is needed to prevent errors when we delete non-existing non-rollover indices
        def delete_from_datastore(datastore_client)
          datastore_client.delete_indices(name)
        end

        # Indicates if this is a rollover index definition.
        #
        # Use of this is considered a mild code smell. When feasible, it's generally better to
        # implement a new polymorphic API on the IndexDefinition interface, rather
        # then branching on the value of this predicate.
        def rollover_index_template?
          false
        end

        # A concrete index is searched by its own name, with no wildcards.
        def index_expression_for_search
          name
        end

        # Returns an index name to use for write operations.
        def index_name_for_writes(record, timestamp_field_path: nil)
          name
        end

        # A concrete index has no related indices (really only rollover indices do).
        def related_rollover_indices(datastore_client, only_if_exists: false)
          []
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "delegate"
require "elastic_graph/datastore_core/index_definition/index"

module ElasticGraph
  class DatastoreCore
    module IndexDefinition
      # Represents a concrete index for specific time range, derived from a RolloverIndexTemplate.
      class RolloverIndex < DelegateClass(Index)
        # @dynamic time_set
        attr_reader :time_set

        def initialize(index, time_set)
          super(index)
          @time_set = time_set
        end

        # We need to override `==` so that two `RolloverIndex` objects that wrap the same `Index` object are
        # considered equal. Oddly enough, the `DelegateClass` implementation of `==` returns `true` if `other`
        # is the wrapped object, but not if it's another instance of the same `DelegateClass` wrapping the same
        # instance.
        #
        # https://github.com/ruby/ruby/blob/v3_0_3/lib/delegate.rb#L156-L159
        #
        # We need this because we want two `RolloverIndex` instances that wrap the same
        # underlying `Index` instance to be considered equal (something a test relies upon,
        # but also generally useful and expected).
        def ==(other)
          return super unless RolloverIndex === other # :nocov: -- the non-RolloverIndex branch isn't explicitly covered by tests.

          __getobj__ == other.__getobj__ && time_set == other.time_set
        end
        alias_method :eql?, :==
      end
    end
  end
end