clear-skies-aws 2.0.1__py3-none-any.whl → 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clear_skies_aws-2.0.1.dist-info → clear_skies_aws-2.0.3.dist-info}/METADATA +2 -2
- clear_skies_aws-2.0.3.dist-info/RECORD +63 -0
- {clear_skies_aws-2.0.1.dist-info → clear_skies_aws-2.0.3.dist-info}/WHEEL +1 -1
- clearskies_aws/__init__.py +27 -0
- clearskies_aws/actions/__init__.py +15 -0
- clearskies_aws/actions/action_aws.py +135 -0
- clearskies_aws/actions/assume_role.py +115 -0
- clearskies_aws/actions/ses.py +203 -0
- clearskies_aws/actions/sns.py +61 -0
- clearskies_aws/actions/sqs.py +81 -0
- clearskies_aws/actions/step_function.py +73 -0
- clearskies_aws/backends/__init__.py +19 -0
- clearskies_aws/backends/backend.py +106 -0
- clearskies_aws/backends/dynamo_db_backend.py +609 -0
- clearskies_aws/backends/dynamo_db_condition_parser.py +325 -0
- clearskies_aws/backends/dynamo_db_parti_ql_backend.py +965 -0
- clearskies_aws/backends/sqs_backend.py +61 -0
- clearskies_aws/configs/__init__.py +0 -0
- clearskies_aws/contexts/__init__.py +23 -0
- clearskies_aws/contexts/cli_web_socket_mock.py +20 -0
- clearskies_aws/contexts/lambda_alb.py +81 -0
- clearskies_aws/contexts/lambda_api_gateway.py +81 -0
- clearskies_aws/contexts/lambda_api_gateway_web_socket.py +79 -0
- clearskies_aws/contexts/lambda_invoke.py +138 -0
- clearskies_aws/contexts/lambda_sns.py +124 -0
- clearskies_aws/contexts/lambda_sqs_standard.py +139 -0
- clearskies_aws/di/__init__.py +6 -0
- clearskies_aws/di/aws_additional_config_auto_import.py +37 -0
- clearskies_aws/di/inject/__init__.py +6 -0
- clearskies_aws/di/inject/boto3.py +15 -0
- clearskies_aws/di/inject/boto3_session.py +13 -0
- clearskies_aws/di/inject/parameter_store.py +15 -0
- clearskies_aws/endpoints/__init__.py +1 -0
- clearskies_aws/endpoints/secrets_manager_rotation.py +194 -0
- clearskies_aws/endpoints/simple_body_routing.py +41 -0
- clearskies_aws/input_outputs/__init__.py +21 -0
- clearskies_aws/input_outputs/cli_web_socket_mock.py +20 -0
- clearskies_aws/input_outputs/lambda_alb.py +53 -0
- clearskies_aws/input_outputs/lambda_api_gateway.py +123 -0
- clearskies_aws/input_outputs/lambda_api_gateway_web_socket.py +73 -0
- clearskies_aws/input_outputs/lambda_input_output.py +89 -0
- clearskies_aws/input_outputs/lambda_invoke.py +88 -0
- clearskies_aws/input_outputs/lambda_sns.py +88 -0
- clearskies_aws/input_outputs/lambda_sqs_standard.py +86 -0
- clearskies_aws/mocks/__init__.py +1 -0
- clearskies_aws/mocks/actions/__init__.py +6 -0
- clearskies_aws/mocks/actions/ses.py +34 -0
- clearskies_aws/mocks/actions/sns.py +29 -0
- clearskies_aws/mocks/actions/sqs.py +29 -0
- clearskies_aws/mocks/actions/step_function.py +32 -0
- clearskies_aws/models/__init__.py +1 -0
- clearskies_aws/models/web_socket_connection_model.py +182 -0
- clearskies_aws/secrets/__init__.py +13 -0
- clearskies_aws/secrets/additional_configs/__init__.py +62 -0
- clearskies_aws/secrets/additional_configs/iam_db_auth.py +39 -0
- clearskies_aws/secrets/additional_configs/iam_db_auth_with_ssm.py +96 -0
- clearskies_aws/secrets/additional_configs/mysql_connection_dynamic_producer_via_ssh_cert_bastion.py +80 -0
- clearskies_aws/secrets/additional_configs/mysql_connection_dynamic_producer_via_ssm_bastion.py +162 -0
- clearskies_aws/secrets/akeyless_with_ssm_cache.py +60 -0
- clearskies_aws/secrets/parameter_store.py +52 -0
- clearskies_aws/secrets/secrets.py +16 -0
- clearskies_aws/secrets/secrets_manager.py +96 -0
- clear_skies_aws-2.0.1.dist-info/RECORD +0 -4
- {clear_skies_aws-2.0.1.dist-info → clear_skies_aws-2.0.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,609 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import json
|
|
5
|
+
from decimal import Decimal
|
|
6
|
+
from typing import Any, Callable
|
|
7
|
+
|
|
8
|
+
import clearskies
|
|
9
|
+
from boto3.dynamodb import conditions as dynamodb_conditions
|
|
10
|
+
from clearskies import model
|
|
11
|
+
from clearskies.autodoc.schema import String as AutoDocString
|
|
12
|
+
from clearskies.columns.boolean import Boolean
|
|
13
|
+
from clearskies.columns.float import Float
|
|
14
|
+
from clearskies.columns.integer import Integer
|
|
15
|
+
from types_boto3_dynamodb import DynamoDBServiceResource
|
|
16
|
+
|
|
17
|
+
from clearskies_aws.backends import backend
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DynamoDBBackend(backend.Backend):
    """
    DynamoDB is complicated.

    The issue is that we can't arbitrarily search/sort on columns (aka attributes). In order to perform meaningful
    filtering on an attribute, then there must be an index which has that attribute set as its HASH/Partition.
    Sorting or searching outside of indexes doesn't work the same way as with a typical SQL database (which will
    scan all records and search/sort accordingly). With DynamoDB AWS fetches a maximum number of records out of the
    table, and then performs sorting/filtering on those. The searching will always happen on a subset of the data,
    unless there are a sufficiently small number of records or there is a supporting index. For sorting, DynamoDB
    will not attempt to sort at all unless there is a supporting search attribute set in the index.

    "true" searching is only possible on indexes (either the primary index or a global secondary index). For such
    cases, DynamoDB can perform basic searching operations against the HASH/Partition attribute in such an index.
    However, this still doesn't let us perform arbitrary sorting. Instead, each index can have an optional RANGE/Sort key.
    If this exists, then we can sort in either ascending (the default) or descending order only if we have first
    filtered on the HASH/partition attribute. This is the extent of sorting. It is not possible to sort arbitrary attributes
    or specify multiple sort conditions. To repeat a bit more succinctly: DynamoDB can only filter against an attribute
    that has an index set for it, and then can only sort filtered results if the index has the sort attribute set in the
    RANGE/Sort attribute of the index.

    This makes for very limited sorting capabilities. To help with this a little, DynamoDB offers local secondary indexes.
    These indexes allow you to specify an additional sort attribute for a column that is already indexed (either via the
    primary index or a global secondary index). In practice, changing the sort column means selecting a different index
    when filtering results.

    Let's bring it all together with an example. Imagine we have a table that represents books, and has the following
    attributes:

    1. Author
    2. Title
    3. Year Published
    4. Genre

    The primary index for our table has:

    HASH/Partition: Author
    RANGE/Sort: Title

    We have a global secondary index:

    HASH/Partition: Genre
    RANGE/Sort: Title

    And a local secondary index:

    HASH/Partition: Author
    Range/Sort: Year Published

    This combination of indexes would allow us to filter/sort in the following ways:

    1. Filter by Author, sort by Title
    2. Filter by Author, sort by Year Published
    3. Filter by Genre, sort by Title

    Any other filter/sort options will become unreliable as soon as the table grows past the maximum result size.
    """

    # The boto3 DynamoDB service resource; created in __init__ from the injected boto3 session.
    dynamodb: DynamoDBServiceResource

    # The full set of keys that a query configuration dictionary may contain.
    _allowed_configs = [
        "table_name",
        "wheres",
        "sorts",
        "limit",
        "pagination",
        "model_columns",
    ]

    # The subset of configuration keys that must always be present.
    _required_configs = [
        "table_name",
    ]

    # Cache of per-table index maps (see _get_indexes_for_model for the structure).
    # NOTE(review): __init__ re-assigns this per-instance, so the class-level value only
    # matters before construction / for clear_table_cache - confirm that is intended.
    _table_indexes = None

    # Cache of model columns keyed by id(model), populated lazily in
    # _value_for_condition_expression.
    _model_columns_cache = None

    # this is the list of operators that we can use when querying a dynamodb index and their corresponding
    # key method name in dynamodb
    _index_operators = {
        "=": "eq",
        "<": "lt",
        ">": "gt",
        ">=": "gte",
        "<=": "lte",
    }

    # this is a map from clearskies operators to the equivalent dynamodb attribute operators
    _attribute_operators = {
        "!=": "ne",
        "<=": "lte",
        ">=": "gte",
        ">": "gt",
        "<": "lt",
        "=": "eq",
        "IS NOT NULL": "exists",
        "IS NULL": "not_exists",
        "IS NOT": "ne",
        "IS": "eq",
        "LIKE": "",  # requires special handling
    }
|
|
121
|
+
|
|
122
|
+
    def __init__(self):
        """
        Build the boto3 DynamoDB resource and initialize the per-instance caches.

        Raises:
            ValueError: if AWS_REGION is not available from the environment.
        """
        # NOTE(review): the second argument to environment.get presumably marks the lookup
        # as optional/silent so a missing value comes back falsy - confirm against the
        # clearskies Environment.get signature.
        if not self.environment.get("AWS_REGION", True):
            raise ValueError("To use DynamoDB you must use set AWS_REGION in the .env file or an environment variable")

        self.dynamodb = self.boto3.resource("dynamodb", region_name=self.environment.get("AWS_REGION", True))
        # instance-level caches; these shadow the class-level defaults
        self._table_indexes = {}
        self._model_columns_cache = {}
|
|
129
|
+
|
|
130
|
+
    @classmethod
    def clear_table_cache(cls):
        """
        Reset the cached table-index and model-column data.

        NOTE(review): this resets the *class-level* attributes, but __init__ assigns
        per-instance dictionaries which shadow them - so existing instances keep their
        caches after this call.  Confirm whether that is the intended scope.
        """
        cls._table_indexes = {}
        cls._model_columns_cache = {}
|
|
134
|
+
|
|
135
|
+
def update(self, id, data, model):
|
|
136
|
+
# when we run an update column we must include the sort column on the primary
|
|
137
|
+
# index (if it exists)
|
|
138
|
+
sort_column_name = self._find_primary_sort_column(model)
|
|
139
|
+
key = {model.id_column_name: model.get_columns()[model.id_column_name]}
|
|
140
|
+
if sort_column_name:
|
|
141
|
+
key[sort_column_name] = data.get(
|
|
142
|
+
sort_column_name, model.get_columns()[sort_column_name].to_backend(model._data)
|
|
143
|
+
)
|
|
144
|
+
table = self.dynamodb.Table(model.destination_name())
|
|
145
|
+
|
|
146
|
+
data = self.excessive_type_casting(data)
|
|
147
|
+
|
|
148
|
+
updated = table.update_item(
|
|
149
|
+
Key=key,
|
|
150
|
+
UpdateExpression="SET " + ", ".join([f"#{column_name} = :{column_name}" for column_name in data.keys()]),
|
|
151
|
+
ExpressionAttributeValues={
|
|
152
|
+
**{f":{column_name}": value for (column_name, value) in data.items()},
|
|
153
|
+
},
|
|
154
|
+
ExpressionAttributeNames={
|
|
155
|
+
**{f"#{column_name}": column_name for column_name in data.keys()},
|
|
156
|
+
},
|
|
157
|
+
ReturnValues="ALL_NEW",
|
|
158
|
+
)
|
|
159
|
+
return self._map_from_boto3(updated["Attributes"])
|
|
160
|
+
|
|
161
|
+
def create(self, data, model):
|
|
162
|
+
table = self.dynamodb.Table(model.destination_name())
|
|
163
|
+
table.put_item(Item=data)
|
|
164
|
+
return {**data}
|
|
165
|
+
|
|
166
|
+
def excessive_type_casting(self, data):
|
|
167
|
+
for key, value in data.items():
|
|
168
|
+
if isinstance(value, float):
|
|
169
|
+
data[key] = Decimal(value)
|
|
170
|
+
return data
|
|
171
|
+
|
|
172
|
+
def delete(self, id, model):
|
|
173
|
+
table = self.dynamodb.Table(model.table_name())
|
|
174
|
+
table.delete_item(Key={model.id_column_name: model.__getattr__(model.id_column_name)})
|
|
175
|
+
return True
|
|
176
|
+
|
|
177
|
+
def count(self, configuration, model):
|
|
178
|
+
response = self.dynamodb_query(configuration, model, "COUNT")
|
|
179
|
+
return response["Count"]
|
|
180
|
+
|
|
181
|
+
def records(
|
|
182
|
+
self, configuration: dict[str, Any], model: model.Model, next_page_data: dict[str, str] = None
|
|
183
|
+
) -> list[dict[str, Any]]:
|
|
184
|
+
response = self.dynamodb_query(configuration, model, "ALL_ATTRIBUTES")
|
|
185
|
+
if "LastEvaluatedKey" in response and response["LastEvaluatedKey"] is not None and type(next_page_data) == dict:
|
|
186
|
+
next_page_data["next_token"] = self.serialize_next_token_for_response(
|
|
187
|
+
self._map_from_boto3(response["LastEvaluatedKey"])
|
|
188
|
+
)
|
|
189
|
+
return [self._map_from_boto3(item) for item in response["Items"]]
|
|
190
|
+
|
|
191
|
+
    def _dynamodb_query(self, configuration, model, select_type):
        """
        Execute the query described by `configuration` and return the raw boto3 response.

        `select_type` is passed through as the boto3 `Select` parameter (e.g. "COUNT" or
        "ALL_ATTRIBUTES").  A `query` call is used when an index-backed key condition was
        found; otherwise we fall back to a full table `scan`.
        """
        [filter_expression, key_condition_expression, index_name, scan_index_forward] = (
            self._create_dynamodb_query_parameters(configuration, model)
        )
        table = self.dynamodb.Table(model.table_name())

        # so we want to put together the kwargs for scan/query:
        kwargs = {
            "IndexName": index_name,
            "KeyConditionExpression": key_condition_expression,
            "FilterExpression": filter_expression,
            "Select": select_type,
            # resume from the previous page, if a pagination token was provided
            "ExclusiveStartKey": self.restore_next_token_from_config(configuration["pagination"].get("next_token")),
            # a Limit would make a COUNT only count part of the table, so skip it there
            "Limit": configuration["limit"] if configuration["limit"] and select_type != "COUNT" else None,
        }
        # the trouble is that boto3 isn't okay with parameters of None.
        # therefore, we need to remove any of the above keys that are None
        kwargs = {key: value for (key, value) in kwargs.items() if value is not None}

        if key_condition_expression:
            # add the scan index forward setting for key conditions: it controls
            # ascending/descending order and is only valid for `query`, not `scan`
            kwargs["ScanIndexForward"] = scan_index_forward
            return table.query(**kwargs)
        return table.scan(**kwargs)
|
|
215
|
+
|
|
216
|
+
    def _create_dynamodb_query_parameters(self, configuration, model):
        """
        Translate a clearskies query configuration into the pieces of a DynamoDB query.

        Returns a four element list:
          [filter_expression, key_condition_expression, index_name, scan_index_forward]
        where scan_index_forward is True for an ascending sort.
        """
        # DynamoDB only supports sorting by a single column, and only if we can find a supporting index
        # figure out if and what we are sorting by.
        sort_column = None
        sort_direction = "asc"
        if "sorts" in configuration and configuration["sorts"]:
            # only the first sort is honored: DynamoDB can't sort on multiple columns
            sort_column = configuration["sorts"][0]["column"]
            sort_direction = configuration["sorts"][0]["direction"]

        # if we have neither sort nor a where then we have a simple query and can finish up now.
        if not sort_column and not configuration["wheres"]:
            return [None, None, None, True]

        # so the thing here is that if we find a condition that corresponds to an indexed
        # column, then we may be able to use an index, which allows us to use the `query`
        # method of dynamodb. Otherwise though we have to perform a scan operation, which
        # only filters over a subset of records. We also have to convert our query conditions
        # into dynamodb conditions. Finally, note that not all operators are supported by
        # the query operation in dynamodb, so searching on an indexed column doesn't guarantee
        # that we can use a query.
        [key_condition_expression, index_name, remaining_conditions] = self._find_key_condition_expressions(
            configuration["wheres"],
            model.id_column_name,
            sort_column,
            model,
        )

        return [
            # whatever couldn't be folded into the key condition becomes a FilterExpression
            self._as_attr_filter_expressions(remaining_conditions, model),
            key_condition_expression,
            index_name,  # we don't need to specify the name of the primary index
            sort_direction.lower() == "asc",
        ]
|
|
249
|
+
|
|
250
|
+
    def _find_key_condition_expressions(self, conditions, id_column_name, sort_column, model):
        """
        Find an index-backed key condition for the query, if one exists.

        Returns a three element list:
          [key_condition_expression, index_name, remaining_conditions]
        where the first two are None when no usable index was found, and
        remaining_conditions holds everything not consumed by the key condition.

        NOTE(review): the id_column_name parameter is currently unused - the loop below
        reads model.id_column_name directly.  Confirm whether callers rely on passing a
        different column name here.
        """
        indexes = self._get_indexes_for_model(model)
        # we're going to do a lot to this array, so let's make sure and work on a copy to avoid
        # the potential for subtle errors in the future.
        conditions = [*conditions]

        # let's make this easy and sort out conditions that are on an indexed column. While we're at it, apply
        # some weights to decide which index to use. This is slightly tricky because there can be more than one
        # index to use and we can't necessarily know for sure which to use. In general though, we can only search
        # on an index if we have an equals search in the hash attribute. After that, we can either search on the
        # range parameter for the index or perform simple searches (=, <, <=, >, >=) on the range parameter of the
        # index. Therefore we can have ambiguity if there is an 'equals' search on multiple columns that are the
        # hash attribute in different indexes. We also get some ambiguity if, after filtering on the hash index,
        # we have a local index that matches the sort parameter and another index that matches a secondary search
        # in the query. These are largely edge cases, so for now we'll pick a heuristic and make another approach
        # down the road (likely by giving the programmer a way to specify which index to use).

        # So what do we do? From a practical perspective, we want to figure out which conditions correspond
        # to a searchable index, and then which ones may be usable as a secondary index. Then we want to
        # choose which index to use with which conditions, and shove the rest into the remaining conditions
        # which we return. Therefore, we need to collect some information about each condition
        id_conditions = []
        indexable_conditions = []
        secondary_conditions = []
        for index, condition in enumerate(conditions):
            column_name = condition["column"]
            # if the column isn't a hash index and isn't an equals search, then this condition can't "anchor"
            # an index search.
            if column_name not in indexes or condition["operator"] != "=":
                # however, it may still contribute to a secondary condition in an index search, so record it
                # if it uses a supporting operator
                if condition["operator"] in self._index_operators:
                    secondary_conditions.append(index)

            # if we get here then we have an '=' condition on a hash attribute in an index - we can use an index!
            else:
                # even better if it is for the id column!
                if column_name == model.id_column_name:
                    id_conditions.append(index)
                else:
                    indexable_conditions.append(index)

        # Okay then! We can start working through our use cases. First of all, if we have an id=[value]
        # search condition, and the id column is indexed, then just use that.
        if id_conditions:
            return self._finalize_key_condition_expression(
                conditions,
                id_conditions[0],
                secondary_conditions,
                sort_column,
                indexes,
                model,
            )

        # if we don't have an id condition but do have conditions that are performing an `=` search
        # on HASH attributes, then we can also use an index! Unfortunately, if we have more than one
        # of these, then we have no way to know which to use without some hints from the developer.
        # for now, just use the first, but we'll add in index-hinting down the line if we need it.
        if indexable_conditions:
            return self._finalize_key_condition_expression(
                conditions,
                indexable_conditions[0],
                secondary_conditions,
                sort_column,
                indexes,
                model,
            )

        # If we get here then we can't use an index :(
        return [None, None, conditions]
|
|
319
|
+
|
|
320
|
+
    def _finalize_key_condition_expression(
        self,
        conditions,
        primary_condition_index,
        secondary_condition_indexes,
        sort_column,
        indexes,
        model,
    ):
        """
        Figure out exactly which index to use and build the key expression.

        We basically tell it everything we know, including what the "primary" condition is,
        i.e. the condition that we expect to match against the HASH attribute of an index. This
        is *always* a `[column]=[value]` condition, because that is all DynamoDB supports, and
        the calling method must guarantee that there is an index on the table that has the given
        column as a HASH attribute.

        So why do we need to do anything else if the caller already knows which column it wants to
        sort on, and that there is an index with that column as a HASH attribute? Because of the
        RANGE attribute, i.e. the second column in the index! You can specify this second column
        to support sorting after searching on the HASH column, or to perform additional filtering
        after filtering on the hash column. Local secondary indexes make it possible to create
        multiple indexes with the same HASH attribute but different RANGE attributes, which means
        that even if we know what the "primary" column is that we want to search on, there is still
        a possibility that we want to select different indexes depending on what our sort column
        is or what additional conditions we have in our query.

        The goal of this function is to sort that all out, decide which index we want to use
        for our query, build the appropriate key expression, and return a new list of conditions
        which has the conditions used in the key expression removed. Those left over conditions
        are then destined for the FilterExpression.

        Returns: [key_condition_expression, index_name, remaining_conditions]
        """
        # the condition for the primary condition
        index_condition = conditions[primary_condition_index]
        index_data = indexes[index_condition["column"]]

        # our secondary columns are just suggestions, so see if we can actually use any.
        # Tally, per candidate index, how many secondary conditions it would absorb.
        index_condition_counts = {}
        for condition_index in secondary_condition_indexes:
            secondary_condition = conditions[condition_index]
            secondary_column = secondary_condition["column"]
            if secondary_column not in index_data["sortable_columns"]:
                continue
            secondary_index = index_data["sortable_columns"][secondary_column]
            if secondary_index not in index_condition_counts:
                index_condition_counts[secondary_index] = {"count": 0, "condition_indexes": []}
            index_condition_counts[secondary_index]["count"] += 1
            index_condition_counts[secondary_index]["condition_indexes"].append(condition_index)

        # now we can decide which index to use. Prefer an index that hits some secondary conditions,
        # or an index that hits the sort column, or the default index.
        used_condition_indexes = [primary_condition_index]
        if index_condition_counts:
            index_to_use = max(index_condition_counts, key=lambda key: index_condition_counts[key]["count"])
            used_condition_indexes.extend(index_condition_counts[index_to_use]["condition_indexes"])
        elif sort_column in index_data["sortable_columns"]:
            index_to_use = index_data["sortable_columns"][sort_column]
        else:
            index_to_use = index_data["default_index_name"]

        # now build our key expression. For every condition in used_condition_indexes, add it to
        # a key expression, and remove it from the conditions array. Do this backwards to make sure
        # that we don't change the meaning of the indexes
        used_condition_indexes.sort()
        used_condition_indexes.reverse()
        key_condition_expression = None
        for condition_index in used_condition_indexes:
            condition = conditions[condition_index]
            dynamodb_operator_method = self._index_operators[condition["operator"]]
            raw_search_value = condition["values"][0] if condition["values"] else None
            # integers/floats must be converted to Decimal before handing them to boto3
            value = self._value_for_condition_expression(raw_search_value, condition["column"], model)
            condition_expression = getattr(dynamodb_conditions.Key(condition["column"]), dynamodb_operator_method)(
                value
            )
            # add to our key condition expression (boto3 conditions combine with `&`)
            if key_condition_expression is None:
                key_condition_expression = condition_expression
            else:
                key_condition_expression &= condition_expression

            # and remove this condition from our list of conditions
            del conditions[condition_index]

        return [
            key_condition_expression,
            index_to_use,
            conditions,
        ]
|
|
409
|
+
|
|
410
|
+
def _as_attr_filter_expressions(self, conditions, model):
|
|
411
|
+
filter_expression = None
|
|
412
|
+
for condition in conditions:
|
|
413
|
+
operator = condition["operator"]
|
|
414
|
+
value = condition["values"][0] if condition["values"] else None
|
|
415
|
+
column_name = condition["column"]
|
|
416
|
+
if operator not in self._attribute_operators:
|
|
417
|
+
raise ValueError(
|
|
418
|
+
f"I was asked to filter by operator '{operator}' but this operator is not supported by DynamoDB"
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# a couple of our operators require special handling
|
|
422
|
+
if operator == "LIKE":
|
|
423
|
+
if value[0] != "%" and value[-1] == "%":
|
|
424
|
+
condition_expression = dynamodb_conditions.Attr(column_name).begins_with(value.rstrip("%"))
|
|
425
|
+
elif value[0] == "%" and value[-1] != "%":
|
|
426
|
+
raise ValueError("DynamoDB doesn't support the 'ends_with' operator")
|
|
427
|
+
elif value[0] == "%" and value[-1] == "%":
|
|
428
|
+
condition_expression = dynamodb_conditions.Attr(column_name).contains(value.strip("%"))
|
|
429
|
+
else:
|
|
430
|
+
condition_expression = dynamodb_conditions.Attr(column_name).eq(value)
|
|
431
|
+
elif operator == "IS NULL":
|
|
432
|
+
condition_expression = dynamodb_conditions.Attr(column_name).exists()
|
|
433
|
+
elif operator == "IS NOT NULL":
|
|
434
|
+
condition_expression = dynamodb_conditions.Attr(column_name).not_exists()
|
|
435
|
+
else:
|
|
436
|
+
dynamodb_operator = self._attribute_operators[operator]
|
|
437
|
+
value = self._value_for_condition_expression(value, column_name, model)
|
|
438
|
+
condition_expression = getattr(dynamodb_conditions.Attr(column_name), dynamodb_operator)(value)
|
|
439
|
+
|
|
440
|
+
if filter_expression is None:
|
|
441
|
+
filter_expression = condition_expression
|
|
442
|
+
else:
|
|
443
|
+
filter_expression &= condition_expression
|
|
444
|
+
|
|
445
|
+
return filter_expression
|
|
446
|
+
|
|
447
|
+
def _value_for_condition_expression(self, value, column_name, model):
|
|
448
|
+
# basically, if the column is an integer/float type, then we need to convert to Decimal
|
|
449
|
+
# or dynamodb can't search properly.
|
|
450
|
+
if id(model) not in self._model_columns_cache:
|
|
451
|
+
self._model_columns_cache[id(model)] = model.columns()
|
|
452
|
+
|
|
453
|
+
model_columns = self._model_columns_cache[id(model)]
|
|
454
|
+
if column_name not in model_columns:
|
|
455
|
+
return value
|
|
456
|
+
|
|
457
|
+
if isinstance(model_columns[column_name], Float) or isinstance(model_columns[column_name], Integer):
|
|
458
|
+
return Decimal(value)
|
|
459
|
+
|
|
460
|
+
return value
|
|
461
|
+
|
|
462
|
+
    def _get_indexes_for_model(self, model):
        """
        Load (and cache) the index map for the DynamoDB table behind the given model.

        Store the indexes by column name. The HASH attribute for each key is basically
        an indexed column, and the RANGE attribute is a column we can sort by.
        Note that a column can have multiple indexes which allows to sort on different
        columns. Therefore we'll combine all of this into a dictionary that looks something
        like this:
        { "column_name": {
            "default_index_name": "index_name",
            "sortable_columns": {
                "column_for_sort": "another_index_name",
                "another_column_for_sort": "a_third_index_name"
            }
        } }
        etc. Therefore, each column with a HASH/Partition index gets an entry in the main dict,
        and then is further subdivided for columns that have RANGE/Sort attributes, giving you
        the index name for that HASH+RANGE combination.
        """
        # serve from the per-table cache when possible (boto3 attribute access hits AWS)
        if model.table_name() in self._table_indexes:
            return self._table_indexes[model.table_name()]

        table_indexes = {}
        table = self.dynamodb.Table(model.table_name())
        schemas = []
        # the primary index for the table doesn't have a name, and it will be used by default
        # if we don't specify an index name. Therefore, we just pass around None for it's name
        schemas.append({"IndexName": None, "KeySchema": table.key_schema})
        global_secondary_indexes = table.global_secondary_indexes
        local_secondary_indexes = table.local_secondary_indexes
        # either attribute is None when the table has no indexes of that kind
        if global_secondary_indexes is not None:
            schemas.extend(table.global_secondary_indexes)
        if local_secondary_indexes is not None:
            schemas.extend(table.local_secondary_indexes)
        for schema in schemas:
            hash_column = ""
            range_column = ""
            for key in schema["KeySchema"]:
                if key["KeyType"] == "RANGE":
                    range_column = key["AttributeName"]
                if key["KeyType"] == "HASH":
                    hash_column = key["AttributeName"]
            # first index seen for a hash column becomes its default index
            if hash_column not in table_indexes:
                table_indexes[hash_column] = {"default_index_name": schema["IndexName"], "sortable_columns": {}}
            if range_column:
                table_indexes[hash_column]["sortable_columns"][range_column] = schema["IndexName"]

        self._table_indexes[model.table_name()] = table_indexes
        return table_indexes
|
|
509
|
+
|
|
510
|
+
def _find_primary_sort_column(self, model):
|
|
511
|
+
indexes = self._get_indexes_for_model(model)
|
|
512
|
+
primary_indexes = indexes.get(model.id_column_name)
|
|
513
|
+
if not primary_indexes:
|
|
514
|
+
return None
|
|
515
|
+
for column_name, index_name in primary_indexes["sortable_columns"].items():
|
|
516
|
+
# the primary index doesn't have a name, so we want the record with a name of None
|
|
517
|
+
if index_name is None:
|
|
518
|
+
return column_name
|
|
519
|
+
return None
|
|
520
|
+
|
|
521
|
+
def _map_from_boto3(self, record):
|
|
522
|
+
return {key: self._map_from_boto3_value(value) for (key, value) in record.items()}
|
|
523
|
+
|
|
524
|
+
def _map_from_boto3_value(self, value):
|
|
525
|
+
if isinstance(value, Decimal):
|
|
526
|
+
return float(value)
|
|
527
|
+
return value
|
|
528
|
+
|
|
529
|
+
def _check_query_configuration(self, configuration, model):
|
|
530
|
+
for key in configuration.keys():
|
|
531
|
+
if key not in self._allowed_configs:
|
|
532
|
+
raise KeyError(f"DynamoDBBackend does not support config '{key}'. You may be using the wrong backend")
|
|
533
|
+
|
|
534
|
+
for key in self._required_configs:
|
|
535
|
+
if key not in configuration:
|
|
536
|
+
raise KeyError(f"Missing required configuration key {key}")
|
|
537
|
+
|
|
538
|
+
for key in self._allowed_configs:
|
|
539
|
+
if key not in configuration:
|
|
540
|
+
configuration[key] = [] if key[-1] == "s" else ""
|
|
541
|
+
|
|
542
|
+
return configuration
|
|
543
|
+
|
|
544
|
+
def validate_pagination_kwargs(self, kwargs: dict[str, Any], case_mapping: Callable) -> str:
|
|
545
|
+
extra_keys = set(kwargs.keys()) - set(self.allowed_pagination_keys())
|
|
546
|
+
if len(extra_keys):
|
|
547
|
+
key_name = case_mapping("next_token")
|
|
548
|
+
return "Invalid pagination key(s): '" + "','".join(extra_keys) + f"'. Only '{key_name}' is allowed"
|
|
549
|
+
if "next_token" not in kwargs:
|
|
550
|
+
key_name = case_mapping("next_token")
|
|
551
|
+
return f"You must specify '{key_name}' when setting pagination"
|
|
552
|
+
# the next token should be a urlsafe-base64 encoded JSON string
|
|
553
|
+
try:
|
|
554
|
+
json.loads(base64.urlsafe_b64decode(kwargs["next_token"]))
|
|
555
|
+
except:
|
|
556
|
+
key_name = case_mapping("next_token")
|
|
557
|
+
return "The provided '{key_name}' appears to be invalid."
|
|
558
|
+
return ""
|
|
559
|
+
|
|
560
|
+
def allowed_pagination_keys(self) -> list[str]:
|
|
561
|
+
return ["next_token"]
|
|
562
|
+
|
|
563
|
+
def restore_next_token_from_config(self, next_token):
|
|
564
|
+
if not next_token:
|
|
565
|
+
return None
|
|
566
|
+
try:
|
|
567
|
+
return json.loads(base64.urlsafe_b64decode(next_token))
|
|
568
|
+
except:
|
|
569
|
+
return None
|
|
570
|
+
|
|
571
|
+
def serialize_next_token_for_response(self, last_evaluated_key):
|
|
572
|
+
return base64.urlsafe_b64encode(json.dumps(last_evaluated_key).encode("utf-8")).decode("utf8")
|
|
573
|
+
|
|
574
|
+
def documentation_pagination_next_page_response(self, case_mapping: Callable) -> list[Any]:
|
|
575
|
+
return [AutoDocString(case_mapping("next_token"))]
|
|
576
|
+
|
|
577
|
+
def documentation_pagination_next_page_example(self, case_mapping: Callable) -> dict[str, Any]:
|
|
578
|
+
return {case_mapping("next_token"): ""}
|
|
579
|
+
|
|
580
|
+
def documentation_pagination_parameters(self, case_mapping: Callable) -> list[tuple[Any]]:
|
|
581
|
+
return [(AutoDocString(case_mapping("next_token"), example=""), "A token to fetch the next page of results")]
|
|
582
|
+
|
|
583
|
+
def column_from_backend(self, column, value):
|
|
584
|
+
"""We have a couple columns we want to override transformations for."""
|
|
585
|
+
# We're pretty much ignoring the BOOL type for dynamodb, because it doesn't work in indexes
|
|
586
|
+
# (and 99% of the time when I have a boolean, it gets used in an index). Therefore,
|
|
587
|
+
# convert boolean values to "0", "1".
|
|
588
|
+
if isinstance(column, Boolean):
|
|
589
|
+
if value == "1":
|
|
590
|
+
return True
|
|
591
|
+
elif value == "0":
|
|
592
|
+
return False
|
|
593
|
+
else:
|
|
594
|
+
return bool(value)
|
|
595
|
+
return super().column_from_backend(column, value)
|
|
596
|
+
|
|
597
|
+
def column_to_backend(self, column, backend_data):
|
|
598
|
+
"""We have a couple columns we want to override transformations for."""
|
|
599
|
+
# most importantly, there's no need to transform a JSON column in either direction
|
|
600
|
+
if isinstance(column, Boolean):
|
|
601
|
+
if column.name not in backend_data:
|
|
602
|
+
return backend_data
|
|
603
|
+
as_string = "1" if bool(backend_data[column.name]) else "0"
|
|
604
|
+
return {**backend_data, column.name: as_string}
|
|
605
|
+
if isinstance(column, Float):
|
|
606
|
+
if column.name not in backend_data:
|
|
607
|
+
return backend_data
|
|
608
|
+
return {**backend_data, column.name: Decimal(backend_data[column.name])}
|
|
609
|
+
return column.to_backend(backend_data)
|