mongo-charms-single-kernel 1.8.6__py3-none-any.whl → 1.8.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mongo-charms-single-kernel might be problematic.
- {mongo_charms_single_kernel-1.8.6.dist-info → mongo_charms_single_kernel-1.8.8.dist-info}/METADATA +2 -1
- {mongo_charms_single_kernel-1.8.6.dist-info → mongo_charms_single_kernel-1.8.8.dist-info}/RECORD +41 -40
- single_kernel_mongo/abstract_charm.py +8 -0
- single_kernel_mongo/config/literals.py +2 -23
- single_kernel_mongo/config/models.py +12 -0
- single_kernel_mongo/config/relations.py +0 -1
- single_kernel_mongo/config/statuses.py +10 -57
- single_kernel_mongo/core/abstract_upgrades_v3.py +149 -0
- single_kernel_mongo/core/k8s_workload.py +2 -2
- single_kernel_mongo/core/kubernetes_upgrades_v3.py +17 -0
- single_kernel_mongo/core/machine_upgrades_v3.py +54 -0
- single_kernel_mongo/core/operator.py +86 -5
- single_kernel_mongo/core/version_checker.py +7 -6
- single_kernel_mongo/core/vm_workload.py +30 -13
- single_kernel_mongo/core/workload.py +17 -19
- single_kernel_mongo/events/backups.py +3 -3
- single_kernel_mongo/events/cluster.py +1 -1
- single_kernel_mongo/events/database.py +1 -1
- single_kernel_mongo/events/lifecycle.py +5 -4
- single_kernel_mongo/events/tls.py +7 -4
- single_kernel_mongo/exceptions.py +4 -24
- single_kernel_mongo/lib/charms/operator_libs_linux/v1/systemd.py +288 -0
- single_kernel_mongo/managers/cluster.py +8 -8
- single_kernel_mongo/managers/config.py +5 -3
- single_kernel_mongo/managers/ldap.py +2 -1
- single_kernel_mongo/managers/mongo.py +48 -9
- single_kernel_mongo/managers/mongodb_operator.py +199 -96
- single_kernel_mongo/managers/mongos_operator.py +97 -35
- single_kernel_mongo/managers/sharding.py +4 -4
- single_kernel_mongo/managers/tls.py +54 -27
- single_kernel_mongo/managers/upgrade_v3.py +452 -0
- single_kernel_mongo/managers/upgrade_v3_status.py +133 -0
- single_kernel_mongo/state/app_peer_state.py +12 -2
- single_kernel_mongo/state/charm_state.py +31 -141
- single_kernel_mongo/state/config_server_state.py +0 -33
- single_kernel_mongo/state/unit_peer_state.py +10 -0
- single_kernel_mongo/templates/enable-transparent-huge-pages.service.j2 +14 -0
- single_kernel_mongo/utils/helpers.py +0 -6
- single_kernel_mongo/utils/mongo_config.py +32 -8
- single_kernel_mongo/core/abstract_upgrades.py +0 -890
- single_kernel_mongo/core/kubernetes_upgrades.py +0 -194
- single_kernel_mongo/core/machine_upgrades.py +0 -188
- single_kernel_mongo/events/upgrades.py +0 -157
- single_kernel_mongo/managers/upgrade.py +0 -334
- single_kernel_mongo/state/upgrade_state.py +0 -134
- {mongo_charms_single_kernel-1.8.6.dist-info → mongo_charms_single_kernel-1.8.8.dist-info}/WHEEL +0 -0
- {mongo_charms_single_kernel-1.8.6.dist-info → mongo_charms_single_kernel-1.8.8.dist-info}/licenses/LICENSE +0 -0
single_kernel_mongo/core/abstract_upgrades.py
@@ -1,890 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""The substrate agnostic Upgrades manager.
-
-In this class, we manage upgrades and their lifecycle.
-"""
-
-from __future__ import annotations
-
-import copy
-import logging
-import secrets
-import string
-from abc import ABC, abstractmethod
-from enum import Enum
-from typing import TYPE_CHECKING, Generic, TypeVar
-
-import poetry.core.constraints.version as poetry_version
-from data_platform_helpers.advanced_statuses.models import StatusObject, StatusObjectList
-from data_platform_helpers.advanced_statuses.protocol import ManagerStatusProtocol
-from data_platform_helpers.advanced_statuses.types import Scope
-from ops import Object
-from pymongo.errors import OperationFailure, PyMongoError, ServerSelectionTimeoutError
-from tenacity import RetryError, Retrying, retry, stop_after_attempt, wait_fixed
-
-from single_kernel_mongo.config.literals import (
-    FEATURE_VERSION,
-    SNAP,
-    CharmKind,
-    Substrates,
-    UnitState,
-)
-from single_kernel_mongo.config.relations import RelationNames
-from single_kernel_mongo.config.statuses import UpgradeStatuses
-from single_kernel_mongo.core.operator import MainWorkloadType, OperatorProtocol
-from single_kernel_mongo.core.structured_config import MongoDBRoles
-from single_kernel_mongo.exceptions import (
-    BalancerStillRunningError,
-    ClusterNotHealthyError,
-    FailedToElectNewPrimaryError,
-    FailedToMovePrimaryError,
-    PeerRelationNotReadyError,
-    PrecheckFailedError,
-)
-from single_kernel_mongo.state.charm_state import CharmState
-from single_kernel_mongo.utils.helpers import mongodb_only
-from single_kernel_mongo.utils.mongo_config import MongoConfiguration
-from single_kernel_mongo.utils.mongo_connection import MongoConnection
-from single_kernel_mongo.utils.mongodb_users import OperatorUser
-
-if TYPE_CHECKING:
-    from single_kernel_mongo.core.kubernetes_upgrades import KubernetesUpgrade
-    from single_kernel_mongo.core.machine_upgrades import MachineUpgrade
-    from single_kernel_mongo.managers.mongodb_operator import MongoDBOperator
-    from single_kernel_mongo.managers.mongos_operator import MongosOperator
-
-T = TypeVar("T", covariant=True, bound=OperatorProtocol)
-
-logger = logging.getLogger(__name__)
-
-WRITE_KEY = "write_value"
-SHARD_NAME_INDEX = "_id"
-
-
-class UpgradeActions(str, Enum):
-    """All upgrade actions."""
-
-    RESUME_ACTION_NAME = "resume-refresh"
-    PRECHECK_ACTION_NAME = "pre-refresh-check"
-    FORCE_REFRESH_START = "force-refresh-start"
-
-
-# BEGIN: Useful classes
-class AbstractUpgrade(ABC):
-    """In-place upgrades abstract class (typing).
-
-    Based off specification: DA058 - In-Place Upgrades - Kubernetes v2
-    (https://docs.google.com/document/d/1tLjknwHudjcHs42nzPVBNkHs98XxAOT2BXGGpP7NyEU/)
-    """
-
-    def __init__(
-        self,
-        dependent: OperatorProtocol,
-        workload: MainWorkloadType,
-        state: CharmState,
-        substrate: Substrates,
-    ) -> None:
-        self.charm = dependent.charm
-        self.dependent = dependent
-        self.workload = workload
-        self.state = state
-        self.substrate = substrate
-        self.relation_name = RelationNames.UPGRADE_VERSION.value
-
-        if not self.state.upgrade_relation:
-            raise PeerRelationNotReadyError
-
-        self.app_name = self.charm.app.name
-        self.unit_name = self.charm.unit.name
-        self._current_versions = {
-            "charm": self.workload.get_charm_revision(),
-            "workload": self.workload.get_version(),
-        }
-
-    @property
-    def unit_state(self) -> UnitState | None:
-        """Unit upgrade state."""
-        return self.state.unit_upgrade_peer_data.unit_state
-
-    @unit_state.setter
-    def unit_state(self, value: UnitState) -> None:
-        self.state.unit_upgrade_peer_data.unit_state = value
-
-    @property
-    def is_compatible(self) -> bool:
-        """Whether upgrade is supported from previous versions."""
-        if not (previous_version_strs := self.state.app_upgrade_peer_data.versions):
-            logger.debug("`versions` missing from peer relation")
-            return False
-
-        # TODO charm versioning: remove `.split("+")` (which removes git hash before comparing)
-        previous_version_strs["charm"] = previous_version_strs["charm"].split("+")[0]
-        previous_versions: dict[str, poetry_version.Version] = {
-            key: poetry_version.Version.parse(value) for key, value in previous_version_strs.items()
-        }
-        current_version_strs = copy.copy(self._current_versions)
-        current_version_strs["charm"] = current_version_strs["charm"].split("+")[0]
-        current_versions = {
-            key: poetry_version.Version.parse(value) for key, value in current_version_strs.items()
-        }
-        try:
-            # TODO Future PR: change this > sign to support downgrades
-            if (
-                previous_versions["charm"] > current_versions["charm"]
-                or previous_versions["charm"].major != current_versions["charm"].major
-            ):
-                logger.debug(
-                    f'{previous_versions["charm"]=} incompatible with {current_versions["charm"]=}'
-                )
-                return False
-            if (
-                previous_versions["workload"] > current_versions["workload"]
-                or previous_versions["workload"].major != current_versions["workload"].major
-            ):
-                logger.debug(
-                    f'{previous_versions["workload"]=} incompatible with {current_versions["workload"]=}'
-                )
-                return False
-            logger.debug(
-                f"Versions before refresh compatible with versions after refresh {previous_version_strs=} {self._current_versions=}"
-            )
-            return True
-        except KeyError as exception:
-            logger.debug(f"Version missing from {previous_versions=}", exc_info=exception)
-            return False
-
-    @abstractmethod
-    def _get_unit_healthy_status(self) -> StatusObject:
-        """Status shown during upgrade if unit is healthy."""
-        raise NotImplementedError()
-
-    def get_upgrade_unit_status(self) -> StatusObject | None:
-        """Unit upgrade status."""
-        if self.state.upgrade_in_progress:
-            if not self.is_compatible:
-                return UpgradeStatuses.INCOMPATIBLE_UPGRADE.value
-            return self._get_unit_healthy_status()
-        return None
-
-    @property
-    def app_status(self) -> StatusObject | None:
-        """App upgrade status."""
-        if not self.state.upgrade_in_progress:
-            return None
-        if self.dependent.name == CharmKind.MONGOD and not self.upgrade_resumed:
-            # User confirmation needed to resume upgrade (i.e. upgrade second unit)
-            # Statuses over 120 characters are truncated in `juju status` as of juju 3.1.6 and
-            # 2.9.45
-            resume_string = ""
-            if len(self.state.units_upgrade_peer_data) > 1:
-                resume_string = f"Verify highest unit is healthy & run `{UpgradeActions.RESUME_ACTION_NAME.value}` action. "
-            return UpgradeStatuses.refreshing_needs_resume(resume_string)
-        return UpgradeStatuses.REFRESH_IN_PROGRESS.value
-
-    def set_versions_in_app_databag(self) -> None:
-        """Save current versions in app databag.
-
-        Used after next upgrade to check compatibility (i.e. whether that upgrade should be
-        allowed).
-        """
-        assert not self.state.upgrade_in_progress
-        logger.debug(f"Setting {self._current_versions=} in upgrade peer relation app databag")
-        self.state.app_upgrade_peer_data.versions = self._current_versions
-        logger.debug(f"Set {self._current_versions=} in upgrade peer relation app databag")
-
-    @property
-    @abstractmethod
-    def upgrade_resumed(self) -> bool:
-        """Whether user has resumed upgrade with Juju action."""
-        raise NotImplementedError()
-
-    @abstractmethod
-    def reconcile_partition(self, *, from_event: bool = False, force: bool = False) -> str | None:
-        """If ready, allow next unit to upgrade."""
-        raise NotImplementedError()
-
-    def pre_upgrade_check(self) -> None:
-        """Check if this app is ready to upgrade.
-
-        Runs before any units are upgraded
-
-        Does *not* run during rollback
-
-        On machines, this runs before any units are upgraded (after `juju refresh`)
-        On machines & Kubernetes, this also runs during pre-upgrade-check action
-
-        Can run on leader or non-leader unit
-
-        Raises:
-            PrecheckFailed: App is not ready to upgrade
-
-        TODO Kubernetes: Run (some) checks after `juju refresh` (in case user forgets to run
-        pre-upgrade-check action). Note: 1 unit will upgrade before we can run checks (checks may
-        need to be modified).
-        See https://chat.canonical.com/canonical/pl/cmf6uhm1rp8b7k8gkjkdsj4mya
-        """
-        logger.debug("Running pre-refresh checks")
-
-        if self.dependent.name == CharmKind.MONGOS:
-            if not self.state.db_initialised:
-                return
-            if not self.dependent.upgrade_manager.is_mongos_able_to_read_write():
-                raise PrecheckFailedError("mongos is not able to read/write")
-            return
-
-        # TODO: if shard is getting upgraded but BOTH have same revision, then fail
-        # https://warthogs.atlassian.net/browse/DPE-6397
-        try:
-            self.dependent.upgrade_manager.wait_for_cluster_healthy()
-        except RetryError:
-            logger.error("Cluster is not healthy")
-            raise PrecheckFailedError("Cluster is not healthy")
-
-        # On VM charms we can choose the order to upgrade, but not on K8s. In order to keep the
-        # two charms in sync we decided to have the VM charm have the same upgrade order as the K8s
-        # charm (i.e. highest to lowest.) Hence, we move the primary to the last unit to upgrade.
-        # This prevents the primary from jumping around from unit to unit during the upgrade
-        # procedure.
-        try:
-            self.dependent.upgrade_manager.move_primary_to_last_upgrade_unit()
-        except FailedToMovePrimaryError:
-            logger.error("Cluster failed to move primary before re-election.")
-            raise PrecheckFailedError("Primary switchover failed")
-
-        if not self.dependent.upgrade_manager.is_cluster_able_to_read_write():
-            logger.error("Cluster cannot read/write to replicas")
-            raise PrecheckFailedError("Cluster is not healthy")
-
-        if self.state.is_role(MongoDBRoles.CONFIG_SERVER):
-            if not self.dependent.upgrade_manager.are_pre_upgrade_operations_config_server_successful():
-                raise PrecheckFailedError("Pre-refresh operations on config-server failed.")
-
-        self.add_status_data_for_legacy_upgrades()
-
-    def add_status_data_for_legacy_upgrades(self):
-        """Add dummy data for legacy upgrades.
-
-        Upgrades supported on revision 212 and lower require status information from shards.
-        however in upgrades on later reisions this information was determined not necessary and
-        obsolete. It is true that this information is *not* needed for earlier revisions to
-        facilitate earlier revisions we populate this data with ActiveStatus.
-        """
-        if not self.state.is_role(MongoDBRoles.SHARD):
-            return
-
-        if not self.state.shard_relation:
-            return
-
-        self.state.unit_shard_state.status_ready_for_upgrade = True
-
-
-# END: Useful classes
-
-
-class GenericMongoDBUpgradeManager(ManagerStatusProtocol, Generic[T], Object, ABC):
-    """Substrate agnostif, abstract handler for upgrade events."""
-
-    def __init__(
-        self,
-        dependent: T,
-        upgrade_backend: type[KubernetesUpgrade | MachineUpgrade],
-        *args,
-        **kwargs,
-    ):
-        self.name = "upgrade"
-        super(Generic, self).__init__(dependent, *args, **kwargs)  # type: ignore[arg-type]
-        self.dependent = dependent
-        self.substrate = self.dependent.substrate
-        self.upgrade_backend = upgrade_backend
-        self.charm = dependent.charm
-        self.state = dependent.state
-
-    @property
-    def _upgrade(self) -> KubernetesUpgrade | MachineUpgrade | None:
-        """Gets the correct upgrade backend if it exists."""
-        try:
-            return self.upgrade_backend(
-                self.dependent,
-                self.dependent.workload,
-                self.state,
-                self.dependent.substrate,
-            )
-        except PeerRelationNotReadyError:
-            return None
-
-    def _set_upgrade_status(self) -> None:
-        """Sets the upgrade status in the unit and app status."""
-        assert self._upgrade
-        if self.charm.unit.is_leader():
-            status_object = self._upgrade.app_status or UpgradeStatuses.ACTIVE_IDLE.value
-            self.state.statuses.add(status_object, scope="app", component=self.name)
-        # Set/clear upgrade unit status if no other unit status - upgrade status for units should
-        # have the lowest priority.
-        statuses: StatusObjectList = self.state.statuses.get(scope="unit", component=self.name)
-        if (
-            not statuses.root
-            or UpgradeStatuses.WAITING_POST_UPGRADE_STATUS in statuses
-            or statuses[0] == UpgradeStatuses.ACTIVE_IDLE  # Works because the list is sorted
-            or any("is not up-to date with" in status.message for status in statuses)
-        ):
-            self.state.statuses.set(
-                self._upgrade.get_upgrade_unit_status() or UpgradeStatuses.ACTIVE_IDLE.value,
-                scope="unit",
-                component=self.name,
-            )
-
-    def get_statuses(self, scope: Scope, recompute: bool = False) -> list[StatusObject]:
-        """Gets statuses for upgrades statelessly."""
-        if not self._upgrade:
-            return []
-
-        if not recompute:
-            return self.state.statuses.get(scope=scope, component=self.name).root
-
-        match scope:
-            case "unit":
-                return [
-                    self._upgrade.get_upgrade_unit_status() or UpgradeStatuses.ACTIVE_IDLE.value
-                ]
-            case "app":
-                return [self._upgrade.app_status or UpgradeStatuses.ACTIVE_IDLE.value]
-            case _:
-                raise ValueError(f"Invalid scope {scope}")
-
-    def store_initial_revisions(self) -> None:
-        """Handle peer relation created event."""
-        assert self._upgrade
-        if self.substrate == Substrates.VM:
-            self.state.unit_workload_container_version = SNAP.revision
-            logger.debug(f"Saved {SNAP.revision=} in unit databag after first install")
-        if self.dependent.name == CharmKind.MONGOD:
-            self.state.unit_upgrade_peer_data.current_revision = (
-                self.dependent.cross_app_version_checker.version  # type: ignore
-            )
-        if self.charm.unit.is_leader():
-            if not self.state.upgrade_in_progress:
-                # Save versions on initial start
-                self._upgrade.set_versions_in_app_databag()
-
-    @abstractmethod
-    def run_post_app_upgrade_task(self) -> None:
-        """Runs the post upgrade check to verify that the deployment is healthy."""
-        raise NotImplementedError()
-
-    def run_post_cluster_upgrade_task(self) -> None:
-        """Runs the post upgrade check to verify that the deployment is healthy."""
-        raise NotImplementedError()
-
-    @abstractmethod
-    def run_post_upgrade_checks(self, finished_whole_cluster: bool = False) -> None:
-        """Runs post-upgrade checks for after an application upgrade."""
-        raise NotImplementedError()
-
-    def _reconcile_upgrade(self, during_upgrade: bool = False) -> None:
-        """Handle upgrade events."""
-        if not self._upgrade:
-            logger.debug("Peer relation not available")
-            return
-        if not self.state.app_upgrade_peer_data.versions:
-            logger.debug("Peer relation not ready")
-            return
-        if self.charm.unit.is_leader() and not self.state.upgrade_in_progress:
-            # Run before checking `self._upgrade.is_compatible` in case incompatible upgrade was
-            # forced & completed on all units.
-            self._upgrade.set_versions_in_app_databag()
-
-        if self.substrate == Substrates.VM and not self._upgrade.is_compatible:
-            self._set_upgrade_status()
-            return
-
-        if self._upgrade.unit_state is UnitState.OUTDATED:
-            self._on_vm_outdated()  # type: ignore
-            return
-
-        if self._upgrade.unit_state is UnitState.RESTARTING:  # Kubernetes only
-            if not self._upgrade.is_compatible:
-                logger.info(
-                    f"Refresh incompatible. If you accept potential *data loss* and *downtime*, you can continue with `{UpgradeActions.RESUME_ACTION_NAME.value} force=true`"
-                )
-                self.state.statuses.add(
-                    UpgradeStatuses.INCOMPATIBLE_UPGRADE.value,
-                    scope="unit",
-                    component=self.name,
-                )
-                return
-
-        if self.dependent.substrate == Substrates.K8S:
-            self._on_kubernetes_always(during_upgrade)  # type: ignore
-        self._set_upgrade_status()
-
-    def _on_kubernetes_always(self, during_upgrade: bool) -> None:
-        """Always run this as part of kubernetes reconcile_upgade call."""
-        if not self._upgrade:
-            logger.debug("Peer relation not available")
-            return
-        if (
-            not during_upgrade
-            and self.state.db_initialised
-            and self.dependent.mongo_manager.mongod_ready()
-        ):
-            self._upgrade.unit_state = UnitState.HEALTHY
-        if self.charm.unit.is_leader():
-            self._upgrade.reconcile_partition()
-            self._set_upgrade_status()
-
-    def _on_vm_outdated(self) -> None:
-        """This is run on VMs if the current unit is outdated."""
-        try:
-            # This is the case only for VM which is OK
-            authorized = self._upgrade.authorized  # type: ignore
-        except PrecheckFailedError as exception:
-            self._set_upgrade_status()
-            self.state.statuses.add(exception.status, scope="unit", component=self.name)
-            logger.debug(f"Set unit status to {exception.status}")
-            logger.error(exception.status.message)
-            return
-        if authorized:
-            self._set_upgrade_status()
-            # We can type ignore because this branch is VM only
-            self._upgrade.upgrade_unit(dependent=self.dependent)  # type: ignore
-            # Refresh status after upgrade
-        else:
-            logger.debug("Waiting to upgrade")
-            self._set_upgrade_status()
-
-    # BEGIN: Helpers
-    @mongodb_only
-    def move_primary_to_last_upgrade_unit(self) -> None:
-        """Moves the primary to last unit that gets upgraded (the unit with the lowest id).
-
-        Raises FailedToMovePrimaryError
-        """
-        # no need to move primary in the scenario of one unit
-        if len(self.state.units_upgrade_peer_data) < 2:
-            return
-
-        with MongoConnection(self.state.mongo_config) as mongod:
-            unit_with_lowest_id = self.state.units_upgrade_peer_data[-1].unit
-            unit_host = self.state.peer_unit_data(unit_with_lowest_id).internal_address
-            if mongod.primary() == unit_host:
-                logger.debug(
-                    "Not moving Primary before refresh, primary is already on the last unit to refresh."
-                )
-                return
-
-            logger.debug("Moving primary to unit: %s", unit_with_lowest_id)
-            mongod.move_primary(new_primary_ip=unit_host)
-
-    @mongodb_only
-    def wait_for_cluster_healthy(
-        self: GenericMongoDBUpgradeManager[MongoDBOperator],
-    ) -> None:
-        """Waits until the cluster is healthy after upgrading.
-
-        After a unit restarts it can take some time for the cluster to settle.
-
-        Raises:
-            ClusterNotHealthyError.
-        """
-        for attempt in Retrying(stop=stop_after_attempt(10), wait=wait_fixed(1)):
-            with attempt:
-                if not self.is_cluster_healthy():
-                    raise ClusterNotHealthyError()
-
-    @mongodb_only
-    def is_cluster_healthy(self: GenericMongoDBUpgradeManager[MongoDBOperator]) -> bool:
-        """Returns True if all nodes in the cluster/replica set are healthy."""
-        # TODO: check mongos
-        if not self.dependent.mongo_manager.mongod_ready():
-            logger.error("Cannot proceed with refresh. Service mongod is not running")
-            return False
-
-        if self.state.is_sharding_component and not self.state.has_sharding_integration:
-            return True
-
-        try:
-            return self.are_nodes_healthy()
-        except (PyMongoError, OperationFailure, ServerSelectionTimeoutError) as e:
-            logger.error(
-                "Cannot proceed with refresh. Failed to check cluster health, error: %s",
-                e,
-            )
-            return False
-
-    @mongodb_only
-    def are_nodes_healthy(self) -> bool:
-        """Returns true if all nodes in the MongoDB deployment are healthy."""
-        if self.state.is_role(MongoDBRoles.REPLICATION):
-            return self.are_replica_set_nodes_healthy(self.state.mongo_config)
-
-        mongos_config = self.get_cluster_mongos()
-        if not self.are_shards_healthy(mongos_config):
-            logger.debug(
-                "One or more individual shards are not healthy - do not proceed with refresh."
-            )
-            return False
-
-        if not self.are_replicas_in_sharded_cluster_healthy(mongos_config):
-            logger.debug("One or more nodes are not healthy - do not proceed with refresh.")
-            return False
-
-        return True
-
-    def are_replicas_in_sharded_cluster_healthy(self, mongos_config: MongoConfiguration) -> bool:
-        """Returns True if all replicas in the sharded cluster are healthy."""
-        # dictionary of all replica sets in the sharded cluster
-        for mongodb_config in self.get_all_replica_set_configs_in_cluster():
-            if not self.are_replica_set_nodes_healthy(mongodb_config):
-                logger.debug(f"Replica set: {mongodb_config.replset} contains unhealthy nodes.")
-                return False
-
-        return True
-
-    def are_shards_healthy(self, mongos_config: MongoConfiguration) -> bool:
-        """Returns True if all shards in the cluster are healthy."""
-        with MongoConnection(mongos_config) as mongos:
-            if mongos.is_any_shard_draining():
-                logger.debug("Cluster is draining a shard, do not proceed with refresh.")
-                return False
-
-            if not mongos.are_all_shards_aware():
-                logger.debug("Not all shards are shard aware, do not proceed with refresh.")
-                return False
-
-            # Config-Server has access to all the related shard applications.
-            if self.state.is_role(MongoDBRoles.CONFIG_SERVER):
-                relation_shards = {
-                    relation.app.name for relation in self.state.config_server_relation
-                }
-                cluster_shards = mongos.get_shard_members()
-                if len(relation_shards - cluster_shards):
-                    logger.debug(
-                        "Not all shards have been added/drained, do not proceed with refresh."
-                    )
-                    return False
-
-        return True
-
-    def get_all_replica_set_configs_in_cluster(self) -> list[MongoConfiguration]:
-        """Returns a list of all the mongodb_configurations for each application in the cluster."""
-        mongos_config = self.get_cluster_mongos()
-        mongodb_configurations = []
-        if self.state.is_role(MongoDBRoles.SHARD):
-            # the hosts of the integrated mongos application are also the config-server hosts
-            config_server_hosts = self.state.app_peer_data.mongos_hosts
-            mongodb_configurations = [
-                self.state.mongodb_config_for_user(
-                    OperatorUser,
-                    hosts=set(config_server_hosts),
-                    replset=self.state.config_server_name,
-                )
-            ]
-        elif self.state.is_role(MongoDBRoles.CONFIG_SERVER):
-            mongodb_configurations = [self.state.mongo_config]
-
-        with MongoConnection(mongos_config) as mongos:
-            sc_status = mongos.client.admin.command("listShards")
-            for shard in sc_status["shards"]:
-                mongodb_configurations.append(self.get_mongodb_config_from_shard_entry(shard))
-
-        return mongodb_configurations
-
-    def are_replica_set_nodes_healthy(self, mongodb_config: MongoConfiguration) -> bool:
-        """Returns true if all nodes in the MongoDB replica set are healthy."""
-        with MongoConnection(mongodb_config) as mongod:
-            rs_status = mongod.get_replset_status()
-            rs_status = mongod.client.admin.command("replSetGetStatus")
-            return not mongod.is_any_sync(rs_status)
-
-    def is_cluster_able_to_read_write(
-        self: GenericMongoDBUpgradeManager[MongoDBOperator],
-    ) -> bool:
-        """Returns True if read and write is feasible for cluster."""
-        try:
-            if self.state.is_role(MongoDBRoles.REPLICATION):
-                return self.is_replica_set_able_read_write()
-            return self.is_sharded_cluster_able_to_read_write()
-        except (ServerSelectionTimeoutError, OperationFailure):
-            logger.warning("Impossible to select server, will try again later")
-            return False
-
-    def is_mongos_able_to_read_write(
-        self: GenericMongoDBUpgradeManager[MongosOperator],
-    ) -> bool:
-        """Returns True if read and write is feasible from mongos."""
-        _, collection_name, write_value = self.get_random_write_and_collection()
-        config = self.state.mongos_config
-        self.add_write_to_sharded_cluster(config, config.database, collection_name, write_value)
-
-        write_replicated = self.confirm_excepted_write_cluster(
-            config,
-            collection_name,
-            write_value,
-        )
-        self.clear_tmp_collection(config, collection_name)
-
-        if not write_replicated:
-            logger.debug("Test read/write to cluster failed.")
-            return False
-
-        return True
-
-    @retry(
-        stop=stop_after_attempt(10),
-        wait=wait_fixed(1),
-        reraise=True,
-    )
-    def confirm_excepted_write_cluster(
-        self: GenericMongoDBUpgradeManager[MongosOperator],
-        config: MongoConfiguration,
-        collection_name: str,
-        expected_write_value: str,
-    ) -> bool:
-        """Returns True if the replica contains the expected write in the provided collection."""
-        with MongoConnection(config) as mongos:
-            db = mongos.client[config.database]
-            test_collection = db[collection_name]
-            query = test_collection.find({}, {WRITE_KEY: 1})
-            if query[0][WRITE_KEY] != expected_write_value:
-                return False
-
-        return True
-
-    def is_sharded_cluster_able_to_read_write(
-        self: GenericMongoDBUpgradeManager[MongoDBOperator],
-    ) -> bool:
-        """Returns True if possible to write all cluster shards and read from all replicas."""
-        mongos_config = self.get_cluster_mongos()
-        with MongoConnection(mongos_config) as mongos:
-            sc_status = mongos.client.admin.command("listShards")
-            for shard in sc_status["shards"]:
-                # force a write to a specific shard to ensure the primary on that shard can
-                # receive writes
-                db_name, collection_name, write_value = self.get_random_write_and_collection()
-                self.add_write_to_sharded_cluster(
-                    mongos_config, db_name, collection_name, write_value
-                )
-                mongos.client.admin.command("movePrimary", db_name, to=shard[SHARD_NAME_INDEX])
-
-                write_replicated = self.is_write_on_secondaries(
-                    self.get_mongodb_config_from_shard_entry(shard),
-                    collection_name,
-                    write_value,
-                    db_name,
-                )
-
-                self.clear_db_collection(mongos_config, db_name)
-                if not write_replicated:
-                    logger.debug(f"Test read/write to shard {shard['_id']} failed.")
-                    return False
-
-        return True
-
-    def get_mongodb_config_from_shard_entry(self, shard_entry: dict) -> MongoConfiguration:
-        """Returns a replica set MongoConfiguration based on a shard entry from ListShards."""
-        # field hosts is of the form shard01/host1:27018,host2:27018,host3:27018
-        shard_hosts = shard_entry["host"].split("/")[1]
-        parsed_ips = {host.split(":")[0] for host in shard_hosts.split(",")}
-        return self.state.mongodb_config_for_user(
-            OperatorUser, parsed_ips, replset=shard_entry[SHARD_NAME_INDEX]
-        )
-
-    def get_cluster_mongos(self) -> MongoConfiguration:
-        """Return a mongos configuration for the sharded cluster."""
-        return (
-            self.state.mongos_config
-            if self.state.is_role(MongoDBRoles.CONFIG_SERVER)
-            else self.state.mongos_config_for_user(
-                OperatorUser, hosts=set(self.state.shard_state.mongos_hosts)
-            )
-        )
-
-    def is_replica_set_able_read_write(self) -> bool:
-        """Returns True if is possible to write to primary and read from replicas."""
-        _, collection_name, write_value = self.get_random_write_and_collection()
-        mongodb_config = self.state.mongo_config
-        self.add_write_to_replica_set(mongodb_config, collection_name, write_value)
-        write_replicated = self.is_write_on_secondaries(
-            mongodb_config, collection_name, write_value
-        )
-        self.clear_tmp_collection(mongodb_config, collection_name)
-        return write_replicated
-
-    def clear_db_collection(self, mongos_config: MongoConfiguration, db_name: str) -> None:
-        """Clears the temporary collection."""
-        with MongoConnection(mongos_config) as mongos:
-            mongos.client.drop_database(db_name)
-
-    def clear_tmp_collection(self, mongo_config: MongoConfiguration, collection_name: str) -> None:
-        """Clears the temporary collection."""
-        with MongoConnection(mongo_config) as mongo:
-            db = mongo.client[mongo_config.database]
-            db.drop_collection(collection_name)
-
-    @retry(
-        stop=stop_after_attempt(10),
-        wait=wait_fixed(1),
-        reraise=True,
-    )
-    def confirm_excepted_write_on_replica(
-        self,
-        host: str,
-        db_name: str,
-        collection: str,
-        expected_write_value: str,
-        secondary_config: MongoConfiguration,
-    ) -> None:
-        """Returns True if the replica contains the expected write in the provided collection."""
-        secondary_config.hosts = {host}
-        with MongoConnection(secondary_config, direct=True) as direct_seconary:
-            db = direct_seconary.client[db_name]
-            test_collection = db[collection]
-            query = test_collection.find({}, {WRITE_KEY: 1})
-            if query[0][WRITE_KEY] != expected_write_value:
-                raise ClusterNotHealthyError
-
-    def get_random_write_and_collection(self) -> tuple[str, str, str]:
-        """Returns a tuple for a random collection name and a unique write to add to it."""
-        choices = string.ascii_letters + string.digits
-        collection_name = "collection_" + "".join([secrets.choice(choices) for _ in range(32)])
-        write_value = "unique_write_" + "".join([secrets.choice(choices) for _ in range(16)])
-        db_name = "db_name_" + "".join([secrets.choice(choices) for _ in range(32)])
-        return (db_name, collection_name, write_value)
-
-    def add_write_to_sharded_cluster(
-        self, mongos_config: MongoConfiguration, db_name, collection_name, write_value
-    ) -> None:
-        """Adds a the provided write to the provided database with the provided collection."""
-        with MongoConnection(mongos_config) as mongod:
-            db = mongod.client[db_name]
-            test_collection = db[collection_name]
-            write = {WRITE_KEY: write_value}
-            test_collection.insert_one(write)
-
-    def add_write_to_replica_set(
-        self, mongodb_config: MongoConfiguration, collection_name, write_value
-    ) -> None:
-        """Adds a the provided write to the admin database with the provided collection."""
-        with MongoConnection(mongodb_config) as mongod:
-            db = mongod.client["admin"]
-            test_collection = db[collection_name]
-            write = {WRITE_KEY: write_value}
-            test_collection.insert_one(write)
-
-    def is_write_on_secondaries(
-        self,
-        mongodb_config: MongoConfiguration,
-        collection_name,
-        expected_write_value,
-        db_name: str = "admin",
-    ) -> bool:
-        """Returns true if the expected write."""
-        for replica_ip in mongodb_config.hosts:
-            try:
-                self.confirm_excepted_write_on_replica(
-                    replica_ip,
-                    db_name,
-                    collection_name,
-                    expected_write_value,
-                    mongodb_config,
-                )
-            except ClusterNotHealthyError:
-                # do not return False immediately - as it is
-                logger.debug("Secondary with IP %s, does not contain the expected write.")
-                return False
-
-        return True
-
-    def step_down_primary_and_wait_reelection(self) -> None:
-        """Steps down the current primary and waits for a new one to be elected."""
-        if len(self.state.internal_hosts) < 2:
-            logger.warning(
-                "No secondaries to become primary - upgrading primary without electing a new one, expect downtime."
-            )
-            return
-
-        old_primary = self.dependent.primary_unit_name  # type: ignore
-        with MongoConnection(self.state.mongo_config) as mongod:
-            mongod.step_down_primary()
-
-        for attempt in Retrying(stop=stop_after_attempt(30), wait=wait_fixed(1), reraise=True):
-            with attempt:
-                new_primary = self.dependent.primary_unit_name  # type: ignore
-                if new_primary == old_primary:
-                    raise FailedToElectNewPrimaryError()
-
-    def are_pre_upgrade_operations_config_server_successful(self) -> bool:
-        """Runs pre-upgrade operations for config-server and returns True if successful."""
-        if not self.state.is_role(MongoDBRoles.CONFIG_SERVER):
-            return False
-
-        if not self.is_feature_compatibility_version(FEATURE_VERSION):
-            logger.debug(
-                "Not all replicas have the expected feature compatibility: %s",
-                FEATURE_VERSION,
-            )
-            return False
-
-        self.set_mongos_feature_compatibilty_version(FEATURE_VERSION)
-
-        # pre-upgrade sequence runs twice. Once when the user runs the pre-upgrade action and
-        # again automatically on refresh (just in case the user forgot to). Disabling the balancer
-        # can negatively impact the cluster, so we only disable it once the upgrade sequence has
-        # begun.
-        if self._upgrade and self.state.upgrade_in_progress:
-            try:
-                self.turn_off_and_wait_for_balancer()
-            except BalancerStillRunningError:
-                logger.debug("Balancer is still running. Please try the pre-refresh check later.")
-                return False
-
-        return True
-
-    def is_feature_compatibility_version(self, expected_feature_version: str) -> bool:
-        """Returns True if all nodes in the sharded cluster have the expected_feature_version.
-
-        Note it is NOT sufficient to check only mongos or the individual shards. It is necessary to
-        check each node according to MongoDB upgrade docs.
-        """
-        for replica_set_config in self.get_all_replica_set_configs_in_cluster():
-            for single_host in replica_set_config.hosts:
-                single_replica_config = self.state.mongodb_config_for_user(
-                    OperatorUser,
-                    hosts={single_host},
-                    replset=replica_set_config.replset,
-                    standalone=True,
-                )
-                with MongoConnection(single_replica_config) as mongod:
-                    version = mongod.client.admin.command(
-                        {"getParameter": 1, "featureCompatibilityVersion": 1}
-                    )
-                    if (
-                        version["featureCompatibilityVersion"]["version"]
-                        != expected_feature_version
-                    ):
-                        return False
-
-        return True
-
-    def set_mongos_feature_compatibilty_version(self, feature_version: str) -> None:
-        """Sets the mongos feature compatibility version."""
-        with MongoConnection(self.state.mongos_config) as mongos:
-            mongos.client.admin.command("setFeatureCompatibilityVersion", feature_version)
-
-    @retry(
-        stop=stop_after_attempt(10),
-        wait=wait_fixed(1),
-        reraise=True,
-    )
-    def turn_off_and_wait_for_balancer(self) -> None:
-        """Sends the stop command to the balancer and wait for it to stop running."""
-        with MongoConnection(self.state.mongos_config) as mongos:
-            mongos.client.admin.command("balancerStop")
-            balancer_state = mongos.client.admin.command("balancerStatus")
-            if balancer_state["mode"] != "off":
-                raise BalancerStillRunningError("balancer is still Running.")
-
-    # END: helpers