dory-sdk 2.1.0__py3-none-any.whl → 2.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dory/edge/detector.py ADDED
@@ -0,0 +1,546 @@
1
+ """Edge/Cloud workload detection.
2
+
3
+ Provides multiple methods to detect whether the SDK is running on an
4
+ edge node or a cloud node, with fallback strategies.
5
+
6
+ Detection Methods (in priority order):
7
+ 1. Environment variables (DORY_WORKLOAD_LOCATION, DORY_NODE_TYPE, NODE_TYPE)
8
+ 2. Pod labels via Kubernetes API
9
+ 3. Node labels via Kubernetes API
10
+ 4. Node name pattern matching (fallback)
11
+ """
12
+
13
+ import logging
14
+ import os
15
+ import re
16
+ from dataclasses import dataclass
17
+ from enum import Enum
18
+ from typing import Any
19
+
20
+ from dory.k8s.labels import (
21
+ LABEL_WORKLOAD_LOCATION,
22
+ LABEL_NODE_TYPE,
23
+ LABEL_MIGRATED_FROM_EDGE,
24
+ LABEL_ORIGINAL_NODE,
25
+ VALUE_EDGE,
26
+ VALUE_MANAGED,
27
+ WorkloadLocation,
28
+ )
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
class NodeType(Enum):
    """Kind of node a workload was detected on.

    The member values are the strings emitted by
    ``WorkloadContext.to_dict`` under the ``"node_type"`` key.
    """

    # The workload runs on an edge node.
    EDGE = "edge"
    # The workload runs on a cloud node.
    CLOUD = "cloud"
    # No detection method could classify the node.
    UNKNOWN = "unknown"
39
+
40
+
41
@dataclass
class WorkloadContext:
    """Snapshot describing where the current workload is running.

    Attributes:
        node_type: Whether the node was classified as edge, cloud or unknown.
        workload_location: Intended workload location (edge/managed), or
            None when it could not be determined.
        is_edge: True if the workload is running on an edge node.
        is_migrated: True if this is a migrated edge workload on cloud.
        original_node: Name of the original edge node if migrated.
        node_name: Name of the current node, if known.
        pod_name: Name of the current pod, if known.
        detection_method: Which strategy produced this context
            ("environment", "pod_labels", "node_labels",
            "node_name_pattern" or "default").
        confidence: Confidence level of the detection. Detection methods
            report "high" or "low"; the detector's fallback context
            reports "none".
    """

    node_type: NodeType
    workload_location: WorkloadLocation | None
    is_edge: bool
    is_migrated: bool
    original_node: str | None
    node_name: str | None
    pod_name: str | None
    detection_method: str
    confidence: str  # "high", "medium", "low" (or "none" for the fallback)

    def to_dict(self) -> dict[str, Any]:
        """Return the context as a plain dictionary.

        Enum fields are flattened to their ``.value``; a missing workload
        location is emitted as None.
        """
        location = self.workload_location
        payload: dict[str, Any] = {"node_type": self.node_type.value}
        payload["workload_location"] = location.value if location else None
        payload.update(
            is_edge=self.is_edge,
            is_migrated=self.is_migrated,
            original_node=self.original_node,
            node_name=self.node_name,
            pod_name=self.pod_name,
            detection_method=self.detection_method,
            confidence=self.confidence,
        )
        return payload
80
+
81
+
82
class WorkloadDetector:
    """Detects whether the workload is running on edge or cloud.

    Uses multiple detection strategies with fallbacks:

    1. **Environment Variables** (highest confidence)
       - DORY_WORKLOAD_LOCATION: "edge" or "managed"
       - DORY_NODE_TYPE: "edge" or "cloud"
       - NODE_TYPE: Node type from Downward API

    2. **Pod Labels** (high confidence)
       - workload-location label on the pod
       - migrated-from-edge label for failover pods

    3. **Node Labels** (high confidence)
       - node-type label on the node (requires K8s API access)

    4. **Node Name Pattern** (low confidence)
       - Pattern matching on node names (e.g., "edge-*", "cloud-*")

    If none of the strategies yields a result, a default context with
    ``node_type=UNKNOWN``, ``is_edge=False`` and ``confidence="none"`` is
    returned.

    Usage:
        detector = WorkloadDetector()
        context = detector.detect()

        if context.is_edge:
            # Running on edge node
            enable_offline_mode()
        else:
            # Running on cloud node
            enable_full_connectivity()

        if context.is_migrated:
            # This is a failover pod
            await restore_state_from_edge(context.original_node)
    """

    # Default patterns for node name detection
    EDGE_NODE_PATTERNS = [
        r"^edge-",
        r"-edge-",
        r"-edge$",
        r"^edge\d+",
        r"^outpost-",
        r"^local-",
        r"^onprem-",
    ]

    CLOUD_NODE_PATTERNS = [
        r"^ip-\d+",  # AWS node naming
        r"^gke-",  # GKE node naming
        r"^aks-",  # AKS node naming
        r"^cloud-",
        r"^managed-",
    ]

    def __init__(
        self,
        k8s_client: Any = None,
        edge_patterns: list[str] | None = None,
        cloud_patterns: list[str] | None = None,
    ):
        """Initialize detector.

        Args:
            k8s_client: Optional Kubernetes client for API-based detection.
                It is called as ``get_pod(namespace, pod_name)`` and
                ``get_node(node_name)``; without it, the pod-label and
                node-label strategies are skipped.
            edge_patterns: Custom regex patterns for edge node names.
                None selects EDGE_NODE_PATTERNS; an empty list disables
                edge name matching.
            cloud_patterns: Custom regex patterns for cloud node names.
                None selects CLOUD_NODE_PATTERNS; an empty list disables
                cloud name matching.
        """
        self._k8s_client = k8s_client
        # Compare against None (not truthiness) so that an explicitly empty
        # list is honoured, and copy so the instance never aliases the
        # class-level defaults or the caller's list.
        self._edge_patterns = list(
            self.EDGE_NODE_PATTERNS if edge_patterns is None else edge_patterns
        )
        self._cloud_patterns = list(
            self.CLOUD_NODE_PATTERNS if cloud_patterns is None else cloud_patterns
        )
        self._cached_context: WorkloadContext | None = None

    def detect(self, use_cache: bool = True) -> WorkloadContext:
        """Detect workload context.

        Tries the detection methods in order of reliability (environment,
        pod labels, node labels, node name pattern) and falls back to the
        default context. The result is always stored in the cache, even
        when ``use_cache`` is False.

        Args:
            use_cache: If True, return cached result if available

        Returns:
            WorkloadContext with detection results
        """
        if use_cache and self._cached_context is not None:
            return self._cached_context

        # ``or`` short-circuits, so later (API-backed) strategies only run
        # when the earlier ones returned None.
        context = (
            self._detect_from_env()
            or self._detect_from_pod_labels()
            or self._detect_from_node_labels()
            or self._detect_from_node_name()
            or self._default_context()
        )

        self._cached_context = context
        logger.info(
            "Workload detection: node_type=%s, is_edge=%s, method=%s, confidence=%s",
            context.node_type.value,
            context.is_edge,
            context.detection_method,
            context.confidence,
        )

        return context

    def _detect_from_env(self) -> WorkloadContext | None:
        """Detect from environment variables.

        Reads DORY_WORKLOAD_LOCATION, DORY_NODE_TYPE, NODE_TYPE,
        DORY_MIGRATED_FROM_EDGE, DORY_ORIGINAL_NODE, NODE_NAME and POD_NAME.

        Returns:
            A high-confidence context when either the node type or the
            workload location is recognised, otherwise None.
        """
        environ = os.environ
        workload_loc = environ.get("DORY_WORKLOAD_LOCATION", "").lower()
        node_type_env = environ.get("DORY_NODE_TYPE", "").lower()
        node_type_k8s = environ.get("NODE_TYPE", "").lower()  # From Downward API

        # Check for migrated pod
        is_migrated = environ.get("DORY_MIGRATED_FROM_EDGE", "").lower() == "true"

        # Determine node type; "edge" from either variable wins over "cloud".
        node_type = NodeType.UNKNOWN
        if node_type_env == "edge" or node_type_k8s == "edge":
            node_type = NodeType.EDGE
        elif node_type_env == "cloud" or node_type_k8s in ("cloud", "managed", "application"):
            node_type = NodeType.CLOUD

        # Determine workload location
        workload_location = None
        if workload_loc == "edge":
            workload_location = WorkloadLocation.EDGE
        elif workload_loc == "managed":
            workload_location = WorkloadLocation.MANAGED

        # No clear indicator at all: let the next strategy try.
        if node_type == NodeType.UNKNOWN and workload_location is None:
            return None

        if node_type == NodeType.UNKNOWN:
            # Infer node type from workload location (less reliable). A
            # migrated edge workload is running on a cloud node.
            is_edge = workload_location == WorkloadLocation.EDGE and not is_migrated
            node_type = NodeType.EDGE if is_edge else NodeType.CLOUD
        else:
            is_edge = node_type == NodeType.EDGE

        return WorkloadContext(
            node_type=node_type,
            workload_location=workload_location,
            is_edge=is_edge,
            is_migrated=is_migrated,
            original_node=environ.get("DORY_ORIGINAL_NODE"),
            node_name=environ.get("NODE_NAME"),
            pod_name=environ.get("POD_NAME"),
            detection_method="environment",
            confidence="high",
        )

    def _detect_from_pod_labels(self) -> WorkloadContext | None:
        """Detect from pod labels via Kubernetes API.

        Requires a Kubernetes client and the POD_NAME environment variable;
        POD_NAMESPACE defaults to "default". Any exception raised while
        querying or reading the pod is logged at debug level and treated
        as "not detected".

        Returns:
            A high-confidence context when the pod carries a recognised
            workload-location label, otherwise None.
        """
        if not self._k8s_client:
            return None

        try:
            pod_name = os.environ.get("POD_NAME")
            namespace = os.environ.get("POD_NAMESPACE", "default")

            if not pod_name:
                return None

            # Get pod labels
            pod = self._k8s_client.get_pod(namespace, pod_name)
            if not pod or not pod.metadata or not pod.metadata.labels:
                return None

            labels = pod.metadata.labels
            node_name = pod.spec.node_name if pod.spec else None

            workload_loc = labels.get(LABEL_WORKLOAD_LOCATION, "")
            if workload_loc == VALUE_EDGE:
                workload_location = WorkloadLocation.EDGE
            elif workload_loc == VALUE_MANAGED:
                workload_location = WorkloadLocation.MANAGED
            else:
                return None

            is_migrated = labels.get(LABEL_MIGRATED_FROM_EDGE) == "true"
            # For migrated pods, they're edge workloads but on cloud nodes
            is_edge = workload_location == WorkloadLocation.EDGE and not is_migrated

            return WorkloadContext(
                node_type=NodeType.EDGE if is_edge else NodeType.CLOUD,
                workload_location=workload_location,
                is_edge=is_edge,
                is_migrated=is_migrated,
                original_node=labels.get(LABEL_ORIGINAL_NODE),
                node_name=node_name,
                pod_name=pod_name,
                detection_method="pod_labels",
                confidence="high",
            )

        except Exception as e:
            # Deliberately best-effort: API failures must not break detection.
            logger.debug("Pod label detection failed: %s", e)

        return None

    def _detect_from_node_labels(self) -> WorkloadContext | None:
        """Detect from node labels via Kubernetes API.

        Requires a Kubernetes client and the NODE_NAME environment
        variable. Any exception raised while querying or reading the node
        is logged at debug level and treated as "not detected".

        Returns:
            A high-confidence context when the node-type label is "edge",
            "cloud", "managed" or "application", otherwise None.
        """
        if not self._k8s_client:
            return None

        try:
            node_name = os.environ.get("NODE_NAME")
            if not node_name:
                return None

            # Get node labels
            node = self._k8s_client.get_node(node_name)
            if not node or not node.metadata or not node.metadata.labels:
                return None

            node_type_label = node.metadata.labels.get(LABEL_NODE_TYPE, "")

            if node_type_label == "edge":
                return self._node_context(True, node_name, "node_labels", "high")
            if node_type_label in ("cloud", "managed", "application"):
                return self._node_context(False, node_name, "node_labels", "high")

        except Exception as e:
            # Deliberately best-effort: API failures must not break detection.
            logger.debug("Node label detection failed: %s", e)

        return None

    def _detect_from_node_name(self) -> WorkloadContext | None:
        """Detect from node name patterns (fallback).

        Matches NODE_NAME case-insensitively against the edge patterns
        first and the cloud patterns second.

        Returns:
            A low-confidence context for the first pattern group that
            matches, or None when NODE_NAME is unset or nothing matches.
        """
        node_name = os.environ.get("NODE_NAME", "")
        if not node_name:
            return None

        def matches(patterns: list[str]) -> bool:
            return any(re.search(p, node_name, re.IGNORECASE) for p in patterns)

        # Edge patterns take precedence over cloud patterns.
        if matches(self._edge_patterns):
            return self._node_context(True, node_name, "node_name_pattern", "low")
        if matches(self._cloud_patterns):
            return self._node_context(False, node_name, "node_name_pattern", "low")

        return None

    @staticmethod
    def _node_context(
        is_edge: bool,
        node_name: str,
        detection_method: str,
        confidence: str,
    ) -> WorkloadContext:
        """Build a non-migrated context from a node-level classification."""
        return WorkloadContext(
            node_type=NodeType.EDGE if is_edge else NodeType.CLOUD,
            workload_location=(
                WorkloadLocation.EDGE if is_edge else WorkloadLocation.MANAGED
            ),
            is_edge=is_edge,
            is_migrated=False,
            original_node=None,
            node_name=node_name,
            pod_name=os.environ.get("POD_NAME"),
            detection_method=detection_method,
            confidence=confidence,
        )

    def _default_context(self) -> WorkloadContext:
        """Return default context when detection fails.

        The node type is UNKNOWN and the confidence is "none".
        """
        return WorkloadContext(
            node_type=NodeType.UNKNOWN,
            workload_location=None,
            is_edge=False,  # Default to cloud behavior (safer)
            is_migrated=False,
            original_node=None,
            node_name=os.environ.get("NODE_NAME"),
            pod_name=os.environ.get("POD_NAME"),
            detection_method="default",
            confidence="none",
        )

    def clear_cache(self) -> None:
        """Clear cached detection result."""
        self._cached_context = None
392
+
393
+
394
+ # =============================================================================
395
+ # Convenience Functions
396
+ # =============================================================================
397
+
398
# Module-wide detector, created on first use by get_detector().
_default_detector: WorkloadDetector | None = None


def get_detector() -> WorkloadDetector:
    """Return the shared default detector, creating it on first call.

    The instance is built with ``WorkloadDetector()`` (no Kubernetes
    client, default patterns) and stored in the module-level
    ``_default_detector``.
    """
    global _default_detector
    detector = _default_detector
    if detector is None:
        detector = WorkloadDetector()
        _default_detector = detector
    return detector
408
+
409
+
410
def is_edge_node() -> bool:
    """Tell whether the default detector classifies this node as edge.

    Returns:
        The ``is_edge`` flag of the (possibly cached) detection result.
    """
    context = get_detector().detect()
    return context.is_edge
417
+
418
+
419
def is_cloud_node() -> bool:
    """Tell whether the default detector treats this node as cloud.

    This is the negation of the ``is_edge`` flag, so it is also True for
    the fallback context whose node type is UNKNOWN.

    Returns:
        True if the detection result is not flagged as edge.
    """
    context = get_detector().detect()
    return not context.is_edge
426
+
427
+
428
def is_migrated_workload() -> bool:
    """Tell whether this workload was migrated away from an edge node.

    Returns:
        The ``is_migrated`` flag of the default detector's result, i.e.
        True for a failover pod from edge.
    """
    context = get_detector().detect()
    return context.is_migrated
435
+
436
+
437
def get_workload_context() -> WorkloadContext:
    """Return the full detection result of the default detector.

    Returns:
        WorkloadContext with all detection information (cached after the
        first detection).
    """
    detector = get_detector()
    return detector.detect()
444
+
445
+
446
def get_node_type() -> NodeType:
    """Return the node type reported by the default detector.

    Returns:
        NodeType enum (EDGE, CLOUD, or UNKNOWN)
    """
    context = get_detector().detect()
    return context.node_type
453
+
454
+
455
+ # =============================================================================
456
+ # Kubernetes Pod Spec Helper
457
+ # =============================================================================
458
+
459
def get_recommended_env_vars() -> list[dict[str, Any]]:
    """Get recommended environment variables for pod spec.

    Returns a list of environment variable definitions that should be
    added to the pod spec to enable proper workload detection. It covers
    the variables read by the environment-based detection, including the
    migration markers DORY_MIGRATED_FROM_EDGE and DORY_ORIGINAL_NODE.

    Returns:
        List of env var definitions for Kubernetes pod spec. A fresh list
        is built on every call, so callers may modify it freely.
    """

    def from_field(name: str, field_path: str) -> dict[str, Any]:
        # One Downward API entry: expose a pod field as an env var.
        return {"name": name, "valueFrom": {"fieldRef": {"fieldPath": field_path}}}

    return [
        # Standard Downward API fields
        from_field("POD_NAME", "metadata.name"),
        from_field("POD_NAMESPACE", "metadata.namespace"),
        from_field("POD_IP", "status.podIP"),
        from_field("NODE_NAME", "spec.nodeName"),
        # Dory-specific labels exposed as env vars
        from_field("DORY_APP_NAME", "metadata.labels['app']"),
        from_field("DORY_WORKLOAD_LOCATION", "metadata.labels['workload-location']"),
        from_field("DORY_MIGRATED_FROM_EDGE", "metadata.labels['migrated-from-edge']"),
        from_field("DORY_ORIGINAL_NODE", "metadata.labels['original-edge-node']"),
        # Node type (if available). NOTE(review): Kubernetes expands
        # $(NODE_TYPE) only when NODE_TYPE is defined earlier in the same
        # container env list; this list does not define it, so it must be
        # supplied externally - confirm against the deployment manifests.
        {
            "name": "DORY_NODE_TYPE",
            "value": "$(NODE_TYPE)",  # Set by node or externally
        },
    ]
501
+
502
+
503
def get_pod_spec_yaml_snippet() -> str:
    """Get YAML snippet for pod spec environment variables.

    The snippet is an ``env:`` list that exposes pod/node identity and
    the Dory labels (workload location and migration markers) through
    the Downward API. It is rooted at column 0 with 2-space nesting, so
    callers must re-indent it to the depth of the target container.

    Returns:
        YAML string to add to pod spec
    """
    return """
env:
  # Standard Kubernetes Downward API
  - name: POD_NAME
    valueFrom:
      fieldRef:
        fieldPath: metadata.name
  - name: POD_NAMESPACE
    valueFrom:
      fieldRef:
        fieldPath: metadata.namespace
  - name: POD_IP
    valueFrom:
      fieldRef:
        fieldPath: status.podIP
  - name: NODE_NAME
    valueFrom:
      fieldRef:
        fieldPath: spec.nodeName

  # Dory workload detection
  - name: DORY_APP_NAME
    valueFrom:
      fieldRef:
        fieldPath: metadata.labels['app']
  - name: DORY_WORKLOAD_LOCATION
    valueFrom:
      fieldRef:
        fieldPath: metadata.labels['workload-location']
  - name: DORY_MIGRATED_FROM_EDGE
    valueFrom:
      fieldRef:
        fieldPath: metadata.labels['migrated-from-edge']
  - name: DORY_ORIGINAL_NODE
    valueFrom:
      fieldRef:
        fieldPath: metadata.labels['original-edge-node']
"""