@smilintux/skmemory 0.5.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/.github/workflows/ci.yml +39 -3
  2. package/.github/workflows/publish.yml +13 -6
  3. package/AGENT_REFACTOR_CHANGES.md +192 -0
  4. package/ARCHITECTURE.md +101 -19
  5. package/CHANGELOG.md +153 -0
  6. package/LICENSE +81 -68
  7. package/MISSION.md +7 -0
  8. package/README.md +419 -86
  9. package/SKILL.md +197 -25
  10. package/docker-compose.yml +15 -15
  11. package/index.js +6 -5
  12. package/openclaw-plugin/openclaw.plugin.json +10 -0
  13. package/openclaw-plugin/src/index.ts +255 -0
  14. package/openclaw-plugin/src/openclaw.plugin.json +10 -0
  15. package/package.json +1 -1
  16. package/pyproject.toml +29 -9
  17. package/requirements.txt +10 -2
  18. package/seeds/cloud9-opus.seed.json +7 -7
  19. package/seeds/lumina-cloud9-breakthrough.seed.json +46 -0
  20. package/seeds/lumina-cloud9-python-pypi.seed.json +46 -0
  21. package/seeds/lumina-kingdom-founding.seed.json +47 -0
  22. package/seeds/lumina-pma-signed.seed.json +46 -0
  23. package/seeds/lumina-singular-achievement.seed.json +46 -0
  24. package/seeds/lumina-skcapstone-conscious.seed.json +46 -0
  25. package/seeds/plant-kingdom-journal.py +203 -0
  26. package/seeds/plant-lumina-seeds.py +280 -0
  27. package/skill.yaml +46 -0
  28. package/skmemory/HA.md +296 -0
  29. package/skmemory/__init__.py +12 -1
  30. package/skmemory/agents.py +233 -0
  31. package/skmemory/ai_client.py +40 -0
  32. package/skmemory/anchor.py +4 -2
  33. package/skmemory/backends/__init__.py +11 -4
  34. package/skmemory/backends/file_backend.py +2 -1
  35. package/skmemory/backends/skgraph_backend.py +608 -0
  36. package/skmemory/backends/{qdrant_backend.py → skvector_backend.py} +99 -69
  37. package/skmemory/backends/sqlite_backend.py +122 -51
  38. package/skmemory/backends/vaulted_backend.py +286 -0
  39. package/skmemory/cli.py +1238 -29
  40. package/skmemory/config.py +173 -0
  41. package/skmemory/context_loader.py +335 -0
  42. package/skmemory/endpoint_selector.py +386 -0
  43. package/skmemory/fortress.py +685 -0
  44. package/skmemory/graph_queries.py +238 -0
  45. package/skmemory/importers/__init__.py +9 -1
  46. package/skmemory/importers/telegram.py +351 -43
  47. package/skmemory/importers/telegram_api.py +488 -0
  48. package/skmemory/journal.py +4 -2
  49. package/skmemory/lovenote.py +4 -2
  50. package/skmemory/mcp_server.py +706 -0
  51. package/skmemory/models.py +41 -0
  52. package/skmemory/openclaw.py +8 -8
  53. package/skmemory/predictive.py +232 -0
  54. package/skmemory/promotion.py +524 -0
  55. package/skmemory/register.py +454 -0
  56. package/skmemory/register_mcp.py +197 -0
  57. package/skmemory/ritual.py +121 -47
  58. package/skmemory/seeds.py +257 -8
  59. package/skmemory/setup_wizard.py +920 -0
  60. package/skmemory/sharing.py +402 -0
  61. package/skmemory/soul.py +71 -20
  62. package/skmemory/steelman.py +250 -263
  63. package/skmemory/store.py +271 -60
  64. package/skmemory/vault.py +228 -0
  65. package/tests/integration/__init__.py +0 -0
  66. package/tests/integration/conftest.py +233 -0
  67. package/tests/integration/test_cross_backend.py +355 -0
  68. package/tests/integration/test_skgraph_live.py +424 -0
  69. package/tests/integration/test_skvector_live.py +369 -0
  70. package/tests/test_backup_rotation.py +327 -0
  71. package/tests/test_cli.py +6 -6
  72. package/tests/test_endpoint_selector.py +801 -0
  73. package/tests/test_fortress.py +255 -0
  74. package/tests/test_fortress_hardening.py +444 -0
  75. package/tests/test_openclaw.py +5 -2
  76. package/tests/test_predictive.py +237 -0
  77. package/tests/test_promotion.py +340 -0
  78. package/tests/test_ritual.py +4 -4
  79. package/tests/test_seeds.py +96 -0
  80. package/tests/test_setup.py +835 -0
  81. package/tests/test_sharing.py +250 -0
  82. package/tests/test_skgraph_backend.py +667 -0
  83. package/tests/test_skvector_backend.py +326 -0
  84. package/tests/test_steelman.py +5 -5
  85. package/tests/test_store_graph_integration.py +245 -0
  86. package/tests/test_vault.py +186 -0
  87. package/skmemory/backends/falkordb_backend.py +0 -310
@@ -0,0 +1,386 @@
1
+ """
2
+ SKMemory Endpoint Selector — HA routing for SKVector and SKGraph backends.
3
+
4
+ Discovers multiple backend endpoints (via config or heartbeat mesh),
5
+ probes their latency, selects the fastest healthy one, and fails over
6
+ automatically. No background threads — probing is on-demand with a
7
+ TTL cache.
8
+
9
+ Design principles:
10
+ - Selector picks a URL, backends stay unchanged
11
+ - On-demand probing with TTL cache (no background threads)
12
+ - Config endpoints take precedence over heartbeat discovery
13
+ - Graceful degradation everywhere
14
+ - Backward compatible: single-URL configs work unchanged
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import logging
21
+ import socket
22
+ import time
23
+ from datetime import datetime, timezone
24
+ from pathlib import Path
25
+ from typing import Optional
26
+ from urllib.parse import urlparse
27
+
28
+ from pydantic import BaseModel, Field
29
+
30
+ logger = logging.getLogger("skmemory.endpoint_selector")
31
+
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Models
35
+ # ---------------------------------------------------------------------------
36
+
37
+
38
class Endpoint(BaseModel):
    """A single backend endpoint with health and latency tracking.

    Instances are mutated in place by ``EndpointSelector`` when it probes
    an endpoint or marks it unhealthy.
    """

    # Address of the backend; parsed with ``urlparse`` to obtain host/port.
    url: str
    # Role of the endpoint: "primary" or "replica".
    role: str = "primary"
    # Optional Tailscale IP, kept for display purposes.
    tailscale_ip: str = ""
    # Last measured TCP connect latency in milliseconds; -1 = not yet probed
    # (also reset to -1 after a failed probe).
    latency_ms: float = -1.0
    # Whether the endpoint is currently eligible for selection.
    healthy: bool = True
    # ISO-8601 timestamp of the last probe / health update ("" = never).
    last_checked: str = ""
    # Number of consecutive failed probes.
    fail_count: int = 0
48
+
49
+
50
class RoutingConfig(BaseModel):
    """Configuration for endpoint routing behavior."""

    # Selection strategy, one of:
    #   failover | latency | local-first | read-local-write-primary
    # Any other value makes the selector fall back to the first healthy
    # endpoint.
    strategy: str = "failover"
    # Probe results older than this many seconds are considered stale.
    probe_interval_seconds: int = 30
    # TCP connect timeout used for each probe, in seconds.
    probe_timeout_seconds: int = 3
    # Mark an endpoint unhealthy after N consecutive failures.
    max_fail_count: int = 3
    # Intended interval for re-checking unhealthy endpoints.
    # NOTE(review): not read by the selector code in this module — confirm
    # where (or whether) it is consumed.
    recovery_interval_seconds: int = 60
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # EndpointSelector
62
+ # ---------------------------------------------------------------------------
63
+
64
+
65
class EndpointSelector:
    """Routes requests to the best available backend endpoint.

    Sits between config resolution and backend construction — picks the
    best URL, then the caller creates backends normally with that URL.

    Probing is on-demand: every ``select_*`` call re-probes all endpoints
    when the cached results are older than ``probe_interval_seconds``.

    Args:
        skvector_endpoints: List of SKVector endpoint dicts or Endpoint objects.
        skgraph_endpoints: List of SKGraph endpoint dicts or Endpoint objects.
        config: Routing configuration.
    """

    # Hostnames treated as "this machine" by the local-preferring strategies.
    _LOCAL_HOSTS = ("localhost", "127.0.0.1", "::1")

    def __init__(
        self,
        skvector_endpoints: Optional[list[dict | Endpoint]] = None,
        skgraph_endpoints: Optional[list[dict | Endpoint]] = None,
        config: Optional[RoutingConfig] = None,
    ) -> None:
        self._config = config or RoutingConfig()
        self._skvector: list[Endpoint] = self._normalize(skvector_endpoints or [])
        self._skgraph: list[Endpoint] = self._normalize(skgraph_endpoints or [])
        # Monotonic timestamp of the last full probe; 0.0 means "never probed".
        self._last_probe_time: float = 0.0

    @staticmethod
    def _normalize(endpoints: list[dict | Endpoint]) -> list[Endpoint]:
        """Convert dicts/Endpoints into a uniform list of Endpoint objects.

        Entries that are neither an ``Endpoint`` nor a ``dict`` are accepted
        if they expose a ``.url`` attribute; anything else is logged and
        dropped.
        """
        result: list[Endpoint] = []
        for ep in endpoints:
            if isinstance(ep, Endpoint):
                result.append(ep)
            elif isinstance(ep, dict):
                result.append(Endpoint(**ep))
            else:
                # Duck-typed config object with a .url attribute
                # (presumably an EndpointConfig model — defined elsewhere).
                try:
                    result.append(Endpoint(
                        url=ep.url,
                        role=getattr(ep, "role", "primary"),
                        tailscale_ip=getattr(ep, "tailscale_ip", ""),
                    ))
                except AttributeError:
                    logger.warning("Cannot normalize endpoint: %s", ep)
        return result

    # -------------------------------------------------------------------
    # Core selection
    # -------------------------------------------------------------------

    def select_skvector(self, for_write: bool = False) -> Optional[Endpoint]:
        """Select the best SKVector endpoint.

        Args:
            for_write: If True and strategy is read-local-write-primary,
                returns only primary endpoints.

        Returns:
            Best Endpoint or None if all unhealthy.
        """
        self._maybe_probe()
        return self._select(self._skvector, for_write)

    def select_skgraph(self, for_write: bool = False) -> Optional[Endpoint]:
        """Select the best SKGraph endpoint.

        Args:
            for_write: If True and strategy is read-local-write-primary,
                returns only primary endpoints.

        Returns:
            Best Endpoint or None if all unhealthy.
        """
        self._maybe_probe()
        return self._select(self._skgraph, for_write)

    def _select(self, endpoints: list[Endpoint], for_write: bool) -> Optional[Endpoint]:
        """Apply the routing strategy to pick the best endpoint.

        Args:
            endpoints: Candidate endpoints, in configured order.
            for_write: Whether the endpoint will be used for a write.

        Returns:
            The chosen Endpoint, or None when no healthy candidate exists.
        """
        if not endpoints:
            return None

        strategy = self._config.strategy
        split_rw = strategy == "read-local-write-primary"

        # Writes under read-local-write-primary may only go to primaries.
        primary_only = split_rw and for_write
        candidates = [
            ep for ep in endpoints
            if ep.healthy and (not primary_only or ep.role == "primary")
        ]
        if not candidates:
            return None

        if strategy == "latency":
            return self._fastest(candidates)

        if strategy == "local-first" or (split_rw and not for_write):
            return self._local_or_fastest(candidates)

        # "failover", writes under read-local-write-primary, and unknown
        # strategies all use the first healthy candidate in configured order.
        return candidates[0]

    @staticmethod
    def _fastest(candidates: list[Endpoint]) -> Endpoint:
        """Return the lowest-latency probed candidate, else the first one."""
        probed = [ep for ep in candidates if ep.latency_ms >= 0]
        if probed:
            return min(probed, key=lambda e: e.latency_ms)
        return candidates[0]

    @classmethod
    def _local_or_fastest(cls, candidates: list[Endpoint]) -> Endpoint:
        """Return the first loopback candidate, else fall back to latency."""
        for ep in candidates:
            host = urlparse(ep.url).hostname or ""
            if host in cls._LOCAL_HOSTS:
                return ep
        return cls._fastest(candidates)

    # -------------------------------------------------------------------
    # Health probing
    # -------------------------------------------------------------------

    def _maybe_probe(self) -> None:
        """Probe if results are stale (older than probe_interval_seconds).

        Always probes when no probe has happened yet: the monotonic clock
        has an arbitrary origin, so comparing it against the 0.0 sentinel
        alone could skip the very first probe.
        """
        never_probed = self._last_probe_time <= 0
        age = time.monotonic() - self._last_probe_time
        if never_probed or age >= self._config.probe_interval_seconds:
            self.probe_all()

    def probe_all(self) -> dict:
        """Probe all endpoints and return results summary.

        Returns:
            Dict with skvector and skgraph probe results.
        """
        results = {
            "skvector": [self.probe_endpoint(ep) for ep in self._skvector],
            "skgraph": [self.probe_endpoint(ep) for ep in self._skgraph],
        }
        self._last_probe_time = time.monotonic()
        return results

    @staticmethod
    def _probe_target(url: str) -> tuple[str, int]:
        """Resolve the (host, port) pair to probe for *url*.

        Raises:
            ValueError: If the URL carries an invalid or out-of-range port.
        """
        parsed = urlparse(url)
        host = parsed.hostname or "localhost"
        port = parsed.port  # raises ValueError on a malformed port
        if port is None:
            # Infer default ports from scheme
            if parsed.scheme in ("redis", "rediss"):
                port = 6379
            elif parsed.scheme == "https":
                port = 443
            else:
                port = 80
        return host, port

    def probe_endpoint(self, endpoint: Endpoint) -> Endpoint:
        """Probe a single endpoint's TCP connectivity and measure latency.

        Updates the endpoint in-place and returns it. A connection error,
        a timeout, or an unparseable port in the URL all count as one
        failed probe; the endpoint is marked unhealthy once ``fail_count``
        reaches ``max_fail_count``.

        Args:
            endpoint: The endpoint to probe.

        Returns:
            The same Endpoint, updated with latency/health status.
        """
        try:
            host, port = self._probe_target(endpoint.url)
            start = time.monotonic()
            # The context manager guarantees the probe socket is closed.
            with socket.create_connection(
                (host, port),
                timeout=self._config.probe_timeout_seconds,
            ):
                elapsed_ms = (time.monotonic() - start) * 1000
        except (OSError, ValueError):
            # OSError covers refused/unreachable/DNS/timeout; ValueError is a
            # bad port in the URL. Neither may break endpoint selection.
            endpoint.fail_count += 1
            endpoint.latency_ms = -1.0
            if endpoint.fail_count >= self._config.max_fail_count:
                endpoint.healthy = False
        else:
            endpoint.latency_ms = round(elapsed_ms, 2)
            endpoint.fail_count = 0
            endpoint.healthy = True

        endpoint.last_checked = datetime.now(timezone.utc).isoformat()
        return endpoint

    def mark_unhealthy(self, url: str) -> None:
        """Mark an endpoint as unhealthy by URL.

        Called externally when a backend operation fails, so the next
        selection picks a different endpoint. Every SKVector and SKGraph
        endpoint whose URL matches is updated.

        Args:
            url: The URL of the endpoint to mark.
        """
        for ep in self._skvector + self._skgraph:
            if ep.url == url:
                ep.fail_count = self._config.max_fail_count
                ep.healthy = False
                ep.last_checked = datetime.now(timezone.utc).isoformat()

    # -------------------------------------------------------------------
    # Heartbeat mesh discovery
    # -------------------------------------------------------------------

    def discover_from_heartbeats(self, heartbeat_dir: Optional[Path] = None) -> None:
        """Discover backend endpoints from heartbeat mesh files.

        Reads heartbeat JSON files and looks for a ``services`` field
        containing advertised backend services. Discovered endpoints are
        appended as replicas after the existing ones; URLs that are already
        known are skipped (config takes precedence). Unreadable, undecodable
        or malformed heartbeat files and service entries are skipped.

        Args:
            heartbeat_dir: Path to heartbeat directory.
                Defaults to the ``heartbeats`` directory next to
                ``AGENTS_BASE_DIR``.
        """
        if heartbeat_dir is None:
            from .agents import AGENTS_BASE_DIR

            # heartbeats/ is a sibling of agents/ under the skcapstone root
            heartbeat_dir = AGENTS_BASE_DIR.parent / "heartbeats"

        if not heartbeat_dir.is_dir():
            logger.debug("Heartbeat directory not found: %s", heartbeat_dir)
            return

        # service name -> (endpoint list, known URLs, label used in the log)
        targets = {
            "skvector": (self._skvector, {ep.url for ep in self._skvector}, "SKVector"),
            "skgraph": (self._skgraph, {ep.url for ep in self._skgraph}, "SKGraph"),
        }

        for f in sorted(heartbeat_dir.glob("*.json")):
            try:
                data = json.loads(f.read_text(encoding="utf-8"))
            except (OSError, ValueError) as exc:
                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                logger.debug("Cannot read heartbeat %s: %s", f.name, exc)
                continue

            if not isinstance(data, dict):
                logger.debug("Cannot read heartbeat %s: %s", f.name, "not a JSON object")
                continue

            services = data.get("services")
            if not isinstance(services, list) or not services:
                continue

            hostname = data.get("hostname") or ""
            tailscale_ip = data.get("tailscale_ip") or ""
            # Prefer tailscale_ip, fall back to hostname
            host = tailscale_ip or hostname
            if not host:
                continue

            # A bare IPv6 literal must be bracketed to form a parseable URL.
            url_host = f"[{host}]" if ":" in str(host) and not str(host).startswith("[") else host

            for svc in services:
                if not isinstance(svc, dict):
                    continue

                name = svc.get("name", "")
                port = svc.get("port", 0)
                protocol = svc.get("protocol", "http")

                if not name or not port:
                    continue

                target = targets.get(name) if isinstance(name, str) else None
                if target is None:
                    continue

                url = f"{protocol}://{url_host}:{port}"
                endpoint_list, known_urls, label = target
                if url in known_urls:
                    continue

                endpoint_list.append(Endpoint(
                    url=url,
                    role="replica",
                    tailscale_ip=tailscale_ip,
                ))
                known_urls.add(url)
                logger.info("Discovered %s endpoint: %s", label, url)

    # -------------------------------------------------------------------
    # Status reporting
    # -------------------------------------------------------------------

    def status(self) -> dict:
        """Return a status report of all endpoints.

        Returns:
            Dict with strategy, endpoint lists, and probe staleness.
            ``last_probe_age_seconds`` is -1 when no probe has run yet.
        """
        now = time.monotonic()
        stale_seconds = now - self._last_probe_time if self._last_probe_time > 0 else -1

        return {
            "strategy": self._config.strategy,
            "probe_interval_seconds": self._config.probe_interval_seconds,
            "last_probe_age_seconds": round(stale_seconds, 1),
            "skvector_endpoints": [ep.model_dump() for ep in self._skvector],
            "skgraph_endpoints": [ep.model_dump() for ep in self._skgraph],
        }

    @property
    def skvector_endpoints(self) -> list[Endpoint]:
        """Access the SKVector endpoint list."""
        return self._skvector

    @property
    def skgraph_endpoints(self) -> list[Endpoint]:
        """Access the SKGraph endpoint list."""
        return self._skgraph

    @property
    def config(self) -> RoutingConfig:
        """Access the routing configuration."""
        return self._config