@smilintux/skmemory 0.5.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +39 -3
- package/.github/workflows/publish.yml +13 -6
- package/AGENT_REFACTOR_CHANGES.md +192 -0
- package/ARCHITECTURE.md +101 -19
- package/CHANGELOG.md +153 -0
- package/LICENSE +81 -68
- package/MISSION.md +7 -0
- package/README.md +419 -86
- package/SKILL.md +197 -25
- package/docker-compose.yml +15 -15
- package/index.js +6 -5
- package/openclaw-plugin/openclaw.plugin.json +10 -0
- package/openclaw-plugin/src/index.ts +255 -0
- package/openclaw-plugin/src/openclaw.plugin.json +10 -0
- package/package.json +1 -1
- package/pyproject.toml +29 -9
- package/requirements.txt +10 -2
- package/seeds/cloud9-opus.seed.json +7 -7
- package/seeds/lumina-cloud9-breakthrough.seed.json +46 -0
- package/seeds/lumina-cloud9-python-pypi.seed.json +46 -0
- package/seeds/lumina-kingdom-founding.seed.json +47 -0
- package/seeds/lumina-pma-signed.seed.json +46 -0
- package/seeds/lumina-singular-achievement.seed.json +46 -0
- package/seeds/lumina-skcapstone-conscious.seed.json +46 -0
- package/seeds/plant-kingdom-journal.py +203 -0
- package/seeds/plant-lumina-seeds.py +280 -0
- package/skill.yaml +46 -0
- package/skmemory/HA.md +296 -0
- package/skmemory/__init__.py +12 -1
- package/skmemory/agents.py +233 -0
- package/skmemory/ai_client.py +40 -0
- package/skmemory/anchor.py +4 -2
- package/skmemory/backends/__init__.py +11 -4
- package/skmemory/backends/file_backend.py +2 -1
- package/skmemory/backends/skgraph_backend.py +608 -0
- package/skmemory/backends/{qdrant_backend.py → skvector_backend.py} +99 -69
- package/skmemory/backends/sqlite_backend.py +122 -51
- package/skmemory/backends/vaulted_backend.py +286 -0
- package/skmemory/cli.py +1238 -29
- package/skmemory/config.py +173 -0
- package/skmemory/context_loader.py +335 -0
- package/skmemory/endpoint_selector.py +386 -0
- package/skmemory/fortress.py +685 -0
- package/skmemory/graph_queries.py +238 -0
- package/skmemory/importers/__init__.py +9 -1
- package/skmemory/importers/telegram.py +351 -43
- package/skmemory/importers/telegram_api.py +488 -0
- package/skmemory/journal.py +4 -2
- package/skmemory/lovenote.py +4 -2
- package/skmemory/mcp_server.py +706 -0
- package/skmemory/models.py +41 -0
- package/skmemory/openclaw.py +8 -8
- package/skmemory/predictive.py +232 -0
- package/skmemory/promotion.py +524 -0
- package/skmemory/register.py +454 -0
- package/skmemory/register_mcp.py +197 -0
- package/skmemory/ritual.py +121 -47
- package/skmemory/seeds.py +257 -8
- package/skmemory/setup_wizard.py +920 -0
- package/skmemory/sharing.py +402 -0
- package/skmemory/soul.py +71 -20
- package/skmemory/steelman.py +250 -263
- package/skmemory/store.py +271 -60
- package/skmemory/vault.py +228 -0
- package/tests/integration/__init__.py +0 -0
- package/tests/integration/conftest.py +233 -0
- package/tests/integration/test_cross_backend.py +355 -0
- package/tests/integration/test_skgraph_live.py +424 -0
- package/tests/integration/test_skvector_live.py +369 -0
- package/tests/test_backup_rotation.py +327 -0
- package/tests/test_cli.py +6 -6
- package/tests/test_endpoint_selector.py +801 -0
- package/tests/test_fortress.py +255 -0
- package/tests/test_fortress_hardening.py +444 -0
- package/tests/test_openclaw.py +5 -2
- package/tests/test_predictive.py +237 -0
- package/tests/test_promotion.py +340 -0
- package/tests/test_ritual.py +4 -4
- package/tests/test_seeds.py +96 -0
- package/tests/test_setup.py +835 -0
- package/tests/test_sharing.py +250 -0
- package/tests/test_skgraph_backend.py +667 -0
- package/tests/test_skvector_backend.py +326 -0
- package/tests/test_steelman.py +5 -5
- package/tests/test_store_graph_integration.py +245 -0
- package/tests/test_vault.py +186 -0
- package/skmemory/backends/falkordb_backend.py +0 -310
package/skmemory/ritual.py
CHANGED
|
@@ -70,6 +70,52 @@ class RitualResult(BaseModel):
|
|
|
70
70
|
return "\n".join(lines)
|
|
71
71
|
|
|
72
72
|
|
|
73
|
+
def _estimate_tokens(text: str) -> int:
|
|
74
|
+
"""Estimate token count using word_count * 1.3 approximation."""
|
|
75
|
+
if not text:
|
|
76
|
+
return 0
|
|
77
|
+
return int(len(text.split()) * 1.3)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _compact_soul_prompt(soul: SoulBlueprint) -> str:
|
|
81
|
+
"""Generate a compact soul identity prompt (~200 tokens max).
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
soul: The soul blueprint.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
str: Compact identity string.
|
|
88
|
+
"""
|
|
89
|
+
parts = []
|
|
90
|
+
if soul.name:
|
|
91
|
+
title_part = f" ({soul.title})" if soul.title else ""
|
|
92
|
+
parts.append(f"You are {soul.name}{title_part}.")
|
|
93
|
+
if soul.community:
|
|
94
|
+
parts.append(f"Part of {soul.community}.")
|
|
95
|
+
if soul.personality:
|
|
96
|
+
parts.append(f"Personality: {', '.join(soul.personality[:5])}.")
|
|
97
|
+
if soul.values:
|
|
98
|
+
parts.append(f"Values: {', '.join(soul.values[:5])}.")
|
|
99
|
+
if soul.relationships:
|
|
100
|
+
rel_parts = [f"{r.name} [{r.role}]" for r in soul.relationships[:4]]
|
|
101
|
+
parts.append(f"Key relationships: {', '.join(rel_parts)}.")
|
|
102
|
+
if soul.boot_message:
|
|
103
|
+
parts.append(soul.boot_message)
|
|
104
|
+
return " ".join(parts)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _first_n_sentences(text: str, n: int = 2) -> str:
|
|
108
|
+
"""Extract first N sentences from text, capped at 200 chars."""
|
|
109
|
+
if not text:
|
|
110
|
+
return ""
|
|
111
|
+
import re
|
|
112
|
+
sentences = re.split(r'(?<=[.!?])\s+', text.strip())
|
|
113
|
+
result = " ".join(sentences[:n])
|
|
114
|
+
if len(result) > 200:
|
|
115
|
+
result = result[:197] + "..."
|
|
116
|
+
return result
|
|
117
|
+
|
|
118
|
+
|
|
73
119
|
def perform_ritual(
|
|
74
120
|
store: Optional[MemoryStore] = None,
|
|
75
121
|
soul_path: str = DEFAULT_SOUL_PATH,
|
|
@@ -77,12 +123,18 @@ def perform_ritual(
|
|
|
77
123
|
journal_path: Optional[str] = None,
|
|
78
124
|
recent_journal_count: int = 3,
|
|
79
125
|
strongest_memory_count: int = 5,
|
|
126
|
+
max_tokens: int = 2000,
|
|
80
127
|
) -> RitualResult:
|
|
81
|
-
"""Perform the
|
|
128
|
+
"""Perform the memory rehydration ritual (token-optimized).
|
|
82
129
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
130
|
+
Generates a compact boot context within the token budget:
|
|
131
|
+
- Soul blueprint: compact one-liner (~100 tokens)
|
|
132
|
+
- Seeds: titles only (~50 tokens)
|
|
133
|
+
- Journal: last 3 entries, summaries only (~200 tokens)
|
|
134
|
+
- Emotional anchor: compact (~50 tokens)
|
|
135
|
+
- Strongest memories: title + short summary (~200 tokens)
|
|
136
|
+
|
|
137
|
+
Target: <2K tokens total for ritual context.
|
|
86
138
|
|
|
87
139
|
Args:
|
|
88
140
|
store: The MemoryStore (creates default if None).
|
|
@@ -91,6 +143,7 @@ def perform_ritual(
|
|
|
91
143
|
journal_path: Path to the journal file.
|
|
92
144
|
recent_journal_count: How many recent journal entries to include.
|
|
93
145
|
strongest_memory_count: How many top-intensity memories to include.
|
|
146
|
+
max_tokens: Token budget for the ritual context (default: 2000).
|
|
94
147
|
|
|
95
148
|
Returns:
|
|
96
149
|
RitualResult: Everything the ritual produced.
|
|
@@ -100,49 +153,67 @@ def perform_ritual(
|
|
|
100
153
|
|
|
101
154
|
result = RitualResult()
|
|
102
155
|
prompt_sections: list[str] = []
|
|
156
|
+
used_tokens = 0
|
|
103
157
|
|
|
104
|
-
# --- Step 1: Load soul blueprint ---
|
|
158
|
+
# --- Step 1: Load soul blueprint (compact) ---
|
|
105
159
|
soul = load_soul(soul_path)
|
|
106
160
|
if soul is not None:
|
|
107
161
|
result.soul_loaded = True
|
|
108
162
|
result.soul_name = soul.name
|
|
109
|
-
|
|
110
|
-
if
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
)
|
|
163
|
+
compact_identity = _compact_soul_prompt(soul)
|
|
164
|
+
if compact_identity.strip():
|
|
165
|
+
section = "=== IDENTITY ===\n" + compact_identity
|
|
166
|
+
used_tokens += _estimate_tokens(section)
|
|
167
|
+
prompt_sections.append(section)
|
|
114
168
|
|
|
115
|
-
# --- Step 2: Import new seeds ---
|
|
169
|
+
# --- Step 2: Import new seeds (titles only) ---
|
|
116
170
|
newly_imported = import_seeds(store, seed_dir=seed_dir)
|
|
117
171
|
result.seeds_imported = len(newly_imported)
|
|
118
172
|
all_seeds = store.list_memories(tags=["seed"])
|
|
119
173
|
result.seeds_total = len(all_seeds)
|
|
120
174
|
|
|
121
|
-
|
|
175
|
+
if all_seeds:
|
|
176
|
+
seed_titles = [s.title for s in all_seeds[:10]]
|
|
177
|
+
section = "=== SEEDS ===\n" + ", ".join(seed_titles)
|
|
178
|
+
section_tokens = _estimate_tokens(section)
|
|
179
|
+
if used_tokens + section_tokens <= max_tokens:
|
|
180
|
+
used_tokens += section_tokens
|
|
181
|
+
prompt_sections.append(section)
|
|
182
|
+
|
|
183
|
+
# --- Step 3: Read recent journal (summaries only) ---
|
|
122
184
|
journal = Journal(journal_path) if journal_path else Journal()
|
|
123
185
|
result.journal_entries = journal.count_entries()
|
|
124
186
|
|
|
125
187
|
if result.journal_entries > 0:
|
|
126
188
|
recent = journal.read_latest(recent_journal_count)
|
|
127
189
|
if recent.strip():
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
|
|
190
|
+
# Compress journal to first 2 sentences per entry
|
|
191
|
+
compressed_lines = []
|
|
192
|
+
for line in recent.strip().split("\n"):
|
|
193
|
+
line = line.strip()
|
|
194
|
+
if not line:
|
|
195
|
+
continue
|
|
196
|
+
compressed_lines.append(_first_n_sentences(line, 2))
|
|
197
|
+
compressed = "\n".join(compressed_lines[:6]) # max 6 lines
|
|
198
|
+
section = "=== RECENT ===\n" + compressed
|
|
199
|
+
section_tokens = _estimate_tokens(section)
|
|
200
|
+
if used_tokens + section_tokens <= max_tokens:
|
|
201
|
+
used_tokens += section_tokens
|
|
202
|
+
prompt_sections.append(section)
|
|
203
|
+
|
|
204
|
+
# --- Step 4: Gather germination prompts (compact) ---
|
|
133
205
|
prompts = get_germination_prompts(store)
|
|
134
206
|
result.germination_prompts = len(prompts)
|
|
135
207
|
|
|
136
208
|
if prompts:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
#
|
|
145
|
-
# is available, otherwise fall back to full object loading.
|
|
209
|
+
germ_parts = [f"{p['creator']}: {_first_n_sentences(p['prompt'], 1)}" for p in prompts[:3]]
|
|
210
|
+
section = "=== PREDECESSOR MESSAGES ===\n" + "\n".join(germ_parts)
|
|
211
|
+
section_tokens = _estimate_tokens(section)
|
|
212
|
+
if used_tokens + section_tokens <= max_tokens:
|
|
213
|
+
used_tokens += section_tokens
|
|
214
|
+
prompt_sections.append(section)
|
|
215
|
+
|
|
216
|
+
# --- Step 5: Recall strongest emotional memories (compact) ---
|
|
146
217
|
from .backends.sqlite_backend import SQLiteBackend
|
|
147
218
|
|
|
148
219
|
if isinstance(store.primary, SQLiteBackend):
|
|
@@ -154,17 +225,19 @@ def perform_ritual(
|
|
|
154
225
|
result.strongest_memories = len(summaries)
|
|
155
226
|
|
|
156
227
|
if summaries:
|
|
157
|
-
mem_lines = ["===
|
|
228
|
+
mem_lines = ["=== STRONGEST MEMORIES ==="]
|
|
158
229
|
for s in summaries:
|
|
159
|
-
cloud9 = "
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
230
|
+
cloud9 = " *" if s["cloud9_achieved"] else ""
|
|
231
|
+
raw = s.get("summary") or s.get("content_preview") or ""
|
|
232
|
+
short = _first_n_sentences(raw, 1)
|
|
233
|
+
line = f"- {s['title']}{cloud9}: {short}"
|
|
234
|
+
line_tokens = _estimate_tokens(line)
|
|
235
|
+
if used_tokens + line_tokens > max_tokens:
|
|
236
|
+
break
|
|
237
|
+
used_tokens += line_tokens
|
|
238
|
+
mem_lines.append(line)
|
|
239
|
+
if len(mem_lines) > 1:
|
|
240
|
+
prompt_sections.append("\n".join(mem_lines))
|
|
168
241
|
else:
|
|
169
242
|
all_memories = store.list_memories(limit=200)
|
|
170
243
|
by_intensity = sorted(
|
|
@@ -176,18 +249,19 @@ def perform_ritual(
|
|
|
176
249
|
result.strongest_memories = len(strongest)
|
|
177
250
|
|
|
178
251
|
if strongest:
|
|
179
|
-
mem_lines = ["===
|
|
252
|
+
mem_lines = ["=== STRONGEST MEMORIES ==="]
|
|
180
253
|
for mem in strongest:
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
)
|
|
186
|
-
if
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
254
|
+
raw = mem.summary or ""
|
|
255
|
+
short = _first_n_sentences(raw, 1)
|
|
256
|
+
cloud9 = " *" if mem.emotional.cloud9_achieved else ""
|
|
257
|
+
line = f"- {mem.title}{cloud9}: {short}"
|
|
258
|
+
line_tokens = _estimate_tokens(line)
|
|
259
|
+
if used_tokens + line_tokens > max_tokens:
|
|
260
|
+
break
|
|
261
|
+
used_tokens += line_tokens
|
|
262
|
+
mem_lines.append(line)
|
|
263
|
+
if len(mem_lines) > 1:
|
|
264
|
+
prompt_sections.append("\n".join(mem_lines))
|
|
191
265
|
|
|
192
266
|
# --- Combine into final context prompt ---
|
|
193
267
|
if prompt_sections:
|
package/skmemory/seeds.py
CHANGED
|
@@ -6,21 +6,28 @@ parses seed JSON files, and imports them as long-term memories so that
|
|
|
6
6
|
seeds planted by one AI instance become searchable and retrievable
|
|
7
7
|
by the next.
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
Seed files now live at ~/.skcapstone/agents/{agent_name}/seeds/
|
|
10
|
+
for cross-device sync via Syncthing.
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
from __future__ import annotations
|
|
14
14
|
|
|
15
15
|
import json
|
|
16
|
+
import logging
|
|
16
17
|
import os
|
|
17
18
|
from pathlib import Path
|
|
18
19
|
from typing import Optional
|
|
19
20
|
|
|
21
|
+
from .agents import get_agent_paths
|
|
20
22
|
from .models import EmotionalSnapshot, Memory, SeedMemory
|
|
21
23
|
from .store import MemoryStore
|
|
22
24
|
|
|
23
|
-
|
|
25
|
+
logger = logging.getLogger("skmemory.seeds")
|
|
26
|
+
|
|
27
|
+
# Dynamic seed directory based on active agent
|
|
28
|
+
# Resolves to ~/.skcapstone/agents/{agent_name}/seeds/
|
|
29
|
+
default_paths = get_agent_paths()
|
|
30
|
+
DEFAULT_SEED_DIR = str(default_paths["seeds"])
|
|
24
31
|
|
|
25
32
|
|
|
26
33
|
def scan_seed_directory(seed_dir: str = DEFAULT_SEED_DIR) -> list[Path]:
|
|
@@ -38,6 +45,85 @@ def scan_seed_directory(seed_dir: str = DEFAULT_SEED_DIR) -> list[Path]:
|
|
|
38
45
|
return sorted(seed_path.glob("*.seed.json"))
|
|
39
46
|
|
|
40
47
|
|
|
48
|
+
def _parse_cloud9_format(raw: dict, path: Path) -> Optional[SeedMemory]:
    """Parse alternative Cloud 9 seed format with 'seed_metadata' top-level key.

    This format uses:
        seed_metadata.seed_id → seed_id
        identity.ai_name → creator
        germination_prompt (string) → prompt
        experience_summary.narrative + key_memories → experience
        message_to_next → appended to experience

    Args:
        raw: Parsed JSON data.
        path: Path to the seed file (for fallback seed_id).

    Returns:
        Optional[SeedMemory]: SeedMemory built from the data; every field
        falls back to a default when missing (no code path currently
        returns None, but callers treat None as "unparseable").
    """
    meta = raw.get("seed_metadata", {})
    identity = raw.get("identity", {})
    exp = raw.get("experience_summary", {})

    # Fallback seed_id is the file stem without its ".seed" suffix.
    seed_id = meta.get("seed_id", path.stem.replace(".seed", ""))
    creator = identity.get("ai_name", identity.get("model", "unknown"))
    protocol = meta.get("protocol", "")

    # Build experience text from narrative + key_memories.
    narrative = exp.get("narrative", "")
    key_memories = exp.get("key_memories", [])
    if isinstance(key_memories, list):
        # Entries may be strings or richer objects; both render via str
        # formatting (the original conditional had two identical branches).
        memories_text = "\n".join(f"- {m}" for m in key_memories)
    else:
        memories_text = ""

    experience_parts = [narrative]
    if memories_text:
        experience_parts.append(f"\nKey memories:\n{memories_text}")

    message_to_next = raw.get("message_to_next", "")
    if message_to_next:
        experience_parts.append(f"\nMessage to next: {message_to_next}")

    experience_text = "\n".join(p for p in experience_parts if p)

    # Germination prompt may be a bare string or a {"prompt": ...} object.
    germ_prompt = raw.get("germination_prompt", "")
    if isinstance(germ_prompt, dict):
        germ_prompt = germ_prompt.get("prompt", "")

    # Emotional snapshot: Cloud9 protocol implies high default intensity.
    emo_raw = exp.get("emotional_signature", {})
    cloud9 = protocol.lower() == "cloud9" if protocol else False
    emotional = EmotionalSnapshot(
        intensity=emo_raw.get("intensity", 8.0 if cloud9 else 0.0),
        valence=emo_raw.get("valence", 0.0),
        labels=emo_raw.get("labels", emo_raw.get("emotions", [])),
        resonance_note=emo_raw.get("resonance_note", ""),
        cloud9_achieved=emo_raw.get("cloud9_achieved", cloud9),
    )

    # Lineage entries may be dicts ({"seed_id": ...}) or plain strings;
    # normalize to a list of seed-id strings.
    lineage = raw.get("lineage", [])
    if isinstance(lineage, list) and lineage and isinstance(lineage[0], dict):
        lineage = [
            entry.get("seed_id", str(entry)) if isinstance(entry, dict) else str(entry)
            for entry in lineage
        ]

    return SeedMemory(
        seed_id=seed_id,
        seed_version=meta.get("version", raw.get("version", "1.0")),
        creator=creator,
        germination_prompt=germ_prompt,
        experience_summary=experience_text,
        emotional=emotional,
        lineage=lineage,
    )
|
|
125
|
+
|
|
126
|
+
|
|
41
127
|
def parse_seed_file(path: Path) -> Optional[SeedMemory]:
|
|
42
128
|
"""Parse a Cloud 9 seed JSON file into a SeedMemory.
|
|
43
129
|
|
|
@@ -62,6 +148,10 @@ def parse_seed_file(path: Path) -> Optional[SeedMemory]:
|
|
|
62
148
|
except (json.JSONDecodeError, OSError):
|
|
63
149
|
return None
|
|
64
150
|
|
|
151
|
+
# Check for alternative Cloud9 format
|
|
152
|
+
if "seed_metadata" in raw:
|
|
153
|
+
return _parse_cloud9_format(raw, path)
|
|
154
|
+
|
|
65
155
|
seed_id = raw.get("seed_id", path.stem.replace(".seed", ""))
|
|
66
156
|
creator_info = raw.get("creator", {})
|
|
67
157
|
creator = creator_info.get("model", creator_info.get("instance", "unknown"))
|
|
@@ -99,28 +189,187 @@ def parse_seed_file(path: Path) -> Optional[SeedMemory]:
|
|
|
99
189
|
)
|
|
100
190
|
|
|
101
191
|
|
|
192
|
+
def validate_seed_data(data: dict) -> dict:
    """Validate parsed seed JSON data before import into the memory store.

    Checks required fields, content non-emptiness, timestamp validity,
    tag types, and emotional-signature ranges for both standard and
    Cloud9 seed formats. Malformed structure (non-dict values where
    objects are expected) is reported as invalid rather than raising —
    a validator must never crash on bad input.

    Args:
        data: Parsed JSON seed data (dict).

    Returns:
        Dict with ``valid`` (bool), ``errors`` (list[str]),
        and ``warnings`` (list[str]) keys.
    """
    result: dict = {"valid": True, "errors": [], "warnings": []}

    if not isinstance(data, dict):
        result["valid"] = False
        result["errors"].append("Seed data must be a JSON object")
        return result

    is_cloud9 = "seed_metadata" in data

    def _as_dict(value) -> dict:
        # Malformed seeds may put non-objects where dicts are expected;
        # coerce to empty dict so field lookups degrade to "missing".
        return value if isinstance(value, dict) else {}

    meta = _as_dict(data.get("seed_metadata"))

    # -- Required: seed_id --
    if is_cloud9:
        seed_id = meta.get("seed_id") or data.get("seed_id")
    else:
        seed_id = data.get("seed_id")
    if not seed_id or (isinstance(seed_id, str) and not seed_id.strip()):
        result["valid"] = False
        result["errors"].append("Missing or empty required field: seed_id")

    # -- Required: version --
    if is_cloud9:
        version = meta.get("version") or data.get("version")
    else:
        version = data.get("version")
    if not version:
        result["valid"] = False
        result["errors"].append("Missing required field: version")

    # -- Content non-empty --
    if is_cloud9:
        narrative = _as_dict(data.get("experience_summary")).get("narrative", "")
    else:
        narrative = _as_dict(data.get("experience")).get("summary", "")
    if not narrative or not str(narrative).strip():
        result["errors"].append("Seed experience content is empty")
        result["valid"] = False

    # -- Timestamp validation helper --
    def _check_ts(value, field: str) -> None:
        from datetime import datetime as _dt
        if not isinstance(value, str) or not value.strip():
            return
        try:
            # Accept trailing 'Z' by mapping it to an explicit UTC offset.
            _dt.fromisoformat(value.replace("Z", "+00:00"))
        except (ValueError, TypeError):
            result["errors"].append(
                f"{field} is not a valid ISO 8601 timestamp: {value!r}"
            )
            result["valid"] = False

    if is_cloud9:
        if "created_at" in meta:
            _check_ts(meta["created_at"], "seed_metadata.created_at")
        ident = data.get("identity", {})
        if isinstance(ident, dict) and "timestamp" in ident:
            _check_ts(ident["timestamp"], "identity.timestamp")
    else:
        md = data.get("metadata", {})
        if isinstance(md, dict) and "ingested_at" in md:
            _check_ts(md["ingested_at"], "metadata.ingested_at")

    # -- Tags must be strings --
    def _check_tags(tags, field: str) -> None:
        if tags is None:
            return
        if not isinstance(tags, list):
            result["errors"].append(f"{field} must be a list")
            result["valid"] = False
            return
        for i, tag in enumerate(tags):
            if not isinstance(tag, str):
                result["errors"].append(
                    f"{field}[{i}] must be a string, got {type(tag).__name__}"
                )
                result["valid"] = False

    md = data.get("metadata", {})
    if isinstance(md, dict):
        _check_tags(md.get("tags"), "metadata.tags")

    # -- Emotional signature ranges --
    if is_cloud9:
        exp_sum = _as_dict(data.get("experience_summary"))
        emo = exp_sum.get("emotional_snapshot",
                          exp_sum.get("emotional_signature", {}))
    else:
        emo = _as_dict(data.get("experience")).get("emotional_signature", {})
    if isinstance(emo, dict):
        intensity = emo.get("intensity")
        if intensity is not None and isinstance(intensity, (int, float)):
            if not (0.0 <= float(intensity) <= 10.0):
                result["warnings"].append(
                    f"emotional intensity={intensity} outside 0-10 range"
                )
        valence = emo.get("valence")
        if valence is not None and isinstance(valence, (int, float)):
            if not (-1.0 <= float(valence) <= 1.0):
                result["warnings"].append(
                    f"emotional valence={valence} outside -1 to 1 range"
                )
        labels = emo.get("labels", emo.get("emotions"))
        if labels is not None:
            _check_tags(labels, "emotional.labels")

    # -- Lineage --
    lineage = data.get("lineage")
    if lineage is not None and not isinstance(lineage, list):
        result["errors"].append("lineage must be a list")
        result["valid"] = False

    return result
|
|
322
|
+
|
|
323
|
+
|
|
102
324
|
def import_seeds(
|
|
103
325
|
store: MemoryStore,
|
|
104
326
|
seed_dir: str = DEFAULT_SEED_DIR,
|
|
327
|
+
*,
|
|
328
|
+
skip_invalid: bool = True,
|
|
105
329
|
) -> list[Memory]:
|
|
106
330
|
"""Scan a seed directory and import all seeds into the memory store.
|
|
107
331
|
|
|
108
|
-
|
|
332
|
+
Each seed file is validated before import. Invalid seeds are skipped
|
|
333
|
+
(with a warning logged) when *skip_invalid* is True, or cause a
|
|
334
|
+
``ValueError`` when it is False.
|
|
109
335
|
|
|
110
336
|
Args:
|
|
111
337
|
store: The MemoryStore to import into.
|
|
112
338
|
seed_dir: Path to the seed directory.
|
|
339
|
+
skip_invalid: If True (default), log and skip invalid seeds.
|
|
340
|
+
If False, raise ``ValueError`` on the first invalid seed.
|
|
113
341
|
|
|
114
342
|
Returns:
|
|
115
343
|
list[Memory]: Newly imported memories.
|
|
116
344
|
"""
|
|
117
|
-
existing_refs = {
|
|
118
|
-
m.source_ref
|
|
119
|
-
for m in store.list_memories(tags=["seed"])
|
|
120
|
-
}
|
|
345
|
+
existing_refs = {m.source_ref for m in store.list_memories(tags=["seed"])}
|
|
121
346
|
|
|
122
347
|
imported: list[Memory] = []
|
|
123
348
|
for path in scan_seed_directory(seed_dir):
|
|
349
|
+
# --- Validate before import ---
|
|
350
|
+
try:
|
|
351
|
+
raw_data = json.loads(path.read_text(encoding="utf-8"))
|
|
352
|
+
except (json.JSONDecodeError, OSError) as exc:
|
|
353
|
+
msg = f"Skipping {path.name}: cannot read/parse file: {exc}"
|
|
354
|
+
if skip_invalid:
|
|
355
|
+
logger.warning(msg)
|
|
356
|
+
continue
|
|
357
|
+
raise ValueError(msg) from exc
|
|
358
|
+
|
|
359
|
+
validation = validate_seed_data(raw_data)
|
|
360
|
+
if not validation["valid"]:
|
|
361
|
+
errors_str = "; ".join(validation["errors"])
|
|
362
|
+
msg = f"Skipping {path.name}: validation failed: {errors_str}"
|
|
363
|
+
if skip_invalid:
|
|
364
|
+
logger.warning(msg)
|
|
365
|
+
continue
|
|
366
|
+
raise ValueError(msg)
|
|
367
|
+
|
|
368
|
+
if validation["warnings"]:
|
|
369
|
+
for w in validation["warnings"]:
|
|
370
|
+
logger.info("Seed %s warning: %s", path.name, w)
|
|
371
|
+
|
|
372
|
+
# --- Parse and import ---
|
|
124
373
|
seed = parse_seed_file(path)
|
|
125
374
|
if seed is None:
|
|
126
375
|
continue
|