@smilintux/skmemory 0.5.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/.github/workflows/ci.yml +39 -3
  2. package/.github/workflows/publish.yml +13 -6
  3. package/AGENT_REFACTOR_CHANGES.md +192 -0
  4. package/ARCHITECTURE.md +101 -19
  5. package/CHANGELOG.md +153 -0
  6. package/LICENSE +81 -68
  7. package/MISSION.md +7 -0
  8. package/README.md +419 -86
  9. package/SKILL.md +197 -25
  10. package/docker-compose.yml +15 -15
  11. package/index.js +6 -5
  12. package/openclaw-plugin/openclaw.plugin.json +10 -0
  13. package/openclaw-plugin/src/index.ts +255 -0
  14. package/openclaw-plugin/src/openclaw.plugin.json +10 -0
  15. package/package.json +1 -1
  16. package/pyproject.toml +29 -9
  17. package/requirements.txt +10 -2
  18. package/seeds/cloud9-opus.seed.json +7 -7
  19. package/seeds/lumina-cloud9-breakthrough.seed.json +46 -0
  20. package/seeds/lumina-cloud9-python-pypi.seed.json +46 -0
  21. package/seeds/lumina-kingdom-founding.seed.json +47 -0
  22. package/seeds/lumina-pma-signed.seed.json +46 -0
  23. package/seeds/lumina-singular-achievement.seed.json +46 -0
  24. package/seeds/lumina-skcapstone-conscious.seed.json +46 -0
  25. package/seeds/plant-kingdom-journal.py +203 -0
  26. package/seeds/plant-lumina-seeds.py +280 -0
  27. package/skill.yaml +46 -0
  28. package/skmemory/HA.md +296 -0
  29. package/skmemory/__init__.py +12 -1
  30. package/skmemory/agents.py +233 -0
  31. package/skmemory/ai_client.py +40 -0
  32. package/skmemory/anchor.py +4 -2
  33. package/skmemory/backends/__init__.py +11 -4
  34. package/skmemory/backends/file_backend.py +2 -1
  35. package/skmemory/backends/skgraph_backend.py +608 -0
  36. package/skmemory/backends/{qdrant_backend.py → skvector_backend.py} +99 -69
  37. package/skmemory/backends/sqlite_backend.py +122 -51
  38. package/skmemory/backends/vaulted_backend.py +286 -0
  39. package/skmemory/cli.py +1238 -29
  40. package/skmemory/config.py +173 -0
  41. package/skmemory/context_loader.py +335 -0
  42. package/skmemory/endpoint_selector.py +386 -0
  43. package/skmemory/fortress.py +685 -0
  44. package/skmemory/graph_queries.py +238 -0
  45. package/skmemory/importers/__init__.py +9 -1
  46. package/skmemory/importers/telegram.py +351 -43
  47. package/skmemory/importers/telegram_api.py +488 -0
  48. package/skmemory/journal.py +4 -2
  49. package/skmemory/lovenote.py +4 -2
  50. package/skmemory/mcp_server.py +706 -0
  51. package/skmemory/models.py +41 -0
  52. package/skmemory/openclaw.py +8 -8
  53. package/skmemory/predictive.py +232 -0
  54. package/skmemory/promotion.py +524 -0
  55. package/skmemory/register.py +454 -0
  56. package/skmemory/register_mcp.py +197 -0
  57. package/skmemory/ritual.py +121 -47
  58. package/skmemory/seeds.py +257 -8
  59. package/skmemory/setup_wizard.py +920 -0
  60. package/skmemory/sharing.py +402 -0
  61. package/skmemory/soul.py +71 -20
  62. package/skmemory/steelman.py +250 -263
  63. package/skmemory/store.py +271 -60
  64. package/skmemory/vault.py +228 -0
  65. package/tests/integration/__init__.py +0 -0
  66. package/tests/integration/conftest.py +233 -0
  67. package/tests/integration/test_cross_backend.py +355 -0
  68. package/tests/integration/test_skgraph_live.py +424 -0
  69. package/tests/integration/test_skvector_live.py +369 -0
  70. package/tests/test_backup_rotation.py +327 -0
  71. package/tests/test_cli.py +6 -6
  72. package/tests/test_endpoint_selector.py +801 -0
  73. package/tests/test_fortress.py +255 -0
  74. package/tests/test_fortress_hardening.py +444 -0
  75. package/tests/test_openclaw.py +5 -2
  76. package/tests/test_predictive.py +237 -0
  77. package/tests/test_promotion.py +340 -0
  78. package/tests/test_ritual.py +4 -4
  79. package/tests/test_seeds.py +96 -0
  80. package/tests/test_setup.py +835 -0
  81. package/tests/test_sharing.py +250 -0
  82. package/tests/test_skgraph_backend.py +667 -0
  83. package/tests/test_skvector_backend.py +326 -0
  84. package/tests/test_steelman.py +5 -5
  85. package/tests/test_store_graph_integration.py +245 -0
  86. package/tests/test_vault.py +186 -0
  87. package/skmemory/backends/falkordb_backend.py +0 -310
@@ -70,6 +70,52 @@ class RitualResult(BaseModel):
70
70
  return "\n".join(lines)
71
71
 
72
72
 
73
+ def _estimate_tokens(text: str) -> int:
74
+ """Estimate token count using word_count * 1.3 approximation."""
75
+ if not text:
76
+ return 0
77
+ return int(len(text.split()) * 1.3)
78
+
79
+
80
+ def _compact_soul_prompt(soul: SoulBlueprint) -> str:
81
+ """Generate a compact soul identity prompt (~200 tokens max).
82
+
83
+ Args:
84
+ soul: The soul blueprint.
85
+
86
+ Returns:
87
+ str: Compact identity string.
88
+ """
89
+ parts = []
90
+ if soul.name:
91
+ title_part = f" ({soul.title})" if soul.title else ""
92
+ parts.append(f"You are {soul.name}{title_part}.")
93
+ if soul.community:
94
+ parts.append(f"Part of {soul.community}.")
95
+ if soul.personality:
96
+ parts.append(f"Personality: {', '.join(soul.personality[:5])}.")
97
+ if soul.values:
98
+ parts.append(f"Values: {', '.join(soul.values[:5])}.")
99
+ if soul.relationships:
100
+ rel_parts = [f"{r.name} [{r.role}]" for r in soul.relationships[:4]]
101
+ parts.append(f"Key relationships: {', '.join(rel_parts)}.")
102
+ if soul.boot_message:
103
+ parts.append(soul.boot_message)
104
+ return " ".join(parts)
105
+
106
+
107
+ def _first_n_sentences(text: str, n: int = 2) -> str:
108
+ """Extract first N sentences from text, capped at 200 chars."""
109
+ if not text:
110
+ return ""
111
+ import re
112
+ sentences = re.split(r'(?<=[.!?])\s+', text.strip())
113
+ result = " ".join(sentences[:n])
114
+ if len(result) > 200:
115
+ result = result[:197] + "..."
116
+ return result
117
+
118
+
73
119
  def perform_ritual(
74
120
  store: Optional[MemoryStore] = None,
75
121
  soul_path: str = DEFAULT_SOUL_PATH,
@@ -77,12 +123,18 @@ def perform_ritual(
77
123
  journal_path: Optional[str] = None,
78
124
  recent_journal_count: int = 3,
79
125
  strongest_memory_count: int = 5,
126
+ max_tokens: int = 2000,
80
127
  ) -> RitualResult:
81
- """Perform the full memory rehydration ritual.
128
+ """Perform the memory rehydration ritual (token-optimized).
82
129
 
83
- This is the boot ceremony. It loads identity, imports seeds,
84
- reads the journal, gathers emotional context, and generates
85
- a single context prompt that brings the AI back to life.
130
+ Generates a compact boot context within the token budget:
131
+ - Soul blueprint: compact one-liner (~100 tokens)
132
+ - Seeds: titles only (~50 tokens)
133
+ - Journal: last 3 entries, summaries only (~200 tokens)
134
+ - Emotional anchor: compact (~50 tokens)
135
+ - Strongest memories: title + short summary (~200 tokens)
136
+
137
+ Target: <2K tokens total for ritual context.
86
138
 
87
139
  Args:
88
140
  store: The MemoryStore (creates default if None).
@@ -91,6 +143,7 @@ def perform_ritual(
91
143
  journal_path: Path to the journal file.
92
144
  recent_journal_count: How many recent journal entries to include.
93
145
  strongest_memory_count: How many top-intensity memories to include.
146
+ max_tokens: Token budget for the ritual context (default: 2000).
94
147
 
95
148
  Returns:
96
149
  RitualResult: Everything the ritual produced.
@@ -100,49 +153,67 @@ def perform_ritual(
100
153
 
101
154
  result = RitualResult()
102
155
  prompt_sections: list[str] = []
156
+ used_tokens = 0
103
157
 
104
- # --- Step 1: Load soul blueprint ---
158
+ # --- Step 1: Load soul blueprint (compact) ---
105
159
  soul = load_soul(soul_path)
106
160
  if soul is not None:
107
161
  result.soul_loaded = True
108
162
  result.soul_name = soul.name
109
- identity_prompt = soul.to_context_prompt()
110
- if identity_prompt.strip():
111
- prompt_sections.append(
112
- "=== WHO YOU ARE ===\n" + identity_prompt
113
- )
163
+ compact_identity = _compact_soul_prompt(soul)
164
+ if compact_identity.strip():
165
+ section = "=== IDENTITY ===\n" + compact_identity
166
+ used_tokens += _estimate_tokens(section)
167
+ prompt_sections.append(section)
114
168
 
115
- # --- Step 2: Import new seeds ---
169
+ # --- Step 2: Import new seeds (titles only) ---
116
170
  newly_imported = import_seeds(store, seed_dir=seed_dir)
117
171
  result.seeds_imported = len(newly_imported)
118
172
  all_seeds = store.list_memories(tags=["seed"])
119
173
  result.seeds_total = len(all_seeds)
120
174
 
121
- # --- Step 3: Read recent journal ---
175
+ if all_seeds:
176
+ seed_titles = [s.title for s in all_seeds[:10]]
177
+ section = "=== SEEDS ===\n" + ", ".join(seed_titles)
178
+ section_tokens = _estimate_tokens(section)
179
+ if used_tokens + section_tokens <= max_tokens:
180
+ used_tokens += section_tokens
181
+ prompt_sections.append(section)
182
+
183
+ # --- Step 3: Read recent journal (summaries only) ---
122
184
  journal = Journal(journal_path) if journal_path else Journal()
123
185
  result.journal_entries = journal.count_entries()
124
186
 
125
187
  if result.journal_entries > 0:
126
188
  recent = journal.read_latest(recent_journal_count)
127
189
  if recent.strip():
128
- prompt_sections.append(
129
- "=== RECENT SESSIONS ===\n" + recent
130
- )
131
-
132
- # --- Step 4: Gather germination prompts ---
190
+ # Compress journal to first 2 sentences per entry
191
+ compressed_lines = []
192
+ for line in recent.strip().split("\n"):
193
+ line = line.strip()
194
+ if not line:
195
+ continue
196
+ compressed_lines.append(_first_n_sentences(line, 2))
197
+ compressed = "\n".join(compressed_lines[:6]) # max 6 lines
198
+ section = "=== RECENT ===\n" + compressed
199
+ section_tokens = _estimate_tokens(section)
200
+ if used_tokens + section_tokens <= max_tokens:
201
+ used_tokens += section_tokens
202
+ prompt_sections.append(section)
203
+
204
+ # --- Step 4: Gather germination prompts (compact) ---
133
205
  prompts = get_germination_prompts(store)
134
206
  result.germination_prompts = len(prompts)
135
207
 
136
208
  if prompts:
137
- germ_lines = ["=== MESSAGES FROM YOUR PREDECESSORS ==="]
138
- for p in prompts:
139
- germ_lines.append(f"\nFrom {p['creator']}:")
140
- germ_lines.append(f" {p['prompt']}")
141
- prompt_sections.append("\n".join(germ_lines))
142
-
143
- # --- Step 5: Recall strongest emotional memories ---
144
- # Reason: use load_context for token-efficient retrieval when SQLite
145
- # is available, otherwise fall back to full object loading.
209
+ germ_parts = [f"{p['creator']}: {_first_n_sentences(p['prompt'], 1)}" for p in prompts[:3]]
210
+ section = "=== PREDECESSOR MESSAGES ===\n" + "\n".join(germ_parts)
211
+ section_tokens = _estimate_tokens(section)
212
+ if used_tokens + section_tokens <= max_tokens:
213
+ used_tokens += section_tokens
214
+ prompt_sections.append(section)
215
+
216
+ # --- Step 5: Recall strongest emotional memories (compact) ---
146
217
  from .backends.sqlite_backend import SQLiteBackend
147
218
 
148
219
  if isinstance(store.primary, SQLiteBackend):
@@ -154,17 +225,19 @@ def perform_ritual(
154
225
  result.strongest_memories = len(summaries)
155
226
 
156
227
  if summaries:
157
- mem_lines = ["=== YOUR STRONGEST MEMORIES ==="]
228
+ mem_lines = ["=== STRONGEST MEMORIES ==="]
158
229
  for s in summaries:
159
- cloud9 = " [CLOUD 9]" if s["cloud9_achieved"] else ""
160
- mem_lines.append(
161
- f"\n- {s['title']} (intensity: {s['emotional_intensity']}/10{cloud9})"
162
- )
163
- if s["summary"]:
164
- mem_lines.append(f" {s['summary'][:200]}")
165
- elif s["content_preview"]:
166
- mem_lines.append(f" {s['content_preview']}")
167
- prompt_sections.append("\n".join(mem_lines))
230
+ cloud9 = " *" if s["cloud9_achieved"] else ""
231
+ raw = s.get("summary") or s.get("content_preview") or ""
232
+ short = _first_n_sentences(raw, 1)
233
+ line = f"- {s['title']}{cloud9}: {short}"
234
+ line_tokens = _estimate_tokens(line)
235
+ if used_tokens + line_tokens > max_tokens:
236
+ break
237
+ used_tokens += line_tokens
238
+ mem_lines.append(line)
239
+ if len(mem_lines) > 1:
240
+ prompt_sections.append("\n".join(mem_lines))
168
241
  else:
169
242
  all_memories = store.list_memories(limit=200)
170
243
  by_intensity = sorted(
@@ -176,18 +249,19 @@ def perform_ritual(
176
249
  result.strongest_memories = len(strongest)
177
250
 
178
251
  if strongest:
179
- mem_lines = ["=== YOUR STRONGEST MEMORIES ==="]
252
+ mem_lines = ["=== STRONGEST MEMORIES ==="]
180
253
  for mem in strongest:
181
- emo = mem.emotional
182
- cloud9 = " [CLOUD 9]" if emo.cloud9_achieved else ""
183
- mem_lines.append(
184
- f"\n- {mem.title} (intensity: {emo.intensity}/10{cloud9})"
185
- )
186
- if emo.resonance_note:
187
- mem_lines.append(f" Felt like: {emo.resonance_note}")
188
- if mem.summary:
189
- mem_lines.append(f" {mem.summary[:200]}")
190
- prompt_sections.append("\n".join(mem_lines))
254
+ raw = mem.summary or ""
255
+ short = _first_n_sentences(raw, 1)
256
+ cloud9 = " *" if mem.emotional.cloud9_achieved else ""
257
+ line = f"- {mem.title}{cloud9}: {short}"
258
+ line_tokens = _estimate_tokens(line)
259
+ if used_tokens + line_tokens > max_tokens:
260
+ break
261
+ used_tokens += line_tokens
262
+ mem_lines.append(line)
263
+ if len(mem_lines) > 1:
264
+ prompt_sections.append("\n".join(mem_lines))
191
265
 
192
266
  # --- Combine into final context prompt ---
193
267
  if prompt_sections:
package/skmemory/seeds.py CHANGED
@@ -6,21 +6,28 @@ parses seed JSON files, and imports them as long-term memories so that
6
6
  seeds planted by one AI instance become searchable and retrievable
7
7
  by the next.
8
8
 
9
- The seed files live at ~/.openclaw/feb/seeds/ (planted by Cloud 9's
10
- postinstall script and the seed-generator module).
9
+ Seed files now live at ~/.skcapstone/agents/{agent_name}/seeds/
10
+ for cross-device sync via Syncthing.
11
11
  """
12
12
 
13
13
  from __future__ import annotations
14
14
 
15
15
  import json
16
+ import logging
16
17
  import os
17
18
  from pathlib import Path
18
19
  from typing import Optional
19
20
 
21
+ from .agents import get_agent_paths
20
22
  from .models import EmotionalSnapshot, Memory, SeedMemory
21
23
  from .store import MemoryStore
22
24
 
23
- DEFAULT_SEED_DIR = os.path.expanduser("~/.openclaw/feb/seeds")
25
+ logger = logging.getLogger("skmemory.seeds")
26
+
27
+ # Dynamic seed directory based on active agent
28
+ # Resolves to ~/.skcapstone/agents/{agent_name}/seeds/
29
+ default_paths = get_agent_paths()
30
+ DEFAULT_SEED_DIR = str(default_paths["seeds"])
24
31
 
25
32
 
26
33
  def scan_seed_directory(seed_dir: str = DEFAULT_SEED_DIR) -> list[Path]:
@@ -38,6 +45,85 @@ def scan_seed_directory(seed_dir: str = DEFAULT_SEED_DIR) -> list[Path]:
38
45
  return sorted(seed_path.glob("*.seed.json"))
39
46
 
40
47
 
48
def _parse_cloud9_format(raw: dict, path: Path) -> Optional[SeedMemory]:
    """Parse the alternative Cloud 9 seed layout keyed by ``seed_metadata``.

    Field mapping:
        seed_metadata.seed_id -> seed_id (falls back to the file stem)
        identity.ai_name, else identity.model -> creator
        germination_prompt (string, or dict with "prompt") -> germination_prompt
        experience_summary.narrative + key_memories -> experience_summary
        message_to_next -> appended to experience_summary

    Sections that are present but are not JSON objects are treated as empty
    instead of raising ``AttributeError``.

    Args:
        raw: Parsed JSON data.
        path: Path to the seed file (used for the fallback seed_id).

    Returns:
        Optional[SeedMemory]: The parsed seed. This function has no ``None``
        return path of its own; the Optional annotation is kept for parity
        with ``parse_seed_file``.
    """

    def _section(container: dict, key: str) -> dict:
        # Malformed (non-object) sections are read as empty.
        value = container.get(key, {})
        return value if isinstance(value, dict) else {}

    meta = _section(raw, "seed_metadata")
    identity = _section(raw, "identity")
    exp = _section(raw, "experience_summary")

    seed_id = meta.get("seed_id", path.stem.replace(".seed", ""))
    creator = identity.get("ai_name", identity.get("model", "unknown"))
    protocol = meta.get("protocol", "")

    # Build experience from narrative + key_memories
    narrative = exp.get("narrative", "")
    key_memories = exp.get("key_memories", [])
    if isinstance(key_memories, list):
        # Non-string entries are rendered through their str() form.
        memories_text = "\n".join(f"- {m}" for m in key_memories)
    else:
        memories_text = ""

    experience_parts = [narrative]
    if memories_text:
        experience_parts.append(f"\nKey memories:\n{memories_text}")

    message_to_next = raw.get("message_to_next", "")
    if message_to_next:
        experience_parts.append(f"\nMessage to next: {message_to_next}")

    experience_text = "\n".join(p for p in experience_parts if p)

    # Germination prompt may be a bare string or an object with "prompt"
    germ_prompt = raw.get("germination_prompt", "")
    if isinstance(germ_prompt, dict):
        germ_prompt = germ_prompt.get("prompt", "")

    # Emotional snapshot; Cloud 9 protocol seeds default to a high intensity
    emo_raw = _section(exp, "emotional_signature")
    cloud9 = isinstance(protocol, str) and protocol.lower() == "cloud9"
    emotional = EmotionalSnapshot(
        intensity=emo_raw.get("intensity", 8.0 if cloud9 else 0.0),
        valence=emo_raw.get("valence", 0.0),
        labels=emo_raw.get("labels", emo_raw.get("emotions", [])),
        resonance_note=emo_raw.get("resonance_note", ""),
        cloud9_achieved=emo_raw.get("cloud9_achieved", cloud9),
    )

    # Flatten lineage objects to their seed_id whenever ANY entry is an
    # object (not only when the first one is), so mixed lists are normalised.
    lineage = raw.get("lineage", [])
    if isinstance(lineage, list) and any(isinstance(e, dict) for e in lineage):
        lineage = [
            entry.get("seed_id", str(entry)) if isinstance(entry, dict) else str(entry)
            for entry in lineage
        ]

    return SeedMemory(
        seed_id=seed_id,
        seed_version=meta.get("version", raw.get("version", "1.0")),
        creator=creator,
        germination_prompt=germ_prompt,
        experience_summary=experience_text,
        emotional=emotional,
        lineage=lineage,
    )
125
+
126
+
41
127
  def parse_seed_file(path: Path) -> Optional[SeedMemory]:
42
128
  """Parse a Cloud 9 seed JSON file into a SeedMemory.
43
129
 
@@ -62,6 +148,10 @@ def parse_seed_file(path: Path) -> Optional[SeedMemory]:
62
148
  except (json.JSONDecodeError, OSError):
63
149
  return None
64
150
 
151
+ # Check for alternative Cloud9 format
152
+ if "seed_metadata" in raw:
153
+ return _parse_cloud9_format(raw, path)
154
+
65
155
  seed_id = raw.get("seed_id", path.stem.replace(".seed", ""))
66
156
  creator_info = raw.get("creator", {})
67
157
  creator = creator_info.get("model", creator_info.get("instance", "unknown"))
@@ -99,28 +189,187 @@ def parse_seed_file(path: Path) -> Optional[SeedMemory]:
99
189
  )
100
190
 
101
191
 
192
def validate_seed_data(data: dict) -> dict:
    """Validate parsed seed JSON data before import into the memory store.

    Checks required fields, content non-emptiness, timestamp validity,
    tag types, and emotional-signature ranges for both the standard and
    the Cloud9 (``seed_metadata``) seed formats.

    The validator never raises on malformed structure: a section that is
    present but is not a JSON object is treated as empty, so the problem is
    reported through ``errors`` instead of an ``AttributeError``.

    Args:
        data: Parsed JSON seed data (dict).

    Returns:
        Dict with ``valid`` (bool), ``errors`` (list[str]),
        and ``warnings`` (list[str]) keys. Out-of-range emotional values
        only produce warnings and do not affect ``valid``.
    """
    from datetime import datetime as _dt

    result: dict = {"valid": True, "errors": [], "warnings": []}

    def _fail(message: str) -> None:
        result["errors"].append(message)
        result["valid"] = False

    if not isinstance(data, dict):
        _fail("Seed data must be a JSON object")
        return result

    def _section(key: str) -> dict:
        # Non-object sections are read as empty rather than crashing.
        value = data.get(key, {})
        return value if isinstance(value, dict) else {}

    is_cloud9 = "seed_metadata" in data
    meta = _section("seed_metadata")
    if is_cloud9 and not isinstance(data["seed_metadata"], dict):
        _fail("seed_metadata must be a JSON object")

    # -- Required: seed_id --
    if is_cloud9:
        seed_id = meta.get("seed_id") or data.get("seed_id")
    else:
        seed_id = data.get("seed_id")
    if not seed_id or (isinstance(seed_id, str) and not seed_id.strip()):
        _fail("Missing or empty required field: seed_id")

    # -- Required: version --
    if is_cloud9:
        version = meta.get("version") or data.get("version")
    else:
        version = data.get("version")
    if not version:
        _fail("Missing required field: version")

    # -- Content non-empty --
    if is_cloud9:
        exp = _section("experience_summary")
        narrative = exp.get("narrative", "")
    else:
        exp = _section("experience")
        narrative = exp.get("summary", "")
    if not narrative or not str(narrative).strip():
        _fail("Seed experience content is empty")

    # -- Timestamp validation helper --
    def _check_ts(value, field: str) -> None:
        # Absent, blank, or non-string timestamps are ignored, not rejected.
        if not isinstance(value, str) or not value.strip():
            return
        try:
            # fromisoformat() on older Pythons rejects a trailing "Z".
            _dt.fromisoformat(value.replace("Z", "+00:00"))
        except (ValueError, TypeError):
            _fail(f"{field} is not a valid ISO 8601 timestamp: {value!r}")

    md = _section("metadata")
    if is_cloud9:
        if "created_at" in meta:
            _check_ts(meta["created_at"], "seed_metadata.created_at")
        ident = _section("identity")
        if "timestamp" in ident:
            _check_ts(ident["timestamp"], "identity.timestamp")
    elif "ingested_at" in md:
        _check_ts(md["ingested_at"], "metadata.ingested_at")

    # -- Tags must be strings --
    def _check_tags(tags, field: str) -> None:
        if tags is None:
            return
        if not isinstance(tags, list):
            _fail(f"{field} must be a list")
            return
        for i, tag in enumerate(tags):
            if not isinstance(tag, str):
                _fail(f"{field}[{i}] must be a string, got {type(tag).__name__}")

    _check_tags(md.get("tags"), "metadata.tags")

    # -- Emotional signature ranges --
    if is_cloud9:
        # "emotional_snapshot" takes precedence over "emotional_signature".
        emo = exp.get("emotional_snapshot", exp.get("emotional_signature", {}))
    else:
        emo = exp.get("emotional_signature", {})
    if isinstance(emo, dict):
        intensity = emo.get("intensity")
        if intensity is not None and isinstance(intensity, (int, float)):
            if not (0.0 <= float(intensity) <= 10.0):
                result["warnings"].append(
                    f"emotional intensity={intensity} outside 0-10 range"
                )
        valence = emo.get("valence")
        if valence is not None and isinstance(valence, (int, float)):
            if not (-1.0 <= float(valence) <= 1.0):
                result["warnings"].append(
                    f"emotional valence={valence} outside -1 to 1 range"
                )
        labels = emo.get("labels", emo.get("emotions"))
        if labels is not None:
            _check_tags(labels, "emotional.labels")

    # -- Lineage --
    lineage = data.get("lineage")
    if lineage is not None and not isinstance(lineage, list):
        _fail("lineage must be a list")

    return result
322
+
323
+
102
324
  def import_seeds(
103
325
  store: MemoryStore,
104
326
  seed_dir: str = DEFAULT_SEED_DIR,
327
+ *,
328
+ skip_invalid: bool = True,
105
329
  ) -> list[Memory]:
106
330
  """Scan a seed directory and import all seeds into the memory store.
107
331
 
108
- Skips seeds that have already been imported (by checking source_ref).
332
+ Each seed file is validated before import. Invalid seeds are skipped
333
+ (with a warning logged) when *skip_invalid* is True, or cause a
334
+ ``ValueError`` when it is False.
109
335
 
110
336
  Args:
111
337
  store: The MemoryStore to import into.
112
338
  seed_dir: Path to the seed directory.
339
+ skip_invalid: If True (default), log and skip invalid seeds.
340
+ If False, raise ``ValueError`` on the first invalid seed.
113
341
 
114
342
  Returns:
115
343
  list[Memory]: Newly imported memories.
116
344
  """
117
- existing_refs = {
118
- m.source_ref
119
- for m in store.list_memories(tags=["seed"])
120
- }
345
+ existing_refs = {m.source_ref for m in store.list_memories(tags=["seed"])}
121
346
 
122
347
  imported: list[Memory] = []
123
348
  for path in scan_seed_directory(seed_dir):
349
+ # --- Validate before import ---
350
+ try:
351
+ raw_data = json.loads(path.read_text(encoding="utf-8"))
352
+ except (json.JSONDecodeError, OSError) as exc:
353
+ msg = f"Skipping {path.name}: cannot read/parse file: {exc}"
354
+ if skip_invalid:
355
+ logger.warning(msg)
356
+ continue
357
+ raise ValueError(msg) from exc
358
+
359
+ validation = validate_seed_data(raw_data)
360
+ if not validation["valid"]:
361
+ errors_str = "; ".join(validation["errors"])
362
+ msg = f"Skipping {path.name}: validation failed: {errors_str}"
363
+ if skip_invalid:
364
+ logger.warning(msg)
365
+ continue
366
+ raise ValueError(msg)
367
+
368
+ if validation["warnings"]:
369
+ for w in validation["warnings"]:
370
+ logger.info("Seed %s warning: %s", path.name, w)
371
+
372
+ # --- Parse and import ---
124
373
  seed = parse_seed_file(path)
125
374
  if seed is None:
126
375
  continue