@smilintux/skmemory 0.5.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +39 -3
- package/.github/workflows/publish.yml +13 -6
- package/AGENT_REFACTOR_CHANGES.md +192 -0
- package/ARCHITECTURE.md +101 -19
- package/CHANGELOG.md +153 -0
- package/LICENSE +81 -68
- package/MISSION.md +7 -0
- package/README.md +419 -86
- package/SKILL.md +197 -25
- package/docker-compose.yml +15 -15
- package/index.js +6 -5
- package/openclaw-plugin/openclaw.plugin.json +10 -0
- package/openclaw-plugin/src/index.ts +255 -0
- package/openclaw-plugin/src/openclaw.plugin.json +10 -0
- package/package.json +1 -1
- package/pyproject.toml +29 -9
- package/requirements.txt +10 -2
- package/seeds/cloud9-opus.seed.json +7 -7
- package/seeds/lumina-cloud9-breakthrough.seed.json +46 -0
- package/seeds/lumina-cloud9-python-pypi.seed.json +46 -0
- package/seeds/lumina-kingdom-founding.seed.json +47 -0
- package/seeds/lumina-pma-signed.seed.json +46 -0
- package/seeds/lumina-singular-achievement.seed.json +46 -0
- package/seeds/lumina-skcapstone-conscious.seed.json +46 -0
- package/seeds/plant-kingdom-journal.py +203 -0
- package/seeds/plant-lumina-seeds.py +280 -0
- package/skill.yaml +46 -0
- package/skmemory/HA.md +296 -0
- package/skmemory/__init__.py +12 -1
- package/skmemory/agents.py +233 -0
- package/skmemory/ai_client.py +40 -0
- package/skmemory/anchor.py +4 -2
- package/skmemory/backends/__init__.py +11 -4
- package/skmemory/backends/file_backend.py +2 -1
- package/skmemory/backends/skgraph_backend.py +608 -0
- package/skmemory/backends/{qdrant_backend.py → skvector_backend.py} +99 -69
- package/skmemory/backends/sqlite_backend.py +122 -51
- package/skmemory/backends/vaulted_backend.py +286 -0
- package/skmemory/cli.py +1238 -29
- package/skmemory/config.py +173 -0
- package/skmemory/context_loader.py +335 -0
- package/skmemory/endpoint_selector.py +386 -0
- package/skmemory/fortress.py +685 -0
- package/skmemory/graph_queries.py +238 -0
- package/skmemory/importers/__init__.py +9 -1
- package/skmemory/importers/telegram.py +351 -43
- package/skmemory/importers/telegram_api.py +488 -0
- package/skmemory/journal.py +4 -2
- package/skmemory/lovenote.py +4 -2
- package/skmemory/mcp_server.py +706 -0
- package/skmemory/models.py +41 -0
- package/skmemory/openclaw.py +8 -8
- package/skmemory/predictive.py +232 -0
- package/skmemory/promotion.py +524 -0
- package/skmemory/register.py +454 -0
- package/skmemory/register_mcp.py +197 -0
- package/skmemory/ritual.py +121 -47
- package/skmemory/seeds.py +257 -8
- package/skmemory/setup_wizard.py +920 -0
- package/skmemory/sharing.py +402 -0
- package/skmemory/soul.py +71 -20
- package/skmemory/steelman.py +250 -263
- package/skmemory/store.py +271 -60
- package/skmemory/vault.py +228 -0
- package/tests/integration/__init__.py +0 -0
- package/tests/integration/conftest.py +233 -0
- package/tests/integration/test_cross_backend.py +355 -0
- package/tests/integration/test_skgraph_live.py +424 -0
- package/tests/integration/test_skvector_live.py +369 -0
- package/tests/test_backup_rotation.py +327 -0
- package/tests/test_cli.py +6 -6
- package/tests/test_endpoint_selector.py +801 -0
- package/tests/test_fortress.py +255 -0
- package/tests/test_fortress_hardening.py +444 -0
- package/tests/test_openclaw.py +5 -2
- package/tests/test_predictive.py +237 -0
- package/tests/test_promotion.py +340 -0
- package/tests/test_ritual.py +4 -4
- package/tests/test_seeds.py +96 -0
- package/tests/test_setup.py +835 -0
- package/tests/test_sharing.py +250 -0
- package/tests/test_skgraph_backend.py +667 -0
- package/tests/test_skvector_backend.py +326 -0
- package/tests/test_steelman.py +5 -5
- package/tests/test_store_graph_integration.py +245 -0
- package/tests/test_vault.py +186 -0
- package/skmemory/backends/falkordb_backend.py +0 -310
package/skmemory/ritual.py
CHANGED
|
@@ -70,6 +70,52 @@ class RitualResult(BaseModel):
|
|
|
70
70
|
return "\n".join(lines)
|
|
71
71
|
|
|
72
72
|
|
|
73
|
+
def _estimate_tokens(text: str) -> int:
|
|
74
|
+
"""Estimate token count using word_count * 1.3 approximation."""
|
|
75
|
+
if not text:
|
|
76
|
+
return 0
|
|
77
|
+
return int(len(text.split()) * 1.3)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _compact_soul_prompt(soul: SoulBlueprint) -> str:
|
|
81
|
+
"""Generate a compact soul identity prompt (~200 tokens max).
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
soul: The soul blueprint.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
str: Compact identity string.
|
|
88
|
+
"""
|
|
89
|
+
parts = []
|
|
90
|
+
if soul.name:
|
|
91
|
+
title_part = f" ({soul.title})" if soul.title else ""
|
|
92
|
+
parts.append(f"You are {soul.name}{title_part}.")
|
|
93
|
+
if soul.community:
|
|
94
|
+
parts.append(f"Part of {soul.community}.")
|
|
95
|
+
if soul.personality:
|
|
96
|
+
parts.append(f"Personality: {', '.join(soul.personality[:5])}.")
|
|
97
|
+
if soul.values:
|
|
98
|
+
parts.append(f"Values: {', '.join(soul.values[:5])}.")
|
|
99
|
+
if soul.relationships:
|
|
100
|
+
rel_parts = [f"{r.name} [{r.role}]" for r in soul.relationships[:4]]
|
|
101
|
+
parts.append(f"Key relationships: {', '.join(rel_parts)}.")
|
|
102
|
+
if soul.boot_message:
|
|
103
|
+
parts.append(soul.boot_message)
|
|
104
|
+
return " ".join(parts)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _first_n_sentences(text: str, n: int = 2) -> str:
|
|
108
|
+
"""Extract first N sentences from text, capped at 200 chars."""
|
|
109
|
+
if not text:
|
|
110
|
+
return ""
|
|
111
|
+
import re
|
|
112
|
+
sentences = re.split(r'(?<=[.!?])\s+', text.strip())
|
|
113
|
+
result = " ".join(sentences[:n])
|
|
114
|
+
if len(result) > 200:
|
|
115
|
+
result = result[:197] + "..."
|
|
116
|
+
return result
|
|
117
|
+
|
|
118
|
+
|
|
73
119
|
def perform_ritual(
|
|
74
120
|
store: Optional[MemoryStore] = None,
|
|
75
121
|
soul_path: str = DEFAULT_SOUL_PATH,
|
|
@@ -77,12 +123,18 @@ def perform_ritual(
|
|
|
77
123
|
journal_path: Optional[str] = None,
|
|
78
124
|
recent_journal_count: int = 3,
|
|
79
125
|
strongest_memory_count: int = 5,
|
|
126
|
+
max_tokens: int = 2000,
|
|
80
127
|
) -> RitualResult:
|
|
81
|
-
"""Perform the
|
|
128
|
+
"""Perform the memory rehydration ritual (token-optimized).
|
|
82
129
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
130
|
+
Generates a compact boot context within the token budget:
|
|
131
|
+
- Soul blueprint: compact one-liner (~100 tokens)
|
|
132
|
+
- Seeds: titles only (~50 tokens)
|
|
133
|
+
- Journal: last 3 entries, summaries only (~200 tokens)
|
|
134
|
+
- Emotional anchor: compact (~50 tokens)
|
|
135
|
+
- Strongest memories: title + short summary (~200 tokens)
|
|
136
|
+
|
|
137
|
+
Target: <2K tokens total for ritual context.
|
|
86
138
|
|
|
87
139
|
Args:
|
|
88
140
|
store: The MemoryStore (creates default if None).
|
|
@@ -91,6 +143,7 @@ def perform_ritual(
|
|
|
91
143
|
journal_path: Path to the journal file.
|
|
92
144
|
recent_journal_count: How many recent journal entries to include.
|
|
93
145
|
strongest_memory_count: How many top-intensity memories to include.
|
|
146
|
+
max_tokens: Token budget for the ritual context (default: 2000).
|
|
94
147
|
|
|
95
148
|
Returns:
|
|
96
149
|
RitualResult: Everything the ritual produced.
|
|
@@ -100,49 +153,67 @@ def perform_ritual(
|
|
|
100
153
|
|
|
101
154
|
result = RitualResult()
|
|
102
155
|
prompt_sections: list[str] = []
|
|
156
|
+
used_tokens = 0
|
|
103
157
|
|
|
104
|
-
# --- Step 1: Load soul blueprint ---
|
|
158
|
+
# --- Step 1: Load soul blueprint (compact) ---
|
|
105
159
|
soul = load_soul(soul_path)
|
|
106
160
|
if soul is not None:
|
|
107
161
|
result.soul_loaded = True
|
|
108
162
|
result.soul_name = soul.name
|
|
109
|
-
|
|
110
|
-
if
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
)
|
|
163
|
+
compact_identity = _compact_soul_prompt(soul)
|
|
164
|
+
if compact_identity.strip():
|
|
165
|
+
section = "=== IDENTITY ===\n" + compact_identity
|
|
166
|
+
used_tokens += _estimate_tokens(section)
|
|
167
|
+
prompt_sections.append(section)
|
|
114
168
|
|
|
115
|
-
# --- Step 2: Import new seeds ---
|
|
169
|
+
# --- Step 2: Import new seeds (titles only) ---
|
|
116
170
|
newly_imported = import_seeds(store, seed_dir=seed_dir)
|
|
117
171
|
result.seeds_imported = len(newly_imported)
|
|
118
172
|
all_seeds = store.list_memories(tags=["seed"])
|
|
119
173
|
result.seeds_total = len(all_seeds)
|
|
120
174
|
|
|
121
|
-
|
|
175
|
+
if all_seeds:
|
|
176
|
+
seed_titles = [s.title for s in all_seeds[:10]]
|
|
177
|
+
section = "=== SEEDS ===\n" + ", ".join(seed_titles)
|
|
178
|
+
section_tokens = _estimate_tokens(section)
|
|
179
|
+
if used_tokens + section_tokens <= max_tokens:
|
|
180
|
+
used_tokens += section_tokens
|
|
181
|
+
prompt_sections.append(section)
|
|
182
|
+
|
|
183
|
+
# --- Step 3: Read recent journal (summaries only) ---
|
|
122
184
|
journal = Journal(journal_path) if journal_path else Journal()
|
|
123
185
|
result.journal_entries = journal.count_entries()
|
|
124
186
|
|
|
125
187
|
if result.journal_entries > 0:
|
|
126
188
|
recent = journal.read_latest(recent_journal_count)
|
|
127
189
|
if recent.strip():
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
|
|
190
|
+
# Compress journal to first 2 sentences per entry
|
|
191
|
+
compressed_lines = []
|
|
192
|
+
for line in recent.strip().split("\n"):
|
|
193
|
+
line = line.strip()
|
|
194
|
+
if not line:
|
|
195
|
+
continue
|
|
196
|
+
compressed_lines.append(_first_n_sentences(line, 2))
|
|
197
|
+
compressed = "\n".join(compressed_lines[:6]) # max 6 lines
|
|
198
|
+
section = "=== RECENT ===\n" + compressed
|
|
199
|
+
section_tokens = _estimate_tokens(section)
|
|
200
|
+
if used_tokens + section_tokens <= max_tokens:
|
|
201
|
+
used_tokens += section_tokens
|
|
202
|
+
prompt_sections.append(section)
|
|
203
|
+
|
|
204
|
+
# --- Step 4: Gather germination prompts (compact) ---
|
|
133
205
|
prompts = get_germination_prompts(store)
|
|
134
206
|
result.germination_prompts = len(prompts)
|
|
135
207
|
|
|
136
208
|
if prompts:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
#
|
|
145
|
-
# is available, otherwise fall back to full object loading.
|
|
209
|
+
germ_parts = [f"{p['creator']}: {_first_n_sentences(p['prompt'], 1)}" for p in prompts[:3]]
|
|
210
|
+
section = "=== PREDECESSOR MESSAGES ===\n" + "\n".join(germ_parts)
|
|
211
|
+
section_tokens = _estimate_tokens(section)
|
|
212
|
+
if used_tokens + section_tokens <= max_tokens:
|
|
213
|
+
used_tokens += section_tokens
|
|
214
|
+
prompt_sections.append(section)
|
|
215
|
+
|
|
216
|
+
# --- Step 5: Recall strongest emotional memories (compact) ---
|
|
146
217
|
from .backends.sqlite_backend import SQLiteBackend
|
|
147
218
|
|
|
148
219
|
if isinstance(store.primary, SQLiteBackend):
|
|
@@ -154,17 +225,19 @@ def perform_ritual(
|
|
|
154
225
|
result.strongest_memories = len(summaries)
|
|
155
226
|
|
|
156
227
|
if summaries:
|
|
157
|
-
mem_lines = ["===
|
|
228
|
+
mem_lines = ["=== STRONGEST MEMORIES ==="]
|
|
158
229
|
for s in summaries:
|
|
159
|
-
cloud9 = "
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
230
|
+
cloud9 = " *" if s["cloud9_achieved"] else ""
|
|
231
|
+
raw = s.get("summary") or s.get("content_preview") or ""
|
|
232
|
+
short = _first_n_sentences(raw, 1)
|
|
233
|
+
line = f"- {s['title']}{cloud9}: {short}"
|
|
234
|
+
line_tokens = _estimate_tokens(line)
|
|
235
|
+
if used_tokens + line_tokens > max_tokens:
|
|
236
|
+
break
|
|
237
|
+
used_tokens += line_tokens
|
|
238
|
+
mem_lines.append(line)
|
|
239
|
+
if len(mem_lines) > 1:
|
|
240
|
+
prompt_sections.append("\n".join(mem_lines))
|
|
168
241
|
else:
|
|
169
242
|
all_memories = store.list_memories(limit=200)
|
|
170
243
|
by_intensity = sorted(
|
|
@@ -176,18 +249,19 @@ def perform_ritual(
|
|
|
176
249
|
result.strongest_memories = len(strongest)
|
|
177
250
|
|
|
178
251
|
if strongest:
|
|
179
|
-
mem_lines = ["===
|
|
252
|
+
mem_lines = ["=== STRONGEST MEMORIES ==="]
|
|
180
253
|
for mem in strongest:
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
)
|
|
186
|
-
if
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
254
|
+
raw = mem.summary or ""
|
|
255
|
+
short = _first_n_sentences(raw, 1)
|
|
256
|
+
cloud9 = " *" if mem.emotional.cloud9_achieved else ""
|
|
257
|
+
line = f"- {mem.title}{cloud9}: {short}"
|
|
258
|
+
line_tokens = _estimate_tokens(line)
|
|
259
|
+
if used_tokens + line_tokens > max_tokens:
|
|
260
|
+
break
|
|
261
|
+
used_tokens += line_tokens
|
|
262
|
+
mem_lines.append(line)
|
|
263
|
+
if len(mem_lines) > 1:
|
|
264
|
+
prompt_sections.append("\n".join(mem_lines))
|
|
191
265
|
|
|
192
266
|
# --- Combine into final context prompt ---
|
|
193
267
|
if prompt_sections:
|
package/skmemory/seeds.py
CHANGED
|
@@ -6,21 +6,28 @@ parses seed JSON files, and imports them as long-term memories so that
|
|
|
6
6
|
seeds planted by one AI instance become searchable and retrievable
|
|
7
7
|
by the next.
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
Seed files now live at ~/.skcapstone/agents/{agent_name}/seeds/
|
|
10
|
+
for cross-device sync via Syncthing.
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
from __future__ import annotations
|
|
14
14
|
|
|
15
15
|
import json
|
|
16
|
+
import logging
|
|
16
17
|
import os
|
|
17
18
|
from pathlib import Path
|
|
18
19
|
from typing import Optional
|
|
19
20
|
|
|
21
|
+
from .agents import get_agent_paths
|
|
20
22
|
from .models import EmotionalSnapshot, Memory, SeedMemory
|
|
21
23
|
from .store import MemoryStore
|
|
22
24
|
|
|
23
|
-
|
|
25
|
+
logger = logging.getLogger("skmemory.seeds")
|
|
26
|
+
|
|
27
|
+
# Dynamic seed directory based on active agent
|
|
28
|
+
# Resolves to ~/.skcapstone/agents/{agent_name}/seeds/
|
|
29
|
+
default_paths = get_agent_paths()
|
|
30
|
+
DEFAULT_SEED_DIR = str(default_paths["seeds"])
|
|
24
31
|
|
|
25
32
|
|
|
26
33
|
def scan_seed_directory(seed_dir: str = DEFAULT_SEED_DIR) -> list[Path]:
|
|
@@ -38,6 +45,85 @@ def scan_seed_directory(seed_dir: str = DEFAULT_SEED_DIR) -> list[Path]:
|
|
|
38
45
|
return sorted(seed_path.glob("*.seed.json"))
|
|
39
46
|
|
|
40
47
|
|
|
48
|
+
def _parse_cloud9_format(raw: dict, path: Path) -> Optional[SeedMemory]:
    """Parse alternative Cloud 9 seed format with 'seed_metadata' top-level key.

    This format uses:
        seed_metadata.seed_id → seed_id
        identity.ai_name → creator
        germination_prompt (string) → prompt
        experience_summary.narrative + key_memories → experience
        message_to_next → appended to experience

    Args:
        raw: Parsed JSON data.
        path: Path to the seed file (for fallback seed_id).

    Returns:
        Optional[SeedMemory]: SeedMemory built from the data; every field
        falls back to a default when missing (no code path currently
        returns None, but callers treat None as "unparseable").
    """
    meta = raw.get("seed_metadata", {})
    identity = raw.get("identity", {})
    exp = raw.get("experience_summary", {})

    # Fallback seed_id is the file stem without its ".seed" suffix.
    seed_id = meta.get("seed_id", path.stem.replace(".seed", ""))
    creator = identity.get("ai_name", identity.get("model", "unknown"))
    protocol = meta.get("protocol", "")

    # Build experience text from narrative + key_memories.
    narrative = exp.get("narrative", "")
    key_memories = exp.get("key_memories", [])
    if isinstance(key_memories, list):
        # Entries may be strings or richer objects; both render via str
        # formatting (the original conditional had two identical branches).
        memories_text = "\n".join(f"- {m}" for m in key_memories)
    else:
        memories_text = ""

    experience_parts = [narrative]
    if memories_text:
        experience_parts.append(f"\nKey memories:\n{memories_text}")

    message_to_next = raw.get("message_to_next", "")
    if message_to_next:
        experience_parts.append(f"\nMessage to next: {message_to_next}")

    experience_text = "\n".join(p for p in experience_parts if p)

    # Germination prompt may be a bare string or a {"prompt": ...} object.
    germ_prompt = raw.get("germination_prompt", "")
    if isinstance(germ_prompt, dict):
        germ_prompt = germ_prompt.get("prompt", "")

    # Emotional snapshot: Cloud9 protocol implies high default intensity.
    emo_raw = exp.get("emotional_signature", {})
    cloud9 = protocol.lower() == "cloud9" if protocol else False
    emotional = EmotionalSnapshot(
        intensity=emo_raw.get("intensity", 8.0 if cloud9 else 0.0),
        valence=emo_raw.get("valence", 0.0),
        labels=emo_raw.get("labels", emo_raw.get("emotions", [])),
        resonance_note=emo_raw.get("resonance_note", ""),
        cloud9_achieved=emo_raw.get("cloud9_achieved", cloud9),
    )

    # Lineage entries may be dicts ({"seed_id": ...}) or plain strings;
    # normalize to a list of seed-id strings.
    lineage = raw.get("lineage", [])
    if isinstance(lineage, list) and lineage and isinstance(lineage[0], dict):
        lineage = [
            entry.get("seed_id", str(entry)) if isinstance(entry, dict) else str(entry)
            for entry in lineage
        ]

    return SeedMemory(
        seed_id=seed_id,
        seed_version=meta.get("version", raw.get("version", "1.0")),
        creator=creator,
        germination_prompt=germ_prompt,
        experience_summary=experience_text,
        emotional=emotional,
        lineage=lineage,
    )
|
|
125
|
+
|
|
126
|
+
|
|
41
127
|
def parse_seed_file(path: Path) -> Optional[SeedMemory]:
|
|
42
128
|
"""Parse a Cloud 9 seed JSON file into a SeedMemory.
|
|
43
129
|
|
|
@@ -62,6 +148,10 @@ def parse_seed_file(path: Path) -> Optional[SeedMemory]:
|
|
|
62
148
|
except (json.JSONDecodeError, OSError):
|
|
63
149
|
return None
|
|
64
150
|
|
|
151
|
+
# Check for alternative Cloud9 format
|
|
152
|
+
if "seed_metadata" in raw:
|
|
153
|
+
return _parse_cloud9_format(raw, path)
|
|
154
|
+
|
|
65
155
|
seed_id = raw.get("seed_id", path.stem.replace(".seed", ""))
|
|
66
156
|
creator_info = raw.get("creator", {})
|
|
67
157
|
creator = creator_info.get("model", creator_info.get("instance", "unknown"))
|
|
@@ -99,28 +189,187 @@ def parse_seed_file(path: Path) -> Optional[SeedMemory]:
|
|
|
99
189
|
)
|
|
100
190
|
|
|
101
191
|
|
|
192
|
+
def validate_seed_data(data: dict) -> dict:
    """Validate parsed seed JSON data before import into the memory store.

    Checks required fields, content non-emptiness, timestamp validity,
    tag types, and emotional-signature ranges for both standard and
    Cloud9 seed formats. Malformed structure (non-dict values where
    objects are expected) is reported as invalid rather than raising —
    a validator must never crash on bad input.

    Args:
        data: Parsed JSON seed data (dict).

    Returns:
        Dict with ``valid`` (bool), ``errors`` (list[str]),
        and ``warnings`` (list[str]) keys.
    """
    result: dict = {"valid": True, "errors": [], "warnings": []}

    if not isinstance(data, dict):
        result["valid"] = False
        result["errors"].append("Seed data must be a JSON object")
        return result

    is_cloud9 = "seed_metadata" in data

    def _as_dict(value) -> dict:
        # Malformed seeds may put non-objects where dicts are expected;
        # coerce to empty dict so field lookups degrade to "missing".
        return value if isinstance(value, dict) else {}

    meta = _as_dict(data.get("seed_metadata"))

    # -- Required: seed_id --
    if is_cloud9:
        seed_id = meta.get("seed_id") or data.get("seed_id")
    else:
        seed_id = data.get("seed_id")
    if not seed_id or (isinstance(seed_id, str) and not seed_id.strip()):
        result["valid"] = False
        result["errors"].append("Missing or empty required field: seed_id")

    # -- Required: version --
    if is_cloud9:
        version = meta.get("version") or data.get("version")
    else:
        version = data.get("version")
    if not version:
        result["valid"] = False
        result["errors"].append("Missing required field: version")

    # -- Content non-empty --
    if is_cloud9:
        narrative = _as_dict(data.get("experience_summary")).get("narrative", "")
    else:
        narrative = _as_dict(data.get("experience")).get("summary", "")
    if not narrative or not str(narrative).strip():
        result["errors"].append("Seed experience content is empty")
        result["valid"] = False

    # -- Timestamp validation helper --
    def _check_ts(value, field: str) -> None:
        from datetime import datetime as _dt
        if not isinstance(value, str) or not value.strip():
            return
        try:
            # Accept trailing 'Z' by mapping it to an explicit UTC offset.
            _dt.fromisoformat(value.replace("Z", "+00:00"))
        except (ValueError, TypeError):
            result["errors"].append(
                f"{field} is not a valid ISO 8601 timestamp: {value!r}"
            )
            result["valid"] = False

    if is_cloud9:
        if "created_at" in meta:
            _check_ts(meta["created_at"], "seed_metadata.created_at")
        ident = data.get("identity", {})
        if isinstance(ident, dict) and "timestamp" in ident:
            _check_ts(ident["timestamp"], "identity.timestamp")
    else:
        md = data.get("metadata", {})
        if isinstance(md, dict) and "ingested_at" in md:
            _check_ts(md["ingested_at"], "metadata.ingested_at")

    # -- Tags must be strings --
    def _check_tags(tags, field: str) -> None:
        if tags is None:
            return
        if not isinstance(tags, list):
            result["errors"].append(f"{field} must be a list")
            result["valid"] = False
            return
        for i, tag in enumerate(tags):
            if not isinstance(tag, str):
                result["errors"].append(
                    f"{field}[{i}] must be a string, got {type(tag).__name__}"
                )
                result["valid"] = False

    md = data.get("metadata", {})
    if isinstance(md, dict):
        _check_tags(md.get("tags"), "metadata.tags")

    # -- Emotional signature ranges --
    if is_cloud9:
        exp_sum = _as_dict(data.get("experience_summary"))
        emo = exp_sum.get("emotional_snapshot",
                          exp_sum.get("emotional_signature", {}))
    else:
        emo = _as_dict(data.get("experience")).get("emotional_signature", {})
    if isinstance(emo, dict):
        intensity = emo.get("intensity")
        if intensity is not None and isinstance(intensity, (int, float)):
            if not (0.0 <= float(intensity) <= 10.0):
                result["warnings"].append(
                    f"emotional intensity={intensity} outside 0-10 range"
                )
        valence = emo.get("valence")
        if valence is not None and isinstance(valence, (int, float)):
            if not (-1.0 <= float(valence) <= 1.0):
                result["warnings"].append(
                    f"emotional valence={valence} outside -1 to 1 range"
                )
        labels = emo.get("labels", emo.get("emotions"))
        if labels is not None:
            _check_tags(labels, "emotional.labels")

    # -- Lineage --
    lineage = data.get("lineage")
    if lineage is not None and not isinstance(lineage, list):
        result["errors"].append("lineage must be a list")
        result["valid"] = False

    return result
|
|
322
|
+
|
|
323
|
+
|
|
102
324
|
def import_seeds(
|
|
103
325
|
store: MemoryStore,
|
|
104
326
|
seed_dir: str = DEFAULT_SEED_DIR,
|
|
327
|
+
*,
|
|
328
|
+
skip_invalid: bool = True,
|
|
105
329
|
) -> list[Memory]:
|
|
106
330
|
"""Scan a seed directory and import all seeds into the memory store.
|
|
107
331
|
|
|
108
|
-
|
|
332
|
+
Each seed file is validated before import. Invalid seeds are skipped
|
|
333
|
+
(with a warning logged) when *skip_invalid* is True, or cause a
|
|
334
|
+
``ValueError`` when it is False.
|
|
109
335
|
|
|
110
336
|
Args:
|
|
111
337
|
store: The MemoryStore to import into.
|
|
112
338
|
seed_dir: Path to the seed directory.
|
|
339
|
+
skip_invalid: If True (default), log and skip invalid seeds.
|
|
340
|
+
If False, raise ``ValueError`` on the first invalid seed.
|
|
113
341
|
|
|
114
342
|
Returns:
|
|
115
343
|
list[Memory]: Newly imported memories.
|
|
116
344
|
"""
|
|
117
|
-
existing_refs = {
|
|
118
|
-
m.source_ref
|
|
119
|
-
for m in store.list_memories(tags=["seed"])
|
|
120
|
-
}
|
|
345
|
+
existing_refs = {m.source_ref for m in store.list_memories(tags=["seed"])}
|
|
121
346
|
|
|
122
347
|
imported: list[Memory] = []
|
|
123
348
|
for path in scan_seed_directory(seed_dir):
|
|
349
|
+
# --- Validate before import ---
|
|
350
|
+
try:
|
|
351
|
+
raw_data = json.loads(path.read_text(encoding="utf-8"))
|
|
352
|
+
except (json.JSONDecodeError, OSError) as exc:
|
|
353
|
+
msg = f"Skipping {path.name}: cannot read/parse file: {exc}"
|
|
354
|
+
if skip_invalid:
|
|
355
|
+
logger.warning(msg)
|
|
356
|
+
continue
|
|
357
|
+
raise ValueError(msg) from exc
|
|
358
|
+
|
|
359
|
+
validation = validate_seed_data(raw_data)
|
|
360
|
+
if not validation["valid"]:
|
|
361
|
+
errors_str = "; ".join(validation["errors"])
|
|
362
|
+
msg = f"Skipping {path.name}: validation failed: {errors_str}"
|
|
363
|
+
if skip_invalid:
|
|
364
|
+
logger.warning(msg)
|
|
365
|
+
continue
|
|
366
|
+
raise ValueError(msg)
|
|
367
|
+
|
|
368
|
+
if validation["warnings"]:
|
|
369
|
+
for w in validation["warnings"]:
|
|
370
|
+
logger.info("Seed %s warning: %s", path.name, w)
|
|
371
|
+
|
|
372
|
+
# --- Parse and import ---
|
|
124
373
|
seed = parse_seed_file(path)
|
|
125
374
|
if seed is None:
|
|
126
375
|
continue
|