arkaos 2.0.3 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/VERSION +1 -1
  2. package/core/budget/__pycache__/__init__.cpython-313.pyc +0 -0
  3. package/core/budget/__pycache__/manager.cpython-313.pyc +0 -0
  4. package/core/budget/__pycache__/schema.cpython-313.pyc +0 -0
  5. package/core/knowledge/__pycache__/__init__.cpython-313.pyc +0 -0
  6. package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
  7. package/core/knowledge/__pycache__/embedder.cpython-313.pyc +0 -0
  8. package/core/knowledge/__pycache__/indexer.cpython-313.pyc +0 -0
  9. package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
  10. package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
  11. package/core/knowledge/ingest.py +270 -0
  12. package/core/obsidian/__pycache__/__init__.cpython-313.pyc +0 -0
  13. package/core/obsidian/__pycache__/templates.cpython-313.pyc +0 -0
  14. package/core/obsidian/__pycache__/writer.cpython-313.pyc +0 -0
  15. package/core/orchestration/__pycache__/__init__.cpython-313.pyc +0 -0
  16. package/core/orchestration/__pycache__/patterns.cpython-313.pyc +0 -0
  17. package/core/orchestration/__pycache__/protocol.cpython-313.pyc +0 -0
  18. package/core/personas/__init__.py +6 -0
  19. package/core/personas/__pycache__/__init__.cpython-313.pyc +0 -0
  20. package/core/personas/__pycache__/manager.cpython-313.pyc +0 -0
  21. package/core/personas/__pycache__/schema.cpython-313.pyc +0 -0
  22. package/core/personas/manager.py +102 -0
  23. package/core/personas/schema.py +127 -0
  24. package/core/runtime/__pycache__/subagent.cpython-313.pyc +0 -0
  25. package/core/squads/__pycache__/schema.cpython-313.pyc +0 -0
  26. package/core/synapse/__pycache__/engine.cpython-313.pyc +0 -0
  27. package/core/synapse/__pycache__/layers.cpython-313.pyc +0 -0
  28. package/core/tasks/__pycache__/schema.cpython-313.pyc +0 -0
  29. package/core/workflow/__pycache__/engine.cpython-313.pyc +0 -0
  30. package/core/workflow/__pycache__/schema.cpython-313.pyc +0 -0
  31. package/installer/cli.js +13 -0
  32. package/package.json +1 -1
  33. package/pyproject.toml +12 -1
package/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.3
1
+ 2.1.1
@@ -0,0 +1,270 @@
1
+ """Knowledge ingest engine — process YouTube, PDF, audio, web, markdown.
2
+
3
+ Downloads, transcribes, extracts text, chunks, embeds, and indexes into
4
+ the vector store. Reports progress via callback for real-time UI updates.
5
+ """
6
+
7
+ import os
8
+ import re
9
+ import tempfile
10
+ from dataclasses import dataclass, field
11
+ from pathlib import Path
12
+ from typing import Callable, Optional
13
+
14
+ from core.knowledge.chunker import chunk_markdown
15
+ from core.knowledge.vector_store import VectorStore
16
+
17
+
18
@dataclass
class IngestResult:
    """Result of an ingest operation.

    Returned by ``IngestEngine.ingest`` for both success and failure; on
    failure ``success`` is False and ``error`` holds the reason.
    """

    # URL or file path that was ingested.
    source: str
    # Content type: youtube, pdf, audio, web, markdown (or "unknown").
    source_type: str
    # Length in characters of the extracted text.
    text_length: int = 0
    # Number of chunks indexed into the vector store.
    chunks_created: int = 0
    # Human-readable title derived from the content or the file name.
    title: str = ""
    # Failure reason when success is False; empty otherwise.
    error: str = ""
    success: bool = True
28
+
29
+
30
# Progress callback signature: (percent_complete, human-readable message).
ProgressCallback = Callable[[int, str], None]  # (percent, message)


def detect_source_type(source: str) -> str:
    """Auto-detect content type from a URL or file extension.

    Args:
        source: URL or local file path.

    Returns:
        One of "youtube", "web", "pdf", "audio", "markdown", or "unknown".
    """
    source_lower = source.lower()
    is_url = source_lower.startswith(("http://", "https://"))

    # YouTube: only URLs (or scheme-less links that *start* with a YouTube
    # host) count — a plain substring test would misclassify a local file
    # such as "talk-about-youtube.com.md" as a video.
    if is_url:
        if "youtube.com" in source_lower or "youtu.be" in source_lower:
            return "youtube"
        return "web"
    if source_lower.startswith(("youtube.com/", "www.youtube.com/", "youtu.be/")):
        return "youtube"

    # Local files: classify by extension.
    ext = Path(source).suffix.lower()
    if ext == ".pdf":
        return "pdf"
    if ext in (".mp3", ".wav", ".m4a", ".ogg", ".flac", ".webm"):
        return "audio"
    if ext in (".md", ".txt", ".rst"):
        return "markdown"

    return "unknown"
55
+
56
+
57
class IngestEngine:
    """Processes content from various sources into the vector store.

    Each ``_process_*`` helper returns ``(text, title)``; ``ingest`` then
    chunks, embeds, and indexes the text, reporting progress via callback.
    """

    def __init__(self, store: VectorStore, media_dir: str | Path = "") -> None:
        """
        Args:
            store: Vector store that receives the indexed chunks.
            media_dir: Scratch directory for downloaded media; defaults to
                ``~/.arkaos/media`` (created if missing).
        """
        self._store = store
        self._media_dir = Path(media_dir) if media_dir else Path.home() / ".arkaos" / "media"
        self._media_dir.mkdir(parents=True, exist_ok=True)

    def ingest(
        self,
        source: str,
        source_type: str = "",
        on_progress: Optional[ProgressCallback] = None,
        metadata: dict | None = None,
    ) -> IngestResult:
        """Ingest content from any supported source.

        Args:
            source: URL or file path.
            source_type: One of youtube/pdf/audio/web/markdown; auto-detected
                from ``source`` when empty.
            on_progress: Optional callback(percent, message) for UI updates.
            metadata: Extra metadata merged into every indexed chunk.

        Returns:
            IngestResult describing the outcome. Processor failures are
            captured in ``error``/``success`` rather than raised.
        """
        if not source_type:
            source_type = detect_source_type(source)

        notify = on_progress or (lambda pct, msg: None)
        notify(0, f"Starting {source_type} ingest...")

        processors = {
            "youtube": self._process_youtube,
            "pdf": self._process_pdf,
            "audio": self._process_audio,
            "web": self._process_web,
            "markdown": self._process_markdown,
        }
        processor = processors.get(source_type)
        if processor is None:
            return IngestResult(source=source, source_type=source_type, error=f"Unsupported type: {source_type}", success=False)

        try:
            text, title = processor(source, notify)
        except Exception as e:  # download/parse/transcription failure
            return IngestResult(source=source, source_type=source_type, error=str(e), success=False)

        # Reject near-empty extractions (e.g. a scanned PDF with no text layer).
        if not text or len(text.strip()) < 50:
            return IngestResult(source=source, source_type=source_type, error="Extracted text too short", success=False)

        # Chunk and index
        notify(75, "Chunking content...")
        chunks = chunk_markdown(text, max_tokens=512, source=source)

        notify(85, f"Indexing {len(chunks)} chunks...")
        count = self._store.index_chunks(
            texts=[c.text for c in chunks],
            headings=[c.heading for c in chunks],
            source=source,
            metadata={"type": source_type, "title": title, **(metadata or {})},
        )

        notify(100, f"Done — {count} chunks indexed")

        return IngestResult(
            source=source,
            source_type=source_type,
            text_length=len(text),
            chunks_created=count,
            title=title,
            success=True,
        )

    @staticmethod
    def _title_from_path(filepath: Path) -> str:
        """Derive a human-readable title from a file name stem."""
        return filepath.stem.replace("-", " ").replace("_", " ")

    def _process_youtube(self, url: str, progress: ProgressCallback) -> tuple[str, str]:
        """Download a YouTube video's audio track and transcribe it."""
        try:
            import yt_dlp
        except ImportError:
            raise RuntimeError("yt-dlp not installed. Run: pip install yt-dlp")

        progress(5, "Fetching video info...")

        # Download into a unique scratch directory so concurrent ingests (or
        # a stale file left by an earlier failed run) cannot collide on a
        # single hard-coded "yt_audio.wav" path.
        work_dir = Path(tempfile.mkdtemp(prefix="yt_", dir=self._media_dir))
        audio_path = str(work_dir / "audio.wav")
        ydl_opts = {
            "format": "bestaudio/best",
            "outtmpl": str(work_dir / "audio.%(ext)s"),
            "postprocessors": [{
                # Re-encode to WAV so the transcription engines can read it.
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav",
                "preferredquality": "16",
            }],
            "quiet": True,
            "no_warnings": True,
        }

        progress(10, "Downloading audio...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
        title = (info or {}).get("title", "YouTube Video")

        progress(35, "Transcribing audio...")
        try:
            text = self._transcribe_audio(audio_path)
        finally:
            # Best-effort cleanup, even when transcription raises.
            try:
                os.remove(audio_path)
            except OSError:
                pass
            try:
                os.rmdir(work_dir)
            except OSError:
                pass

        return text, title

    def _process_pdf(self, path: str, progress: ProgressCallback) -> tuple[str, str]:
        """Extract text from a PDF, page by page."""
        try:
            import pdfplumber
        except ImportError:
            raise RuntimeError("pdfplumber not installed. Run: pip install pdfplumber")

        progress(10, "Opening PDF...")
        filepath = Path(path)
        if not filepath.exists():
            raise FileNotFoundError(f"PDF not found: {path}")

        pages_text = []
        with pdfplumber.open(filepath) as pdf:
            total_pages = len(pdf.pages)
            for i, page in enumerate(pdf.pages):
                # extract_text() returns None for pages without a text layer.
                pages_text.append(page.extract_text() or "")
                # Map page progress onto the 10-70% band of the overall run.
                pct = 10 + int((i / total_pages) * 60)
                progress(pct, f"Extracting page {i + 1}/{total_pages}...")

        return "\n\n".join(pages_text), self._title_from_path(filepath)

    def _process_audio(self, path: str, progress: ProgressCallback) -> tuple[str, str]:
        """Transcribe a local audio file."""
        progress(10, "Loading audio...")
        filepath = Path(path)
        if not filepath.exists():
            raise FileNotFoundError(f"Audio not found: {path}")

        progress(20, "Transcribing audio...")
        text = self._transcribe_audio(str(filepath))
        return text, self._title_from_path(filepath)

    def _process_web(self, url: str, progress: ProgressCallback) -> tuple[str, str]:
        """Fetch a web page and extract its readable text content."""
        try:
            import requests
            from bs4 import BeautifulSoup
        except ImportError:
            raise RuntimeError("beautifulsoup4 and requests not installed. Run: pip install beautifulsoup4 requests")

        progress(10, "Fetching page...")
        resp = requests.get(url, timeout=15, headers={
            "User-Agent": "Mozilla/5.0 (ArkaOS Knowledge Indexer)"
        })
        resp.raise_for_status()

        progress(40, "Parsing content...")
        soup = BeautifulSoup(resp.text, "html.parser")

        # Drop boilerplate elements before extracting text.
        for tag in soup(["script", "style", "nav", "footer", "header", "aside"]):
            tag.decompose()

        # <title> may be missing, or present but empty (soup.title.string is
        # None then) — fall back to the URL so the title is always a str.
        title = (soup.title.string or "").strip() if soup.title else ""
        if not title:
            title = url

        # Prefer the main content container when one exists (article > main > body).
        main = soup.find("article") or soup.find("main") or soup.find("body")
        text = main.get_text(separator="\n\n", strip=True) if main else soup.get_text(separator="\n\n", strip=True)

        # Collapse runs of blank lines left behind by decomposed elements.
        text = re.sub(r'\n{3,}', '\n\n', text)

        return text, title

    def _process_markdown(self, path: str, progress: ProgressCallback) -> tuple[str, str]:
        """Read a markdown or plain-text file directly."""
        progress(10, "Reading file...")
        filepath = Path(path)
        if not filepath.exists():
            raise FileNotFoundError(f"File not found: {path}")

        text = filepath.read_text(encoding="utf-8")
        return text, self._title_from_path(filepath)

    def _transcribe_audio(self, audio_path: str) -> str:
        """Transcribe an audio file to text.

        Tries faster-whisper first, then falls back to openai-whisper.
        Raises RuntimeError when neither engine is installed; transcription
        errors from an installed engine propagate to the caller.
        """
        try:
            from faster_whisper import WhisperModel
            model = WhisperModel("base", device="cpu", compute_type="int8")
            segments, _ = model.transcribe(audio_path, beam_size=5)
            return " ".join(segment.text for segment in segments)
        except ImportError:
            pass

        try:
            import whisper
            model = whisper.load_model("base")
            result = model.transcribe(audio_path)
            return result["text"]
        except ImportError:
            raise RuntimeError(
                "No transcription engine available. Install one:\n"
                " pip install faster-whisper (recommended, lighter)\n"
                " pip install openai-whisper (original, heavier)"
            )
@@ -0,0 +1,6 @@
1
+ """Persona system — create, store, and clone personas as agents."""
2
+
3
+ from core.personas.schema import Persona
4
+ from core.personas.manager import PersonaManager
5
+
6
+ __all__ = ["Persona", "PersonaManager"]
@@ -0,0 +1,102 @@
1
+ """Persona manager — CRUD operations and cloning to agents."""
2
+
3
+ import json
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import yaml
9
+
10
+ from core.personas.schema import Persona
11
+
12
+
13
class PersonaManager:
    """Manages persona lifecycle: create, store, list, clone to agent.

    Personas are held in memory and, when ``storage_path`` is given,
    persisted to a single JSON file after every mutation.
    """

    def __init__(self, storage_path: str | Path = "") -> None:
        """
        Args:
            storage_path: JSON file used for persistence. An empty string
                disables persistence entirely (in-memory only).
        """
        self._personas: dict[str, Persona] = {}
        self._storage_path = Path(storage_path) if storage_path else None
        if self._storage_path and self._storage_path.exists():
            self._load()

    def create(self, persona: Persona) -> Persona:
        """Register a new persona, stamping created/updated timestamps."""
        persona.created_at = datetime.now().isoformat()
        persona.updated_at = persona.created_at
        self._personas[persona.id] = persona
        self._save()
        return persona

    def get(self, persona_id: str) -> Optional[Persona]:
        """Return the persona with this id, or None if unknown."""
        return self._personas.get(persona_id)

    def list_all(self) -> list[Persona]:
        """Return all personas in insertion order."""
        return list(self._personas.values())

    def update(self, persona_id: str, updates: dict) -> Optional[Persona]:
        """Apply ``updates`` to existing attributes of a persona.

        Keys that are not persona attributes are silently ignored.
        Returns the updated persona, or None if the id is unknown.
        """
        persona = self._personas.get(persona_id)
        if not persona:
            return None
        for key, value in updates.items():
            if hasattr(persona, key):
                setattr(persona, key, value)
        persona.updated_at = datetime.now().isoformat()
        self._save()
        return persona

    def delete(self, persona_id: str) -> bool:
        """Remove a persona. Returns True if it existed."""
        if persona_id in self._personas:
            del self._personas[persona_id]
            self._save()
            return True
        return False

    def clone_to_agent(
        self,
        persona_id: str,
        department: str = "strategy",
        tier: int = 2,
        agents_dir: str | Path = "",
    ) -> Optional[str]:
        """Clone a persona to an ArkaOS agent YAML file.

        The clone is recorded on the persona even when ``agents_dir`` is
        empty (in which case no file is written).

        Returns the agent ID if successful, None if persona not found.
        """
        persona = self._personas.get(persona_id)
        if not persona:
            return None

        agent_data = persona.to_agent_yaml(department=department, tier=tier)
        agent_id = agent_data["id"]

        if agents_dir:
            output_dir = Path(agents_dir)
            output_dir.mkdir(parents=True, exist_ok=True)
            output_path = output_dir / f"{agent_id}.yaml"
            # Explicit UTF-8: allow_unicode=True emits raw non-ASCII, which
            # would break under a non-UTF-8 locale default encoding.
            with open(output_path, "w", encoding="utf-8") as f:
                yaml.dump(agent_data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

        # Track the clone
        persona.cloned_to_agents.append(agent_id)
        persona.updated_at = datetime.now().isoformat()
        self._save()

        return agent_id

    def _save(self) -> None:
        """Persist all personas to the storage file (no-op without one)."""
        if self._storage_path is None:
            return
        self._storage_path.parent.mkdir(parents=True, exist_ok=True)
        data = {pid: p.model_dump(mode="json") for pid, p in self._personas.items()}
        # Explicit UTF-8 — the platform default encoding is locale-dependent.
        with open(self._storage_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def _load(self) -> None:
        """Load personas from the storage file, tolerating an empty file."""
        if self._storage_path is None or not self._storage_path.exists():
            return
        content = self._storage_path.read_text(encoding="utf-8").strip()
        if not content:
            return
        data = json.loads(content)
        for pid, pdata in data.items():
            self._personas[pid] = Persona.model_validate(pdata)
@@ -0,0 +1,127 @@
1
+ """Persona schema — models for persona creation and cloning."""
2
+
3
+ from datetime import datetime
4
+ from typing import Optional, Any
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class PersonaDISC(BaseModel):
    """DISC behavioral profile (single-letter trait codes)."""

    # Dominant DISC trait letter (default "C").
    primary: str = "C"
    # Secondary DISC trait letter (default "S").
    secondary: str = "S"
    # How this persona communicates (free text).
    communication_style: str = ""
    # Typical behavior under pressure (free text).
    under_pressure: str = ""
    # What motivates this persona (free text).
    motivator: str = ""
15
+
16
+
17
class PersonaEnneagram(BaseModel):
    """Enneagram profile: type/wing plus motivational notes."""

    # Enneagram type number (default 5).
    type: int = 5
    # Adjacent wing type number (default 6, i.e. "5w6" with the defaults).
    wing: int = 6
    # What fundamentally drives this persona (free text).
    core_motivation: str = ""
    # What this persona most avoids (free text).
    core_fear: str = ""
    # Instinctual subtype (default "self-preservation").
    subtype: str = "self-preservation"
23
+
24
+
25
class PersonaBigFive(BaseModel):
    """Big Five (OCEAN) trait scores.

    NOTE(review): scores appear to use a 0-100 scale (all defaults sit at
    the 50 midpoint) — confirm against callers.
    """

    openness: int = 50
    conscientiousness: int = 50
    extraversion: int = 50
    agreeableness: int = 50
    neuroticism: int = 50
31
+
32
+
33
class PersonaCommunication(BaseModel):
    """Communication preferences applied to the persona's output."""

    # Overall tone of voice (free text).
    tone: str = ""
    # Vocabulary register; defaults to "specialist".
    vocabulary_level: str = "specialist"
    # Preferred output format (free text).
    preferred_format: str = ""
    # Words, phrases, or styles the persona should avoid.
    avoid: list[str] = Field(default_factory=list)
38
+
39
+
40
class Persona(BaseModel):
    """A persona based on a real person or archetype.

    Bundles behavioral profiles (DISC, Enneagram, Big Five, MBTI), knowledge
    lists, and communication preferences, and can be flattened into an
    ArkaOS agent definition via :meth:`to_agent_yaml`.
    """

    id: str
    name: str
    title: str = ""  # e.g., "Business Strategy", "Growth Marketing"
    tagline: str = ""  # e.g., "The Natural Commander with emotional depth"
    source: str = ""  # e.g., "Alex Hormozi", "Naval Ravikant"
    avatar_url: str = ""

    # Behavioral DNA
    disc: PersonaDISC = Field(default_factory=PersonaDISC)
    enneagram: PersonaEnneagram = Field(default_factory=PersonaEnneagram)
    big_five: PersonaBigFive = Field(default_factory=PersonaBigFive)
    mbti: str = "INTJ"

    # Knowledge
    mental_models: list[str] = Field(default_factory=list)
    expertise_domains: list[str] = Field(default_factory=list)
    frameworks: list[str] = Field(default_factory=list)
    key_quotes: list[str] = Field(default_factory=list)

    # Communication
    communication: PersonaCommunication = Field(default_factory=PersonaCommunication)

    # Metadata — ISO-format timestamps (maintained externally, e.g. by a manager)
    created_at: str = ""
    updated_at: str = ""
    cloned_to_agents: list[str] = Field(default_factory=list)

    def to_agent_yaml(self, department: str = "strategy", tier: int = 2) -> dict:
        """Convert persona to an ArkaOS agent YAML structure.

        Args:
            department: Department the cloned agent is filed under.
            tier: Agent tier value copied into the agent definition.

        Returns:
            A dict mirroring the agent YAML schema. The agent id is the
            persona id prefixed with "persona-". Cloned agents get no
            authority flags, at most 3 primary + 3 secondary mental models,
            and at most 5 expertise domains/frameworks.
        """
        agent_id = f"persona-{self.id}"
        return {
            "id": agent_id,
            "name": self.name,
            # Fall back to a "<source> Persona" role when no title is set.
            "role": self.title or f"{self.source} Persona",
            "department": department,
            "tier": tier,
            "behavioral_dna": {
                "disc": {
                    "primary": self.disc.primary,
                    "secondary": self.disc.secondary,
                    "communication_style": self.disc.communication_style,
                    "under_pressure": self.disc.under_pressure,
                    "motivator": self.disc.motivator,
                },
                "enneagram": {
                    "type": self.enneagram.type,
                    "wing": self.enneagram.wing,
                    "core_motivation": self.enneagram.core_motivation,
                    "core_fear": self.enneagram.core_fear,
                    "subtype": self.enneagram.subtype,
                },
                "big_five": {
                    "openness": self.big_five.openness,
                    "conscientiousness": self.big_five.conscientiousness,
                    "extraversion": self.big_five.extraversion,
                    "agreeableness": self.big_five.agreeableness,
                    "neuroticism": self.big_five.neuroticism,
                },
                "mbti": {"type": self.mbti},
            },
            # First three models are "primary", next three "secondary".
            "mental_models": {
                "primary": self.mental_models[:3],
                "secondary": self.mental_models[3:6],
            },
            # Cloned personas receive no authority by default.
            "authority": {
                "veto": False,
                "approve_budget": False,
                "approve_architecture": False,
                "orchestrate": False,
                "delegates_to": [],
                "escalates_to": None,
            },
            # Depth/years are fixed heuristics for all cloned personas.
            "expertise": {
                "domains": self.expertise_domains[:5],
                "frameworks": self.frameworks[:5],
                "depth": "advanced",
                "years_equivalent": 10,
            },
            "communication": {
                "language": "en",
                "tone": self.communication.tone,
                "vocabulary_level": self.communication.vocabulary_level,
                "preferred_format": self.communication.preferred_format,
                "avoid": self.communication.avoid,
            },
        }
package/installer/cli.js CHANGED
@@ -39,6 +39,7 @@ Usage:
39
39
  npx arkaos init Initialize project config (.arkaos.json)
40
40
  npx arkaos update Update to latest version
41
41
  npx arkaos migrate Migrate from v1 to v2
42
+ npx arkaos dashboard Start monitoring dashboard
42
43
  npx arkaos doctor Run health checks
43
44
  npx arkaos uninstall Remove ArkaOS
44
45
 
@@ -98,6 +99,18 @@ async function main() {
98
99
  await migrate();
99
100
  break;
100
101
 
102
+ case "dashboard": {
103
+ const { execSync: execDash } = await import("node:child_process");
104
+ const repoRootDash = dirname(fileURLToPath(import.meta.url)).replace(/\/installer$/, "");
105
+ try {
106
+ execDash(`bash "${repoRootDash}/scripts/start-dashboard.sh"`, {
107
+ stdio: "inherit",
108
+ env: { ...process.env, ARKAOS_ROOT: repoRootDash },
109
+ });
110
+ } catch { process.exit(1); }
111
+ break;
112
+ }
113
+
101
114
  case "index": {
102
115
  const { execSync } = await import("node:child_process");
103
116
  const indexArgs = positionals.slice(1).join(" ");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arkaos",
3
- "version": "2.0.3",
3
+ "version": "2.1.1",
4
4
  "description": "The Operating System for AI Agent Teams",
5
5
  "type": "module",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "arkaos-core"
3
- version = "2.0.3"
3
+ version = "2.1.1"
4
4
  description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -32,6 +32,17 @@ knowledge = [
32
32
  "fastembed>=0.8.0",
33
33
  "sqlite-vss>=0.1.2",
34
34
  ]
35
+ dashboard = [
36
+ "fastapi>=0.115.0",
37
+ "uvicorn>=0.32.0",
38
+ ]
39
+ ingest = [
40
+ "yt-dlp>=2024.0",
41
+ "faster-whisper>=1.0.0",
42
+ "pdfplumber>=0.11.0",
43
+ "beautifulsoup4>=4.12.0",
44
+ "requests>=2.31.0",
45
+ ]
35
46
  dev = [
36
47
  "pytest>=8.0",
37
48
  "pytest-cov>=5.0",