npm - arkaos - Versions diffs - 2.2.0 → 2.2.2 - Mend

arkaos 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/VERSION +1 -1
package/arka/SKILL.md +12 -6
package/core/jobs/__init__.py +5 -0
package/core/jobs/__pycache__/__init__.cpython-313.pyc +0 -0
package/core/jobs/__pycache__/manager.cpython-313.pyc +0 -0
package/core/jobs/manager.py +172 -0
package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
package/core/knowledge/ingest.py +80 -19
package/package.json +1 -1
package/pyproject.toml +1 -1

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 2.2.0
1	+ 2.2.2

package/arka/SKILL.md CHANGED Viewed

@@ -1,16 +1,16 @@
 ---
 name: arka
 description: >
-  ArkaOS v2 main orchestrator. Routes commands to 16 departments, resolves natural language
-  to slash commands, runs standups, system monitoring, and cross-department coordination.
-  The entry point for every user interaction.
+  ArkaOS v2 main orchestrator. Routes commands to 17 departments, resolves natural language
+  to slash commands, runs standups, system monitoring, dashboard, knowledge base, personas,
+  and cross-department coordination. The entry point for every user interaction.
 allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
 ---
 # ArkaOS v2 — Main Orchestrator
 > **The Operating System for AI Agent Teams**
-> 56 agents. 16 departments. ~180 commands. Multi-runtime.
+> 65 agents. 17 departments. 244+ skills. Multi-runtime. Dashboard. Knowledge RAG.
 ## System Commands
@@ -23,6 +23,11 @@ allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
 | `/arka help` | List all department commands |
 | `/arka setup` | Interactive profile setup (name, company, role, objectives) |
 | `/arka conclave` | Activate personal AI advisory board (The Conclave) |
+| `/arka dashboard` | Open monitoring dashboard (localhost:3333) |
+| `/arka index` | Index Obsidian vault into knowledge base |
+| `/arka search <query>` | Semantic search in knowledge base |
+| `/arka keys` | Manage API keys (OpenAI, Google, fal.ai) |
+| `/arka personas` | Manage AI personas (create, clone to agent) |
 | `/do <description>` | Universal routing — natural language to department command |
 ## Universal Orchestrator (/do)
@@ -99,8 +104,9 @@ Every workflow includes a Quality Gate phase before delivery:
 | Tier | Role | Count | Authority |
 |------|------|-------|-----------|
 | 0 | C-Suite | 6 | Veto power, strategic decisions |
-| 1 | Squad Leads | 15 | Orchestrate department, domain decisions |
-| 2 | Specialists | 35 | Execute within domain expertise |
+| 1 | Squad Leads | 16 | Orchestrate department, domain decisions |
+| 2 | Specialists | 40 | Execute within domain expertise |
+| 3 | Support | 3 | Research, documentation, data collection |
 ## Cross-Department Collaboration

package/core/jobs/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Job queue — SQLite-based persistent job tracking."""
+from core.jobs.manager import JobManager, Job
+__all__ = ["JobManager", "Job"]

package/core/jobs/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file

package/core/jobs/__pycache__/manager.cpython-313.pyc ADDED Viewed

Binary file

package/core/jobs/manager.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""SQLite-based job queue for persistent task tracking.
+Cross-platform (Mac, Linux, Windows). Thread-safe. Survives restarts.
+"""
+import sqlite3
+import uuid
+from dataclasses import dataclass, asdict
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+@dataclass
+class Job:
+    id: str
+    type: str = ""            # youtube, pdf, audio, web, markdown, kb_index
+    source: str = ""          # URL or file path
+    title: str = ""
+    status: str = "queued"    # queued, processing, downloading, transcribing, embedding, completed, failed, cancelled
+    progress: int = 0         # 0-100
+    message: str = ""         # Current step description
+    chunks_created: int = 0
+    media_path: str = ""      # Path to downloaded media file
+    error: str = ""
+    created_at: str = ""
+    started_at: str = ""
+    completed_at: str = ""
+    def to_dict(self) -> dict:
+        return asdict(self)
+class JobManager:
+    """SQLite-backed job queue. Thread-safe for concurrent reads."""
+    def __init__(self, db_path: str | Path = ""):
+        self._db_path = str(db_path) if db_path else str(Path.home() / ".arkaos" / "jobs.db")
+        Path(self._db_path).parent.mkdir(parents=True, exist_ok=True)
+        self._init_db()
+    def _conn(self) -> sqlite3.Connection:
+        conn = sqlite3.connect(self._db_path)
+        conn.row_factory = sqlite3.Row
+        conn.execute("PRAGMA journal_mode=WAL")  # Better concurrency
+        return conn
+    def _init_db(self) -> None:
+        with self._conn() as conn:
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS jobs (
+                    id TEXT PRIMARY KEY,
+                    type TEXT DEFAULT '',
+                    source TEXT DEFAULT '',
+                    title TEXT DEFAULT '',
+                    status TEXT DEFAULT 'queued',
+                    progress INTEGER DEFAULT 0,
+                    message TEXT DEFAULT '',
+                    chunks_created INTEGER DEFAULT 0,
+                    media_path TEXT DEFAULT '',
+                    error TEXT DEFAULT '',
+                    created_at TEXT DEFAULT '',
+                    started_at TEXT DEFAULT '',
+                    completed_at TEXT DEFAULT ''
+                )
+            """)
+    def create(self, source: str, source_type: str, title: str = "") -> Job:
+        job = Job(
+            id=f"job-{uuid.uuid4().hex[:8]}",
+            type=source_type,
+            source=source,
+            title=title or f"{source_type}: {source[:60]}",
+            status="queued",
+            created_at=datetime.now().isoformat(),
+        )
+        with self._conn() as conn:
+            conn.execute(
+                "INSERT INTO jobs (id, type, source, title, status, progress, message, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+                (job.id, job.type, job.source, job.title, job.status, 0, "Queued", job.created_at),
+            )
+        return job
+    def get(self, job_id: str) -> Optional[Job]:
+        with self._conn() as conn:
+            row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone()
+            if not row:
+                return None
+            return Job(**dict(row))
+    def update_progress(self, job_id: str, progress: int, message: str, status: str = "processing") -> None:
+        with self._conn() as conn:
+            conn.execute(
+                "UPDATE jobs SET progress = ?, message = ?, status = ? WHERE id = ?",
+                (progress, message, status, job_id),
+            )
+    def start(self, job_id: str) -> None:
+        with self._conn() as conn:
+            conn.execute(
+                "UPDATE jobs SET status = 'processing', started_at = ? WHERE id = ?",
+                (datetime.now().isoformat(), job_id),
+            )
+    def complete(self, job_id: str, chunks_created: int = 0, media_path: str = "") -> None:
+        with self._conn() as conn:
+            conn.execute(
+                "UPDATE jobs SET status = 'completed', progress = 100, message = 'Done', chunks_created = ?, media_path = ?, completed_at = ? WHERE id = ?",
+                (chunks_created, media_path, datetime.now().isoformat(), job_id),
+            )
+    def fail(self, job_id: str, error: str) -> None:
+        with self._conn() as conn:
+            conn.execute(
+                "UPDATE jobs SET status = 'failed', error = ?, completed_at = ? WHERE id = ?",
+                (error, datetime.now().isoformat(), job_id),
+            )
+    def cancel(self, job_id: str) -> bool:
+        with self._conn() as conn:
+            result = conn.execute(
+                "UPDATE jobs SET status = 'cancelled', completed_at = ? WHERE id = ? AND status = 'queued'",
+                (datetime.now().isoformat(), job_id),
+            )
+            return result.rowcount > 0
+    def list_all(self, limit: int = 50) -> list[Job]:
+        with self._conn() as conn:
+            rows = conn.execute(
+                "SELECT * FROM jobs ORDER BY created_at DESC LIMIT ?", (limit,)
+            ).fetchall()
+            return [Job(**dict(r)) for r in rows]
+    def list_active(self) -> list[Job]:
+        with self._conn() as conn:
+            rows = conn.execute(
+                "SELECT * FROM jobs WHERE status IN ('queued', 'processing', 'downloading', 'transcribing', 'embedding') ORDER BY created_at ASC"
+            ).fetchall()
+            return [Job(**dict(r)) for r in rows]
+    def list_by_status(self, status: str, limit: int = 50) -> list[Job]:
+        with self._conn() as conn:
+            rows = conn.execute(
+                "SELECT * FROM jobs WHERE status = ? ORDER BY created_at DESC LIMIT ?", (status, limit)
+            ).fetchall()
+            return [Job(**dict(r)) for r in rows]
+    def summary(self) -> dict:
+        with self._conn() as conn:
+            total = conn.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
+            active = conn.execute("SELECT COUNT(*) FROM jobs WHERE status IN ('queued', 'processing', 'downloading', 'transcribing', 'embedding')").fetchone()[0]
+            completed = conn.execute("SELECT COUNT(*) FROM jobs WHERE status = 'completed'").fetchone()[0]
+            failed = conn.execute("SELECT COUNT(*) FROM jobs WHERE status = 'failed'").fetchone()[0]
+            total_chunks = conn.execute("SELECT COALESCE(SUM(chunks_created), 0) FROM jobs WHERE status = 'completed'").fetchone()[0]
+            return {
+                "total": total,
+                "active": active,
+                "completed": completed,
+                "failed": failed,
+                "total_chunks": total_chunks,
+            }
+    def clear_completed(self, keep_last: int = 20) -> int:
+        with self._conn() as conn:
+            rows = conn.execute(
+                "SELECT id FROM jobs WHERE status IN ('completed', 'failed', 'cancelled') ORDER BY completed_at DESC"
+            ).fetchall()
+            to_delete = [r["id"] for r in rows[keep_last:]]
+            if to_delete:
+                placeholders = ",".join("?" * len(to_delete))
+                conn.execute(f"DELETE FROM jobs WHERE id IN ({placeholders})", to_delete)
+            return len(to_delete)

package/core/knowledge/__pycache__/ingest.cpython-313.pyc CHANGED Viewed

Binary file

package/core/knowledge/ingest.py CHANGED Viewed

@@ -106,16 +106,30 @@ class IngestEngine:
         # Chunk and index
         progress(75, "Chunking content...")
         chunks = chunk_markdown(text, max_tokens=512, source=source)
+        total_chunks = len(chunks)
-        progress(85, f"Indexing {len(chunks)} chunks...")
+        if total_chunks == 0:
+            progress(100, "No chunks to index")
+            return IngestResult(source=source, source_type=source_type, text_length=len(text), chunks_created=0, title=title, success=True)
+        # Index in batches with granular progress (85→99%)
         texts = [c.text for c in chunks]
         headings = [c.heading for c in chunks]
-        count = self._store.index_chunks(
-            texts=texts,
-            headings=headings,
-            source=source,
-            metadata={"type": source_type, "title": title, **(metadata or {})},
-        )
+        batch_size = 10
+        count = 0
+        for i in range(0, total_chunks, batch_size):
+            batch_end = min(i + batch_size, total_chunks)
+            pct = 85 + int((i / total_chunks) * 14)
+            progress(pct, f"Embedding & indexing chunks {i + 1}—{batch_end} of {total_chunks}...")
+            batch_count = self._store.index_chunks(
+                texts=texts[i:batch_end],
+                headings=headings[i:batch_end] if headings else None,
+                source=source,
+                metadata={"type": source_type, "title": title, **(metadata or {})},
+            )
+            count += batch_count
         progress(100, f"Done — {count} chunks indexed")
@@ -145,15 +159,33 @@ class IngestEngine:
         )
     def _process_youtube(self, url: str, progress: ProgressCallback) -> tuple[str, str]:
-        """Download YouTube video and transcribe audio."""
+        """Download YouTube video and transcribe audio.
+        5 distinct phases with clear progress:
+        Phase 1: Fetch video info (0-5%)
+        Phase 2: Download video (5-25%)
+        Phase 3: Extract audio (25-35%)
+        Phase 4: Transcribe audio (35-65%)
+        Phase 5: Return text for chunking/indexing (handled by caller, 75-100%)
+        """
         try:
             import yt_dlp
         except ImportError:
             raise RuntimeError("yt-dlp not installed. Run: pip install yt-dlp")
-        progress(5, "Fetching video info...")
+        # === Phase 1: Fetch video info ===
+        progress(2, "Phase 1/4 — Fetching video info...")
+        try:
+            with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
+                info = ydl.extract_info(url, download=False)
+                title = info.get("title", "YouTube Video")
+                duration = info.get("duration", 0)
+                progress(5, f"Phase 1/4 — Found: {title} ({duration}s)")
+        except Exception as e:
+            raise RuntimeError(f"YouTube access failed: {str(e)[:200]}")
-        # Download audio only
+        # === Phase 2: Download video + extract audio ===
+        progress(8, f"Phase 2/4 — Downloading video...")
         audio_path = str(self._media_dir / "yt_audio.wav")
         ydl_opts = {
             "format": "bestaudio/best",
@@ -165,21 +197,50 @@ class IngestEngine:
             }],
             "quiet": True,
             "no_warnings": True,
+            "progress_hooks": [lambda d: progress(
+                8 + int((d.get("downloaded_bytes", 0) / max(d.get("total_bytes", 1), 1)) * 17),
+                f"Phase 2/4 — Downloading... {d.get('_percent_str', '').strip()}"
+            ) if d.get("status") == "downloading" else None],
         }
-        progress(10, "Downloading audio...")
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-            title = info.get("title", "YouTube Video")
-        progress(35, "Transcribing audio...")
+            ydl.extract_info(url, download=True)
+        # === Phase 3: Extract audio (FFmpeg post-processing) ===
+        progress(28, "Phase 3/4 — Extracting audio from video...")
+        # Verify audio file exists
+        if not os.path.exists(audio_path):
+            # Try to find the downloaded file with different extension
+            for ext in ["wav", "m4a", "webm", "mp3", "opus"]:
+                alt = str(self._media_dir / f"yt_audio.{ext}")
+                if os.path.exists(alt):
+                    audio_path = alt
+                    break
+            else:
+                raise RuntimeError("Audio extraction failed — no output file found")
+        audio_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
+        progress(35, f"Phase 3/4 — Audio extracted ({audio_size_mb:.1f} MB)")
+        # === Phase 4: Transcribe audio ===
+        progress(38, "Phase 4/4 — Transcribing audio (this may take a while)...")
         text = self._transcribe_audio(audio_path)
-        # Cleanup
+        if not text or len(text.strip()) < 20:
+            raise RuntimeError("Transcription produced no usable text")
+        word_count = len(text.split())
+        progress(70, f"Phase 4/4 — Transcribed: {word_count} words")
+        # Rename audio to include title for easy identification
+        safe_title = "".join(c if c.isalnum() or c in " -_" else "" for c in title)[:50].strip()
+        final_audio = self._media_dir / f"{safe_title}.wav"
         try:
-            os.remove(audio_path)
-        except OSError:
-            pass
+            import shutil
+            shutil.move(audio_path, str(final_audio))
+        except Exception:
+            final_audio = Path(audio_path)
         return text, title

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "arkaos",
-  "version": "2.2.0",
+  "version": "2.2.2",
   "description": "The Operating System for AI Agent Teams",
   "type": "module",
   "bin": {

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "arkaos-core"
-version = "2.2.0"
+version = "2.2.2"
 description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
 readme = "README.md"
 license = {text = "MIT"}