arkaos 2.2.1 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 2.2.1
1
+ 2.2.2
package/arka/SKILL.md CHANGED
@@ -1,16 +1,16 @@
1
1
  ---
2
2
  name: arka
3
3
  description: >
4
- ArkaOS v2 main orchestrator. Routes commands to 16 departments, resolves natural language
5
- to slash commands, runs standups, system monitoring, and cross-department coordination.
6
- The entry point for every user interaction.
4
+ ArkaOS v2 main orchestrator. Routes commands to 17 departments, resolves natural language
5
+ to slash commands, runs standups, system monitoring, dashboard, knowledge base, personas,
6
+ and cross-department coordination. The entry point for every user interaction.
7
7
  allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
8
8
  ---
9
9
 
10
10
  # ArkaOS v2 — Main Orchestrator
11
11
 
12
12
  > **The Operating System for AI Agent Teams**
13
- > 56 agents. 16 departments. ~180 commands. Multi-runtime.
13
+ > 65 agents. 17 departments. 244+ skills. Multi-runtime. Dashboard. Knowledge RAG.
14
14
 
15
15
  ## System Commands
16
16
 
@@ -23,6 +23,11 @@ allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
23
23
  | `/arka help` | List all department commands |
24
24
  | `/arka setup` | Interactive profile setup (name, company, role, objectives) |
25
25
  | `/arka conclave` | Activate personal AI advisory board (The Conclave) |
26
+ | `/arka dashboard` | Open monitoring dashboard (localhost:3333) |
27
+ | `/arka index` | Index Obsidian vault into knowledge base |
28
+ | `/arka search <query>` | Semantic search in knowledge base |
29
+ | `/arka keys` | Manage API keys (OpenAI, Google, fal.ai) |
30
+ | `/arka personas` | Manage AI personas (create, clone to agent) |
26
31
  | `/do <description>` | Universal routing — natural language to department command |
27
32
 
28
33
  ## Universal Orchestrator (/do)
@@ -99,8 +104,9 @@ Every workflow includes a Quality Gate phase before delivery:
99
104
  | Tier | Role | Count | Authority |
100
105
  |------|------|-------|-----------|
101
106
  | 0 | C-Suite | 6 | Veto power, strategic decisions |
102
- | 1 | Squad Leads | 15 | Orchestrate department, domain decisions |
103
- | 2 | Specialists | 35 | Execute within domain expertise |
107
+ | 1 | Squad Leads | 16 | Orchestrate department, domain decisions |
108
+ | 2 | Specialists | 40 | Execute within domain expertise |
109
+ | 3 | Support | 3 | Research, documentation, data collection |
104
110
 
105
111
  ## Cross-Department Collaboration
106
112
 
@@ -0,0 +1,5 @@
1
+ """Job queue — SQLite-based persistent job tracking."""
2
+
3
+ from core.jobs.manager import JobManager, Job
4
+
5
+ __all__ = ["JobManager", "Job"]
@@ -0,0 +1,172 @@
1
+ """SQLite-based job queue for persistent task tracking.
2
+
3
+ Cross-platform (Mac, Linux, Windows). Thread-safe. Survives restarts.
4
+ """
5
+
6
+ import sqlite3
7
+ import uuid
8
+ from dataclasses import dataclass, asdict
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Optional
12
+
13
+
14
@dataclass
class Job:
    """A single tracked job, with one attribute per column of the ``jobs`` table.

    Only ``id`` is required; every other field has a default.

    Attributes:
        id: Unique job identifier.
        type: Kind of source: youtube, pdf, audio, web, markdown, kb_index.
        source: URL or file path.
        title: Display title for the job.
        status: One of queued, processing, downloading, transcribing,
            embedding, completed, failed, cancelled.
        progress: Completion percentage, 0-100.
        message: Current step description.
        chunks_created: Number of chunks created by the job.
        media_path: Path to the downloaded media file.
        error: Error text recorded for the job.
        created_at: Creation timestamp string.
        started_at: Start timestamp string.
        completed_at: Completion timestamp string.
    """

    id: str
    type: str = ""
    source: str = ""
    title: str = ""
    status: str = "queued"
    progress: int = 0
    message: str = ""
    chunks_created: int = 0
    media_path: str = ""
    error: str = ""
    created_at: str = ""
    started_at: str = ""
    completed_at: str = ""

    def to_dict(self) -> dict:
        """Return every field of this job as a plain ``dict``."""
        return asdict(self)
32
+
33
+
34
+ class JobManager:
35
+ """SQLite-backed job queue. Thread-safe for concurrent reads."""
36
+
37
+ def __init__(self, db_path: str | Path = ""):
38
+ self._db_path = str(db_path) if db_path else str(Path.home() / ".arkaos" / "jobs.db")
39
+ Path(self._db_path).parent.mkdir(parents=True, exist_ok=True)
40
+ self._init_db()
41
+
42
+ def _conn(self) -> sqlite3.Connection:
43
+ conn = sqlite3.connect(self._db_path)
44
+ conn.row_factory = sqlite3.Row
45
+ conn.execute("PRAGMA journal_mode=WAL") # Better concurrency
46
+ return conn
47
+
48
+ def _init_db(self) -> None:
49
+ with self._conn() as conn:
50
+ conn.execute("""
51
+ CREATE TABLE IF NOT EXISTS jobs (
52
+ id TEXT PRIMARY KEY,
53
+ type TEXT DEFAULT '',
54
+ source TEXT DEFAULT '',
55
+ title TEXT DEFAULT '',
56
+ status TEXT DEFAULT 'queued',
57
+ progress INTEGER DEFAULT 0,
58
+ message TEXT DEFAULT '',
59
+ chunks_created INTEGER DEFAULT 0,
60
+ media_path TEXT DEFAULT '',
61
+ error TEXT DEFAULT '',
62
+ created_at TEXT DEFAULT '',
63
+ started_at TEXT DEFAULT '',
64
+ completed_at TEXT DEFAULT ''
65
+ )
66
+ """)
67
+
68
+ def create(self, source: str, source_type: str, title: str = "") -> Job:
69
+ job = Job(
70
+ id=f"job-{uuid.uuid4().hex[:8]}",
71
+ type=source_type,
72
+ source=source,
73
+ title=title or f"{source_type}: {source[:60]}",
74
+ status="queued",
75
+ created_at=datetime.now().isoformat(),
76
+ )
77
+ with self._conn() as conn:
78
+ conn.execute(
79
+ "INSERT INTO jobs (id, type, source, title, status, progress, message, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
80
+ (job.id, job.type, job.source, job.title, job.status, 0, "Queued", job.created_at),
81
+ )
82
+ return job
83
+
84
+ def get(self, job_id: str) -> Optional[Job]:
85
+ with self._conn() as conn:
86
+ row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone()
87
+ if not row:
88
+ return None
89
+ return Job(**dict(row))
90
+
91
+ def update_progress(self, job_id: str, progress: int, message: str, status: str = "processing") -> None:
92
+ with self._conn() as conn:
93
+ conn.execute(
94
+ "UPDATE jobs SET progress = ?, message = ?, status = ? WHERE id = ?",
95
+ (progress, message, status, job_id),
96
+ )
97
+
98
+ def start(self, job_id: str) -> None:
99
+ with self._conn() as conn:
100
+ conn.execute(
101
+ "UPDATE jobs SET status = 'processing', started_at = ? WHERE id = ?",
102
+ (datetime.now().isoformat(), job_id),
103
+ )
104
+
105
+ def complete(self, job_id: str, chunks_created: int = 0, media_path: str = "") -> None:
106
+ with self._conn() as conn:
107
+ conn.execute(
108
+ "UPDATE jobs SET status = 'completed', progress = 100, message = 'Done', chunks_created = ?, media_path = ?, completed_at = ? WHERE id = ?",
109
+ (chunks_created, media_path, datetime.now().isoformat(), job_id),
110
+ )
111
+
112
+ def fail(self, job_id: str, error: str) -> None:
113
+ with self._conn() as conn:
114
+ conn.execute(
115
+ "UPDATE jobs SET status = 'failed', error = ?, completed_at = ? WHERE id = ?",
116
+ (error, datetime.now().isoformat(), job_id),
117
+ )
118
+
119
+ def cancel(self, job_id: str) -> bool:
120
+ with self._conn() as conn:
121
+ result = conn.execute(
122
+ "UPDATE jobs SET status = 'cancelled', completed_at = ? WHERE id = ? AND status = 'queued'",
123
+ (datetime.now().isoformat(), job_id),
124
+ )
125
+ return result.rowcount > 0
126
+
127
+ def list_all(self, limit: int = 50) -> list[Job]:
128
+ with self._conn() as conn:
129
+ rows = conn.execute(
130
+ "SELECT * FROM jobs ORDER BY created_at DESC LIMIT ?", (limit,)
131
+ ).fetchall()
132
+ return [Job(**dict(r)) for r in rows]
133
+
134
+ def list_active(self) -> list[Job]:
135
+ with self._conn() as conn:
136
+ rows = conn.execute(
137
+ "SELECT * FROM jobs WHERE status IN ('queued', 'processing', 'downloading', 'transcribing', 'embedding') ORDER BY created_at ASC"
138
+ ).fetchall()
139
+ return [Job(**dict(r)) for r in rows]
140
+
141
+ def list_by_status(self, status: str, limit: int = 50) -> list[Job]:
142
+ with self._conn() as conn:
143
+ rows = conn.execute(
144
+ "SELECT * FROM jobs WHERE status = ? ORDER BY created_at DESC LIMIT ?", (status, limit)
145
+ ).fetchall()
146
+ return [Job(**dict(r)) for r in rows]
147
+
148
+ def summary(self) -> dict:
149
+ with self._conn() as conn:
150
+ total = conn.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
151
+ active = conn.execute("SELECT COUNT(*) FROM jobs WHERE status IN ('queued', 'processing', 'downloading', 'transcribing', 'embedding')").fetchone()[0]
152
+ completed = conn.execute("SELECT COUNT(*) FROM jobs WHERE status = 'completed'").fetchone()[0]
153
+ failed = conn.execute("SELECT COUNT(*) FROM jobs WHERE status = 'failed'").fetchone()[0]
154
+ total_chunks = conn.execute("SELECT COALESCE(SUM(chunks_created), 0) FROM jobs WHERE status = 'completed'").fetchone()[0]
155
+ return {
156
+ "total": total,
157
+ "active": active,
158
+ "completed": completed,
159
+ "failed": failed,
160
+ "total_chunks": total_chunks,
161
+ }
162
+
163
+ def clear_completed(self, keep_last: int = 20) -> int:
164
+ with self._conn() as conn:
165
+ rows = conn.execute(
166
+ "SELECT id FROM jobs WHERE status IN ('completed', 'failed', 'cancelled') ORDER BY completed_at DESC"
167
+ ).fetchall()
168
+ to_delete = [r["id"] for r in rows[keep_last:]]
169
+ if to_delete:
170
+ placeholders = ",".join("?" * len(to_delete))
171
+ conn.execute(f"DELETE FROM jobs WHERE id IN ({placeholders})", to_delete)
172
+ return len(to_delete)
@@ -159,15 +159,33 @@ class IngestEngine:
159
159
  )
160
160
 
161
161
  def _process_youtube(self, url: str, progress: ProgressCallback) -> tuple[str, str]:
162
- """Download YouTube video and transcribe audio."""
162
+ """Download YouTube video and transcribe audio.
163
+
164
+ 5 distinct phases with clear progress:
165
+ Phase 1: Fetch video info (0-5%)
166
+ Phase 2: Download video (5-25%)
167
+ Phase 3: Extract audio (25-35%)
168
+ Phase 4: Transcribe audio (35-65%)
169
+ Phase 5: Return text for chunking/indexing (handled by caller, 75-100%)
170
+ """
163
171
  try:
164
172
  import yt_dlp
165
173
  except ImportError:
166
174
  raise RuntimeError("yt-dlp not installed. Run: pip install yt-dlp")
167
175
 
168
- progress(5, "Fetching video info...")
176
+ # === Phase 1: Fetch video info ===
177
+ progress(2, "Phase 1/4 — Fetching video info...")
178
+ try:
179
+ with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
180
+ info = ydl.extract_info(url, download=False)
181
+ title = info.get("title", "YouTube Video")
182
+ duration = info.get("duration", 0)
183
+ progress(5, f"Phase 1/4 — Found: {title} ({duration}s)")
184
+ except Exception as e:
185
+ raise RuntimeError(f"YouTube access failed: {str(e)[:200]}")
169
186
 
170
- # Download audio only
187
+ # === Phase 2: Download video + extract audio ===
188
+ progress(8, f"Phase 2/4 — Downloading video...")
171
189
  audio_path = str(self._media_dir / "yt_audio.wav")
172
190
  ydl_opts = {
173
191
  "format": "bestaudio/best",
@@ -179,21 +197,50 @@ class IngestEngine:
179
197
  }],
180
198
  "quiet": True,
181
199
  "no_warnings": True,
200
+ "progress_hooks": [lambda d: progress(
201
+ 8 + int((d.get("downloaded_bytes", 0) / max(d.get("total_bytes", 1), 1)) * 17),
202
+ f"Phase 2/4 — Downloading... {d.get('_percent_str', '').strip()}"
203
+ ) if d.get("status") == "downloading" else None],
182
204
  }
183
205
 
184
- progress(10, "Downloading audio...")
185
206
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
186
- info = ydl.extract_info(url, download=True)
187
- title = info.get("title", "YouTube Video")
188
-
189
- progress(35, "Transcribing audio...")
207
+ ydl.extract_info(url, download=True)
208
+
209
+ # === Phase 3: Extract audio (FFmpeg post-processing) ===
210
+ progress(28, "Phase 3/4 — Extracting audio from video...")
211
+
212
+ # Verify audio file exists
213
+ if not os.path.exists(audio_path):
214
+ # Try to find the downloaded file with different extension
215
+ for ext in ["wav", "m4a", "webm", "mp3", "opus"]:
216
+ alt = str(self._media_dir / f"yt_audio.{ext}")
217
+ if os.path.exists(alt):
218
+ audio_path = alt
219
+ break
220
+ else:
221
+ raise RuntimeError("Audio extraction failed — no output file found")
222
+
223
+ audio_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
224
+ progress(35, f"Phase 3/4 — Audio extracted ({audio_size_mb:.1f} MB)")
225
+
226
+ # === Phase 4: Transcribe audio ===
227
+ progress(38, "Phase 4/4 — Transcribing audio (this may take a while)...")
190
228
  text = self._transcribe_audio(audio_path)
191
229
 
192
- # Cleanup
230
+ if not text or len(text.strip()) < 20:
231
+ raise RuntimeError("Transcription produced no usable text")
232
+
233
+ word_count = len(text.split())
234
+ progress(70, f"Phase 4/4 — Transcribed: {word_count} words")
235
+
236
+ # Rename audio to include title for easy identification
237
+ safe_title = "".join(c if c.isalnum() or c in " -_" else "" for c in title)[:50].strip()
238
+ final_audio = self._media_dir / f"{safe_title}.wav"
193
239
  try:
194
- os.remove(audio_path)
195
- except OSError:
196
- pass
240
+ import shutil
241
+ shutil.move(audio_path, str(final_audio))
242
+ except Exception:
243
+ final_audio = Path(audio_path)
197
244
 
198
245
  return text, title
199
246
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arkaos",
3
- "version": "2.2.1",
3
+ "version": "2.2.2",
4
4
  "description": "The Operating System for AI Agent Teams",
5
5
  "type": "module",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "arkaos-core"
3
- version = "2.2.1"
3
+ version = "2.2.2"
4
4
  description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}