arkaos 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 2.2.0
1
+ 2.2.2
package/arka/SKILL.md CHANGED
@@ -1,16 +1,16 @@
1
1
  ---
2
2
  name: arka
3
3
  description: >
4
- ArkaOS v2 main orchestrator. Routes commands to 16 departments, resolves natural language
5
- to slash commands, runs standups, system monitoring, and cross-department coordination.
6
- The entry point for every user interaction.
4
+ ArkaOS v2 main orchestrator. Routes commands to 17 departments, resolves natural language
5
+ to slash commands, runs standups, system monitoring, dashboard, knowledge base, personas,
6
+ and cross-department coordination. The entry point for every user interaction.
7
7
  allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
8
8
  ---
9
9
 
10
10
  # ArkaOS v2 — Main Orchestrator
11
11
 
12
12
  > **The Operating System for AI Agent Teams**
13
- > 56 agents. 16 departments. ~180 commands. Multi-runtime.
13
+ > 65 agents. 17 departments. 244+ skills. Multi-runtime. Dashboard. Knowledge RAG.
14
14
 
15
15
  ## System Commands
16
16
 
@@ -23,6 +23,11 @@ allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
23
23
  | `/arka help` | List all department commands |
24
24
  | `/arka setup` | Interactive profile setup (name, company, role, objectives) |
25
25
  | `/arka conclave` | Activate personal AI advisory board (The Conclave) |
26
+ | `/arka dashboard` | Open monitoring dashboard (localhost:3333) |
27
+ | `/arka index` | Index Obsidian vault into knowledge base |
28
+ | `/arka search <query>` | Semantic search in knowledge base |
29
+ | `/arka keys` | Manage API keys (OpenAI, Google, fal.ai) |
30
+ | `/arka personas` | Manage AI personas (create, clone to agent) |
26
31
  | `/do <description>` | Universal routing — natural language to department command |
27
32
 
28
33
  ## Universal Orchestrator (/do)
@@ -99,8 +104,9 @@ Every workflow includes a Quality Gate phase before delivery:
99
104
  | Tier | Role | Count | Authority |
100
105
  |------|------|-------|-----------|
101
106
  | 0 | C-Suite | 6 | Veto power, strategic decisions |
102
- | 1 | Squad Leads | 15 | Orchestrate department, domain decisions |
103
- | 2 | Specialists | 35 | Execute within domain expertise |
107
+ | 1 | Squad Leads | 16 | Orchestrate department, domain decisions |
108
+ | 2 | Specialists | 40 | Execute within domain expertise |
109
+ | 3 | Support | 3 | Research, documentation, data collection |
104
110
 
105
111
  ## Cross-Department Collaboration
106
112
 
@@ -0,0 +1,5 @@
1
+ """Job queue — SQLite-based persistent job tracking."""
2
+
3
+ from core.jobs.manager import JobManager, Job
4
+
5
+ __all__ = ["JobManager", "Job"]
@@ -0,0 +1,172 @@
1
+ """SQLite-based job queue for persistent task tracking.
2
+
3
+ Cross-platform (Mac, Linux, Windows). Thread-safe. Survives restarts.
4
+ """
5
+
6
+ import sqlite3
7
+ import uuid
8
+ from dataclasses import dataclass, asdict
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Optional
12
+
13
+
14
@dataclass
class Job:
    """A single tracked job, with one attribute per column of the ``jobs`` table.

    Only ``id`` is required; every other field has a default so a ``Job`` can
    be built directly from a database row or from a partial set of values.
    """

    id: str
    # Kind of source: youtube, pdf, audio, web, markdown, kb_index
    type: str = ""
    # URL or file path the job was created for
    source: str = ""
    title: str = ""
    # One of: queued, processing, downloading, transcribing, embedding,
    # completed, failed, cancelled
    status: str = "queued"
    # Percentage complete, 0-100
    progress: int = 0
    # Human-readable description of the current step
    message: str = ""
    chunks_created: int = 0
    # Path to the downloaded media file, if any
    media_path: str = ""
    error: str = ""
    # Timestamps are stored as strings; empty means "not set yet"
    created_at: str = ""
    started_at: str = ""
    completed_at: str = ""

    def to_dict(self) -> dict:
        """Return all fields as a plain dictionary keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping
32
+
33
+
34
class JobManager:
    """SQLite-backed persistent job queue.

    Every public method opens its own short-lived connection, runs its
    statements and closes the connection again, so an instance never holds an
    open database handle between calls.
    """

    # Statuses that count as "still in flight" for list_active() and summary().
    _ACTIVE_STATUSES = ("queued", "processing", "downloading", "transcribing", "embedding")

    def __init__(self, db_path: str | Path = ""):
        """Open (creating if needed) the job database.

        Args:
            db_path: Location of the SQLite file. When empty, defaults to
                ``~/.arkaos/jobs.db``. Missing parent directories are created.
        """
        self._db_path = str(db_path) if db_path else str(Path.home() / ".arkaos" / "jobs.db")
        Path(self._db_path).parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _conn(self) -> sqlite3.Connection:
        """Return a new connection with dict-style rows and WAL journaling.

        The caller owns the connection and must close it.
        """
        conn = sqlite3.connect(self._db_path)
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA journal_mode=WAL")  # Better concurrency
        except Exception:
            # Do not leak the handle if configuring it fails.
            conn.close()
            raise
        return conn

    def _write(self, sql: str, params: tuple = ()) -> int:
        """Run one modifying statement in its own transaction.

        Returns:
            The number of rows the statement affected.
        """
        conn = self._conn()
        try:
            # ``with conn`` commits on success and rolls back on error, but
            # does not close the connection -- hence the outer ``finally``.
            with conn:
                return conn.execute(sql, params).rowcount
        finally:
            conn.close()

    def _read(self, sql: str, params: tuple = ()) -> list[sqlite3.Row]:
        """Run one query and return all of its rows."""
        conn = self._conn()
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    def _fetch_jobs(self, sql: str, params: tuple = ()) -> list[Job]:
        """Run a ``SELECT *`` style query and convert each row to a ``Job``."""
        return [Job(**dict(row)) for row in self._read(sql, params)]

    def _init_db(self) -> None:
        """Create the ``jobs`` table if it does not exist yet."""
        self._write("""
            CREATE TABLE IF NOT EXISTS jobs (
                id TEXT PRIMARY KEY,
                type TEXT DEFAULT '',
                source TEXT DEFAULT '',
                title TEXT DEFAULT '',
                status TEXT DEFAULT 'queued',
                progress INTEGER DEFAULT 0,
                message TEXT DEFAULT '',
                chunks_created INTEGER DEFAULT 0,
                media_path TEXT DEFAULT '',
                error TEXT DEFAULT '',
                created_at TEXT DEFAULT '',
                started_at TEXT DEFAULT '',
                completed_at TEXT DEFAULT ''
            )
        """)

    def create(self, source: str, source_type: str, title: str = "") -> Job:
        """Insert a new queued job and return it.

        Args:
            source: URL or file path to process.
            source_type: Kind of source (stored in the ``type`` column).
            title: Display title; when empty one is derived from the type and
                the first 60 characters of ``source``.

        Returns:
            The ``Job`` exactly as it was stored.
        """
        job = Job(
            id=f"job-{uuid.uuid4().hex[:8]}",
            type=source_type,
            source=source,
            title=title or f"{source_type}: {source[:60]}",
            status="queued",
            progress=0,
            # Keep the returned object in sync with the stored row.
            message="Queued",
            created_at=datetime.now().isoformat(),
        )
        self._write(
            "INSERT INTO jobs (id, type, source, title, status, progress, message, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (job.id, job.type, job.source, job.title, job.status, job.progress, job.message, job.created_at),
        )
        return job

    def get(self, job_id: str) -> Optional[Job]:
        """Return the job with ``job_id``, or ``None`` if there is none."""
        rows = self._read("SELECT * FROM jobs WHERE id = ?", (job_id,))
        if not rows:
            return None
        return Job(**dict(rows[0]))

    def update_progress(self, job_id: str, progress: int, message: str, status: str = "processing") -> None:
        """Set the progress, step message and status of a job."""
        self._write(
            "UPDATE jobs SET progress = ?, message = ?, status = ? WHERE id = ?",
            (progress, message, status, job_id),
        )

    def start(self, job_id: str) -> None:
        """Mark a job as processing and stamp ``started_at`` with the current time."""
        self._write(
            "UPDATE jobs SET status = 'processing', started_at = ? WHERE id = ?",
            (datetime.now().isoformat(), job_id),
        )

    def complete(self, job_id: str, chunks_created: int = 0, media_path: str = "") -> None:
        """Mark a job as completed at 100% and record its results."""
        self._write(
            "UPDATE jobs SET status = 'completed', progress = 100, message = 'Done', chunks_created = ?, media_path = ?, completed_at = ? WHERE id = ?",
            (chunks_created, media_path, datetime.now().isoformat(), job_id),
        )

    def fail(self, job_id: str, error: str) -> None:
        """Mark a job as failed, storing ``error`` and stamping ``completed_at``."""
        self._write(
            "UPDATE jobs SET status = 'failed', error = ?, completed_at = ? WHERE id = ?",
            (error, datetime.now().isoformat(), job_id),
        )

    def cancel(self, job_id: str) -> bool:
        """Cancel a job that is still queued.

        Returns:
            ``True`` if a queued job was cancelled, ``False`` if the job does
            not exist or is no longer in the ``queued`` state.
        """
        affected = self._write(
            "UPDATE jobs SET status = 'cancelled', completed_at = ? WHERE id = ? AND status = 'queued'",
            (datetime.now().isoformat(), job_id),
        )
        return affected > 0

    def list_all(self, limit: int = 50) -> list[Job]:
        """Return up to ``limit`` jobs, newest first by ``created_at``."""
        return self._fetch_jobs(
            "SELECT * FROM jobs ORDER BY created_at DESC LIMIT ?", (limit,)
        )

    def list_active(self) -> list[Job]:
        """Return all jobs in an active status, oldest first by ``created_at``."""
        placeholders = ", ".join("?" * len(self._ACTIVE_STATUSES))
        return self._fetch_jobs(
            f"SELECT * FROM jobs WHERE status IN ({placeholders}) ORDER BY created_at ASC",
            self._ACTIVE_STATUSES,
        )

    def list_by_status(self, status: str, limit: int = 50) -> list[Job]:
        """Return up to ``limit`` jobs with exactly ``status``, newest first."""
        return self._fetch_jobs(
            "SELECT * FROM jobs WHERE status = ? ORDER BY created_at DESC LIMIT ?", (status, limit)
        )

    def summary(self) -> dict:
        """Return aggregate counts for the whole queue.

        All figures come from a single query so they describe one consistent
        state of the table.

        Returns:
            A dict with the integer keys ``total``, ``active``, ``completed``,
            ``failed`` and ``total_chunks`` (chunks of completed jobs only).
        """
        placeholders = ", ".join("?" * len(self._ACTIVE_STATUSES))
        row = self._read(
            f"""
            SELECT
                COUNT(*) AS total,
                COALESCE(SUM(status IN ({placeholders})), 0) AS active,
                COALESCE(SUM(status = 'completed'), 0) AS completed,
                COALESCE(SUM(status = 'failed'), 0) AS failed,
                COALESCE(SUM(CASE WHEN status = 'completed' THEN chunks_created END), 0) AS total_chunks
            FROM jobs
            """,
            self._ACTIVE_STATUSES,
        )[0]
        return {
            "total": row["total"],
            "active": row["active"],
            "completed": row["completed"],
            "failed": row["failed"],
            "total_chunks": row["total_chunks"],
        }

    def clear_completed(self, keep_last: int = 20) -> int:
        """Delete finished jobs, keeping the most recently finished ones.

        Finished means ``completed``, ``failed`` or ``cancelled``; recency is
        judged by ``completed_at``.

        Args:
            keep_last: How many of the most recently finished jobs to keep.

        Returns:
            The number of jobs deleted.
        """
        conn = self._conn()
        try:
            # Select and delete inside one transaction on one connection.
            with conn:
                rows = conn.execute(
                    "SELECT id FROM jobs WHERE status IN ('completed', 'failed', 'cancelled') ORDER BY completed_at DESC"
                ).fetchall()
                stale = [(row["id"],) for row in rows[keep_last:]]
                if stale:
                    # One bound parameter per statement, so the size of the
                    # backlog can never exceed SQLite's variable limit.
                    conn.executemany("DELETE FROM jobs WHERE id = ?", stale)
            return len(stale)
        finally:
            conn.close()
@@ -106,16 +106,30 @@ class IngestEngine:
106
106
  # Chunk and index
107
107
  progress(75, "Chunking content...")
108
108
  chunks = chunk_markdown(text, max_tokens=512, source=source)
109
+ total_chunks = len(chunks)
109
110
 
110
- progress(85, f"Indexing {len(chunks)} chunks...")
111
+ if total_chunks == 0:
112
+ progress(100, "No chunks to index")
113
+ return IngestResult(source=source, source_type=source_type, text_length=len(text), chunks_created=0, title=title, success=True)
114
+
115
+ # Index in batches with granular progress (85→99%)
111
116
  texts = [c.text for c in chunks]
112
117
  headings = [c.heading for c in chunks]
113
- count = self._store.index_chunks(
114
- texts=texts,
115
- headings=headings,
116
- source=source,
117
- metadata={"type": source_type, "title": title, **(metadata or {})},
118
- )
118
+ batch_size = 10
119
+ count = 0
120
+
121
+ for i in range(0, total_chunks, batch_size):
122
+ batch_end = min(i + batch_size, total_chunks)
123
+ pct = 85 + int((i / total_chunks) * 14)
124
+ progress(pct, f"Embedding & indexing chunks {i + 1}—{batch_end} of {total_chunks}...")
125
+
126
+ batch_count = self._store.index_chunks(
127
+ texts=texts[i:batch_end],
128
+ headings=headings[i:batch_end] if headings else None,
129
+ source=source,
130
+ metadata={"type": source_type, "title": title, **(metadata or {})},
131
+ )
132
+ count += batch_count
119
133
 
120
134
  progress(100, f"Done — {count} chunks indexed")
121
135
 
@@ -145,15 +159,33 @@ class IngestEngine:
145
159
  )
146
160
 
147
161
  def _process_youtube(self, url: str, progress: ProgressCallback) -> tuple[str, str]:
148
- """Download YouTube video and transcribe audio."""
162
+ """Download YouTube video and transcribe audio.
163
+
164
+ 5 distinct phases with clear progress:
165
+ Phase 1: Fetch video info (0-5%)
166
+ Phase 2: Download video (5-25%)
167
+ Phase 3: Extract audio (25-35%)
168
+ Phase 4: Transcribe audio (35-65%)
169
+ Phase 5: Return text for chunking/indexing (handled by caller, 75-100%)
170
+ """
149
171
  try:
150
172
  import yt_dlp
151
173
  except ImportError:
152
174
  raise RuntimeError("yt-dlp not installed. Run: pip install yt-dlp")
153
175
 
154
- progress(5, "Fetching video info...")
176
+ # === Phase 1: Fetch video info ===
177
+ progress(2, "Phase 1/4 — Fetching video info...")
178
+ try:
179
+ with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
180
+ info = ydl.extract_info(url, download=False)
181
+ title = info.get("title", "YouTube Video")
182
+ duration = info.get("duration", 0)
183
+ progress(5, f"Phase 1/4 — Found: {title} ({duration}s)")
184
+ except Exception as e:
185
+ raise RuntimeError(f"YouTube access failed: {str(e)[:200]}")
155
186
 
156
- # Download audio only
187
+ # === Phase 2: Download video + extract audio ===
188
+ progress(8, f"Phase 2/4 — Downloading video...")
157
189
  audio_path = str(self._media_dir / "yt_audio.wav")
158
190
  ydl_opts = {
159
191
  "format": "bestaudio/best",
@@ -165,21 +197,50 @@ class IngestEngine:
165
197
  }],
166
198
  "quiet": True,
167
199
  "no_warnings": True,
200
+ "progress_hooks": [lambda d: progress(
201
+ 8 + int((d.get("downloaded_bytes", 0) / max(d.get("total_bytes", 1), 1)) * 17),
202
+ f"Phase 2/4 — Downloading... {d.get('_percent_str', '').strip()}"
203
+ ) if d.get("status") == "downloading" else None],
168
204
  }
169
205
 
170
- progress(10, "Downloading audio...")
171
206
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
172
- info = ydl.extract_info(url, download=True)
173
- title = info.get("title", "YouTube Video")
174
-
175
- progress(35, "Transcribing audio...")
207
+ ydl.extract_info(url, download=True)
208
+
209
+ # === Phase 3: Extract audio (FFmpeg post-processing) ===
210
+ progress(28, "Phase 3/4 — Extracting audio from video...")
211
+
212
+ # Verify audio file exists
213
+ if not os.path.exists(audio_path):
214
+ # Try to find the downloaded file with different extension
215
+ for ext in ["wav", "m4a", "webm", "mp3", "opus"]:
216
+ alt = str(self._media_dir / f"yt_audio.{ext}")
217
+ if os.path.exists(alt):
218
+ audio_path = alt
219
+ break
220
+ else:
221
+ raise RuntimeError("Audio extraction failed — no output file found")
222
+
223
+ audio_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
224
+ progress(35, f"Phase 3/4 — Audio extracted ({audio_size_mb:.1f} MB)")
225
+
226
+ # === Phase 4: Transcribe audio ===
227
+ progress(38, "Phase 4/4 — Transcribing audio (this may take a while)...")
176
228
  text = self._transcribe_audio(audio_path)
177
229
 
178
- # Cleanup
230
+ if not text or len(text.strip()) < 20:
231
+ raise RuntimeError("Transcription produced no usable text")
232
+
233
+ word_count = len(text.split())
234
+ progress(70, f"Phase 4/4 — Transcribed: {word_count} words")
235
+
236
+ # Rename audio to include title for easy identification
237
+ safe_title = "".join(c if c.isalnum() or c in " -_" else "" for c in title)[:50].strip()
238
+ final_audio = self._media_dir / f"{safe_title}.wav"
179
239
  try:
180
- os.remove(audio_path)
181
- except OSError:
182
- pass
240
+ import shutil
241
+ shutil.move(audio_path, str(final_audio))
242
+ except Exception:
243
+ final_audio = Path(audio_path)
183
244
 
184
245
  return text, title
185
246
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arkaos",
3
- "version": "2.2.0",
3
+ "version": "2.2.2",
4
4
  "description": "The Operating System for AI Agent Teams",
5
5
  "type": "module",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "arkaos-core"
3
- version = "2.2.0"
3
+ version = "2.2.2"
4
4
  description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}