arkaos 2.2.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/arka/SKILL.md +12 -6
- package/core/jobs/__init__.py +5 -0
- package/core/jobs/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/jobs/__pycache__/manager.cpython-313.pyc +0 -0
- package/core/jobs/manager.py +172 -0
- package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
- package/core/knowledge/ingest.py +80 -19
- package/package.json +1 -1
- package/pyproject.toml +1 -1
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.2.0
|
|
1
|
+
2.2.2
|
package/arka/SKILL.md
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: arka
|
|
3
3
|
description: >
|
|
4
|
-
ArkaOS v2 main orchestrator. Routes commands to
|
|
5
|
-
to slash commands, runs standups, system monitoring,
|
|
6
|
-
The entry point for every user interaction.
|
|
4
|
+
ArkaOS v2 main orchestrator. Routes commands to 17 departments, resolves natural language
|
|
5
|
+
to slash commands, runs standups, system monitoring, dashboard, knowledge base, personas,
|
|
6
|
+
and cross-department coordination. The entry point for every user interaction.
|
|
7
7
|
allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
|
|
8
8
|
---
|
|
9
9
|
|
|
10
10
|
# ArkaOS v2 — Main Orchestrator
|
|
11
11
|
|
|
12
12
|
> **The Operating System for AI Agent Teams**
|
|
13
|
-
>
|
|
13
|
+
> 65 agents. 17 departments. 244+ skills. Multi-runtime. Dashboard. Knowledge RAG.
|
|
14
14
|
|
|
15
15
|
## System Commands
|
|
16
16
|
|
|
@@ -23,6 +23,11 @@ allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
|
|
|
23
23
|
| `/arka help` | List all department commands |
|
|
24
24
|
| `/arka setup` | Interactive profile setup (name, company, role, objectives) |
|
|
25
25
|
| `/arka conclave` | Activate personal AI advisory board (The Conclave) |
|
|
26
|
+
| `/arka dashboard` | Open monitoring dashboard (localhost:3333) |
|
|
27
|
+
| `/arka index` | Index Obsidian vault into knowledge base |
|
|
28
|
+
| `/arka search <query>` | Semantic search in knowledge base |
|
|
29
|
+
| `/arka keys` | Manage API keys (OpenAI, Google, fal.ai) |
|
|
30
|
+
| `/arka personas` | Manage AI personas (create, clone to agent) |
|
|
26
31
|
| `/do <description>` | Universal routing — natural language to department command |
|
|
27
32
|
|
|
28
33
|
## Universal Orchestrator (/do)
|
|
@@ -99,8 +104,9 @@ Every workflow includes a Quality Gate phase before delivery:
|
|
|
99
104
|
| Tier | Role | Count | Authority |
|
|
100
105
|
|------|------|-------|-----------|
|
|
101
106
|
| 0 | C-Suite | 6 | Veto power, strategic decisions |
|
|
102
|
-
| 1 | Squad Leads |
|
|
103
|
-
| 2 | Specialists |
|
|
107
|
+
| 1 | Squad Leads | 16 | Orchestrate department, domain decisions |
|
|
108
|
+
| 2 | Specialists | 40 | Execute within domain expertise |
|
|
109
|
+
| 3 | Support | 3 | Research, documentation, data collection |
|
|
104
110
|
|
|
105
111
|
## Cross-Department Collaboration
|
|
106
112
|
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""SQLite-based job queue for persistent task tracking.
|
|
2
|
+
|
|
3
|
+
Cross-platform (Mac, Linux, Windows). Thread-safe. Survives restarts.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import sqlite3
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, asdict
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
|
|
15
|
+
class Job:
|
|
16
|
+
id: str
|
|
17
|
+
type: str = "" # youtube, pdf, audio, web, markdown, kb_index
|
|
18
|
+
source: str = "" # URL or file path
|
|
19
|
+
title: str = ""
|
|
20
|
+
status: str = "queued" # queued, processing, downloading, transcribing, embedding, completed, failed, cancelled
|
|
21
|
+
progress: int = 0 # 0-100
|
|
22
|
+
message: str = "" # Current step description
|
|
23
|
+
chunks_created: int = 0
|
|
24
|
+
media_path: str = "" # Path to downloaded media file
|
|
25
|
+
error: str = ""
|
|
26
|
+
created_at: str = ""
|
|
27
|
+
started_at: str = ""
|
|
28
|
+
completed_at: str = ""
|
|
29
|
+
|
|
30
|
+
def to_dict(self) -> dict:
|
|
31
|
+
return asdict(self)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class JobManager:
|
|
35
|
+
"""SQLite-backed job queue. Thread-safe for concurrent reads."""
|
|
36
|
+
|
|
37
|
+
def __init__(self, db_path: str | Path = ""):
|
|
38
|
+
self._db_path = str(db_path) if db_path else str(Path.home() / ".arkaos" / "jobs.db")
|
|
39
|
+
Path(self._db_path).parent.mkdir(parents=True, exist_ok=True)
|
|
40
|
+
self._init_db()
|
|
41
|
+
|
|
42
|
+
def _conn(self) -> sqlite3.Connection:
|
|
43
|
+
conn = sqlite3.connect(self._db_path)
|
|
44
|
+
conn.row_factory = sqlite3.Row
|
|
45
|
+
conn.execute("PRAGMA journal_mode=WAL") # Better concurrency
|
|
46
|
+
return conn
|
|
47
|
+
|
|
48
|
+
def _init_db(self) -> None:
|
|
49
|
+
with self._conn() as conn:
|
|
50
|
+
conn.execute("""
|
|
51
|
+
CREATE TABLE IF NOT EXISTS jobs (
|
|
52
|
+
id TEXT PRIMARY KEY,
|
|
53
|
+
type TEXT DEFAULT '',
|
|
54
|
+
source TEXT DEFAULT '',
|
|
55
|
+
title TEXT DEFAULT '',
|
|
56
|
+
status TEXT DEFAULT 'queued',
|
|
57
|
+
progress INTEGER DEFAULT 0,
|
|
58
|
+
message TEXT DEFAULT '',
|
|
59
|
+
chunks_created INTEGER DEFAULT 0,
|
|
60
|
+
media_path TEXT DEFAULT '',
|
|
61
|
+
error TEXT DEFAULT '',
|
|
62
|
+
created_at TEXT DEFAULT '',
|
|
63
|
+
started_at TEXT DEFAULT '',
|
|
64
|
+
completed_at TEXT DEFAULT ''
|
|
65
|
+
)
|
|
66
|
+
""")
|
|
67
|
+
|
|
68
|
+
def create(self, source: str, source_type: str, title: str = "") -> Job:
|
|
69
|
+
job = Job(
|
|
70
|
+
id=f"job-{uuid.uuid4().hex[:8]}",
|
|
71
|
+
type=source_type,
|
|
72
|
+
source=source,
|
|
73
|
+
title=title or f"{source_type}: {source[:60]}",
|
|
74
|
+
status="queued",
|
|
75
|
+
created_at=datetime.now().isoformat(),
|
|
76
|
+
)
|
|
77
|
+
with self._conn() as conn:
|
|
78
|
+
conn.execute(
|
|
79
|
+
"INSERT INTO jobs (id, type, source, title, status, progress, message, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
|
80
|
+
(job.id, job.type, job.source, job.title, job.status, 0, "Queued", job.created_at),
|
|
81
|
+
)
|
|
82
|
+
return job
|
|
83
|
+
|
|
84
|
+
def get(self, job_id: str) -> Optional[Job]:
|
|
85
|
+
with self._conn() as conn:
|
|
86
|
+
row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone()
|
|
87
|
+
if not row:
|
|
88
|
+
return None
|
|
89
|
+
return Job(**dict(row))
|
|
90
|
+
|
|
91
|
+
def update_progress(self, job_id: str, progress: int, message: str, status: str = "processing") -> None:
|
|
92
|
+
with self._conn() as conn:
|
|
93
|
+
conn.execute(
|
|
94
|
+
"UPDATE jobs SET progress = ?, message = ?, status = ? WHERE id = ?",
|
|
95
|
+
(progress, message, status, job_id),
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def start(self, job_id: str) -> None:
|
|
99
|
+
with self._conn() as conn:
|
|
100
|
+
conn.execute(
|
|
101
|
+
"UPDATE jobs SET status = 'processing', started_at = ? WHERE id = ?",
|
|
102
|
+
(datetime.now().isoformat(), job_id),
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
def complete(self, job_id: str, chunks_created: int = 0, media_path: str = "") -> None:
|
|
106
|
+
with self._conn() as conn:
|
|
107
|
+
conn.execute(
|
|
108
|
+
"UPDATE jobs SET status = 'completed', progress = 100, message = 'Done', chunks_created = ?, media_path = ?, completed_at = ? WHERE id = ?",
|
|
109
|
+
(chunks_created, media_path, datetime.now().isoformat(), job_id),
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
def fail(self, job_id: str, error: str) -> None:
|
|
113
|
+
with self._conn() as conn:
|
|
114
|
+
conn.execute(
|
|
115
|
+
"UPDATE jobs SET status = 'failed', error = ?, completed_at = ? WHERE id = ?",
|
|
116
|
+
(error, datetime.now().isoformat(), job_id),
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
def cancel(self, job_id: str) -> bool:
|
|
120
|
+
with self._conn() as conn:
|
|
121
|
+
result = conn.execute(
|
|
122
|
+
"UPDATE jobs SET status = 'cancelled', completed_at = ? WHERE id = ? AND status = 'queued'",
|
|
123
|
+
(datetime.now().isoformat(), job_id),
|
|
124
|
+
)
|
|
125
|
+
return result.rowcount > 0
|
|
126
|
+
|
|
127
|
+
def list_all(self, limit: int = 50) -> list[Job]:
|
|
128
|
+
with self._conn() as conn:
|
|
129
|
+
rows = conn.execute(
|
|
130
|
+
"SELECT * FROM jobs ORDER BY created_at DESC LIMIT ?", (limit,)
|
|
131
|
+
).fetchall()
|
|
132
|
+
return [Job(**dict(r)) for r in rows]
|
|
133
|
+
|
|
134
|
+
def list_active(self) -> list[Job]:
|
|
135
|
+
with self._conn() as conn:
|
|
136
|
+
rows = conn.execute(
|
|
137
|
+
"SELECT * FROM jobs WHERE status IN ('queued', 'processing', 'downloading', 'transcribing', 'embedding') ORDER BY created_at ASC"
|
|
138
|
+
).fetchall()
|
|
139
|
+
return [Job(**dict(r)) for r in rows]
|
|
140
|
+
|
|
141
|
+
def list_by_status(self, status: str, limit: int = 50) -> list[Job]:
|
|
142
|
+
with self._conn() as conn:
|
|
143
|
+
rows = conn.execute(
|
|
144
|
+
"SELECT * FROM jobs WHERE status = ? ORDER BY created_at DESC LIMIT ?", (status, limit)
|
|
145
|
+
).fetchall()
|
|
146
|
+
return [Job(**dict(r)) for r in rows]
|
|
147
|
+
|
|
148
|
+
def summary(self) -> dict:
|
|
149
|
+
with self._conn() as conn:
|
|
150
|
+
total = conn.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
|
|
151
|
+
active = conn.execute("SELECT COUNT(*) FROM jobs WHERE status IN ('queued', 'processing', 'downloading', 'transcribing', 'embedding')").fetchone()[0]
|
|
152
|
+
completed = conn.execute("SELECT COUNT(*) FROM jobs WHERE status = 'completed'").fetchone()[0]
|
|
153
|
+
failed = conn.execute("SELECT COUNT(*) FROM jobs WHERE status = 'failed'").fetchone()[0]
|
|
154
|
+
total_chunks = conn.execute("SELECT COALESCE(SUM(chunks_created), 0) FROM jobs WHERE status = 'completed'").fetchone()[0]
|
|
155
|
+
return {
|
|
156
|
+
"total": total,
|
|
157
|
+
"active": active,
|
|
158
|
+
"completed": completed,
|
|
159
|
+
"failed": failed,
|
|
160
|
+
"total_chunks": total_chunks,
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
def clear_completed(self, keep_last: int = 20) -> int:
|
|
164
|
+
with self._conn() as conn:
|
|
165
|
+
rows = conn.execute(
|
|
166
|
+
"SELECT id FROM jobs WHERE status IN ('completed', 'failed', 'cancelled') ORDER BY completed_at DESC"
|
|
167
|
+
).fetchall()
|
|
168
|
+
to_delete = [r["id"] for r in rows[keep_last:]]
|
|
169
|
+
if to_delete:
|
|
170
|
+
placeholders = ",".join("?" * len(to_delete))
|
|
171
|
+
conn.execute(f"DELETE FROM jobs WHERE id IN ({placeholders})", to_delete)
|
|
172
|
+
return len(to_delete)
|
|
Binary file
|
package/core/knowledge/ingest.py
CHANGED
|
@@ -106,16 +106,30 @@ class IngestEngine:
|
|
|
106
106
|
# Chunk and index
|
|
107
107
|
progress(75, "Chunking content...")
|
|
108
108
|
chunks = chunk_markdown(text, max_tokens=512, source=source)
|
|
109
|
+
total_chunks = len(chunks)
|
|
109
110
|
|
|
110
|
-
|
|
111
|
+
if total_chunks == 0:
|
|
112
|
+
progress(100, "No chunks to index")
|
|
113
|
+
return IngestResult(source=source, source_type=source_type, text_length=len(text), chunks_created=0, title=title, success=True)
|
|
114
|
+
|
|
115
|
+
# Index in batches with granular progress (85→99%)
|
|
111
116
|
texts = [c.text for c in chunks]
|
|
112
117
|
headings = [c.heading for c in chunks]
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
118
|
+
batch_size = 10
|
|
119
|
+
count = 0
|
|
120
|
+
|
|
121
|
+
for i in range(0, total_chunks, batch_size):
|
|
122
|
+
batch_end = min(i + batch_size, total_chunks)
|
|
123
|
+
pct = 85 + int((i / total_chunks) * 14)
|
|
124
|
+
progress(pct, f"Embedding & indexing chunks {i + 1}—{batch_end} of {total_chunks}...")
|
|
125
|
+
|
|
126
|
+
batch_count = self._store.index_chunks(
|
|
127
|
+
texts=texts[i:batch_end],
|
|
128
|
+
headings=headings[i:batch_end] if headings else None,
|
|
129
|
+
source=source,
|
|
130
|
+
metadata={"type": source_type, "title": title, **(metadata or {})},
|
|
131
|
+
)
|
|
132
|
+
count += batch_count
|
|
119
133
|
|
|
120
134
|
progress(100, f"Done — {count} chunks indexed")
|
|
121
135
|
|
|
@@ -145,15 +159,33 @@ class IngestEngine:
|
|
|
145
159
|
)
|
|
146
160
|
|
|
147
161
|
def _process_youtube(self, url: str, progress: ProgressCallback) -> tuple[str, str]:
|
|
148
|
-
"""Download YouTube video and transcribe audio.
|
|
162
|
+
"""Download YouTube video and transcribe audio.
|
|
163
|
+
|
|
164
|
+
5 distinct phases with clear progress:
|
|
165
|
+
Phase 1: Fetch video info (0-5%)
|
|
166
|
+
Phase 2: Download video (5-25%)
|
|
167
|
+
Phase 3: Extract audio (25-35%)
|
|
168
|
+
Phase 4: Transcribe audio (35-65%)
|
|
169
|
+
Phase 5: Return text for chunking/indexing (handled by caller, 75-100%)
|
|
170
|
+
"""
|
|
149
171
|
try:
|
|
150
172
|
import yt_dlp
|
|
151
173
|
except ImportError:
|
|
152
174
|
raise RuntimeError("yt-dlp not installed. Run: pip install yt-dlp")
|
|
153
175
|
|
|
154
|
-
|
|
176
|
+
# === Phase 1: Fetch video info ===
|
|
177
|
+
progress(2, "Phase 1/4 — Fetching video info...")
|
|
178
|
+
try:
|
|
179
|
+
with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
|
|
180
|
+
info = ydl.extract_info(url, download=False)
|
|
181
|
+
title = info.get("title", "YouTube Video")
|
|
182
|
+
duration = info.get("duration", 0)
|
|
183
|
+
progress(5, f"Phase 1/4 — Found: {title} ({duration}s)")
|
|
184
|
+
except Exception as e:
|
|
185
|
+
raise RuntimeError(f"YouTube access failed: {str(e)[:200]}")
|
|
155
186
|
|
|
156
|
-
# Download audio
|
|
187
|
+
# === Phase 2: Download video + extract audio ===
|
|
188
|
+
progress(8, f"Phase 2/4 — Downloading video...")
|
|
157
189
|
audio_path = str(self._media_dir / "yt_audio.wav")
|
|
158
190
|
ydl_opts = {
|
|
159
191
|
"format": "bestaudio/best",
|
|
@@ -165,21 +197,50 @@ class IngestEngine:
|
|
|
165
197
|
}],
|
|
166
198
|
"quiet": True,
|
|
167
199
|
"no_warnings": True,
|
|
200
|
+
"progress_hooks": [lambda d: progress(
|
|
201
|
+
8 + int((d.get("downloaded_bytes", 0) / max(d.get("total_bytes", 1), 1)) * 17),
|
|
202
|
+
f"Phase 2/4 — Downloading... {d.get('_percent_str', '').strip()}"
|
|
203
|
+
) if d.get("status") == "downloading" else None],
|
|
168
204
|
}
|
|
169
205
|
|
|
170
|
-
progress(10, "Downloading audio...")
|
|
171
206
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
progress(
|
|
207
|
+
ydl.extract_info(url, download=True)
|
|
208
|
+
|
|
209
|
+
# === Phase 3: Extract audio (FFmpeg post-processing) ===
|
|
210
|
+
progress(28, "Phase 3/4 — Extracting audio from video...")
|
|
211
|
+
|
|
212
|
+
# Verify audio file exists
|
|
213
|
+
if not os.path.exists(audio_path):
|
|
214
|
+
# Try to find the downloaded file with different extension
|
|
215
|
+
for ext in ["wav", "m4a", "webm", "mp3", "opus"]:
|
|
216
|
+
alt = str(self._media_dir / f"yt_audio.{ext}")
|
|
217
|
+
if os.path.exists(alt):
|
|
218
|
+
audio_path = alt
|
|
219
|
+
break
|
|
220
|
+
else:
|
|
221
|
+
raise RuntimeError("Audio extraction failed — no output file found")
|
|
222
|
+
|
|
223
|
+
audio_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
|
|
224
|
+
progress(35, f"Phase 3/4 — Audio extracted ({audio_size_mb:.1f} MB)")
|
|
225
|
+
|
|
226
|
+
# === Phase 4: Transcribe audio ===
|
|
227
|
+
progress(38, "Phase 4/4 — Transcribing audio (this may take a while)...")
|
|
176
228
|
text = self._transcribe_audio(audio_path)
|
|
177
229
|
|
|
178
|
-
|
|
230
|
+
if not text or len(text.strip()) < 20:
|
|
231
|
+
raise RuntimeError("Transcription produced no usable text")
|
|
232
|
+
|
|
233
|
+
word_count = len(text.split())
|
|
234
|
+
progress(70, f"Phase 4/4 — Transcribed: {word_count} words")
|
|
235
|
+
|
|
236
|
+
# Rename audio to include title for easy identification
|
|
237
|
+
safe_title = "".join(c if c.isalnum() or c in " -_" else "" for c in title)[:50].strip()
|
|
238
|
+
final_audio = self._media_dir / f"{safe_title}.wav"
|
|
179
239
|
try:
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
240
|
+
import shutil
|
|
241
|
+
shutil.move(audio_path, str(final_audio))
|
|
242
|
+
except Exception:
|
|
243
|
+
final_audio = Path(audio_path)
|
|
183
244
|
|
|
184
245
|
return text, title
|
|
185
246
|
|
package/package.json
CHANGED