arkaos 2.2.1 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/arka/SKILL.md +12 -6
- package/core/jobs/__init__.py +5 -0
- package/core/jobs/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/jobs/__pycache__/manager.cpython-313.pyc +0 -0
- package/core/jobs/manager.py +172 -0
- package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
- package/core/knowledge/ingest.py +59 -12
- package/package.json +1 -1
- package/pyproject.toml +1 -1
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.2.
|
|
1
|
+
2.2.2
|
package/arka/SKILL.md
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: arka
|
|
3
3
|
description: >
|
|
4
|
-
ArkaOS v2 main orchestrator. Routes commands to
|
|
5
|
-
to slash commands, runs standups, system monitoring,
|
|
6
|
-
The entry point for every user interaction.
|
|
4
|
+
ArkaOS v2 main orchestrator. Routes commands to 17 departments, resolves natural language
|
|
5
|
+
to slash commands, runs standups, system monitoring, dashboard, knowledge base, personas,
|
|
6
|
+
and cross-department coordination. The entry point for every user interaction.
|
|
7
7
|
allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
|
|
8
8
|
---
|
|
9
9
|
|
|
10
10
|
# ArkaOS v2 — Main Orchestrator
|
|
11
11
|
|
|
12
12
|
> **The Operating System for AI Agent Teams**
|
|
13
|
-
>
|
|
13
|
+
> 65 agents. 17 departments. 244+ skills. Multi-runtime. Dashboard. Knowledge RAG.
|
|
14
14
|
|
|
15
15
|
## System Commands
|
|
16
16
|
|
|
@@ -23,6 +23,11 @@ allowed-tools: [Read, Write, Edit, Bash, Grep, Glob, Agent, WebFetch, WebSearch]
|
|
|
23
23
|
| `/arka help` | List all department commands |
|
|
24
24
|
| `/arka setup` | Interactive profile setup (name, company, role, objectives) |
|
|
25
25
|
| `/arka conclave` | Activate personal AI advisory board (The Conclave) |
|
|
26
|
+
| `/arka dashboard` | Open monitoring dashboard (localhost:3333) |
|
|
27
|
+
| `/arka index` | Index Obsidian vault into knowledge base |
|
|
28
|
+
| `/arka search <query>` | Semantic search in knowledge base |
|
|
29
|
+
| `/arka keys` | Manage API keys (OpenAI, Google, fal.ai) |
|
|
30
|
+
| `/arka personas` | Manage AI personas (create, clone to agent) |
|
|
26
31
|
| `/do <description>` | Universal routing — natural language to department command |
|
|
27
32
|
|
|
28
33
|
## Universal Orchestrator (/do)
|
|
@@ -99,8 +104,9 @@ Every workflow includes a Quality Gate phase before delivery:
|
|
|
99
104
|
| Tier | Role | Count | Authority |
|
|
100
105
|
|------|------|-------|-----------|
|
|
101
106
|
| 0 | C-Suite | 6 | Veto power, strategic decisions |
|
|
102
|
-
| 1 | Squad Leads |
|
|
103
|
-
| 2 | Specialists |
|
|
107
|
+
| 1 | Squad Leads | 16 | Orchestrate department, domain decisions |
|
|
108
|
+
| 2 | Specialists | 40 | Execute within domain expertise |
|
|
109
|
+
| 3 | Support | 3 | Research, documentation, data collection |
|
|
104
110
|
|
|
105
111
|
## Cross-Department Collaboration
|
|
106
112
|
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""SQLite-based job queue for persistent task tracking.
|
|
2
|
+
|
|
3
|
+
Cross-platform (Mac, Linux, Windows). Thread-safe. Survives restarts.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import sqlite3
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, asdict
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Job:
    """One row of the persistent job queue.

    Only ``id`` is required; every other field has a default so a job can be
    built from a partial record. Timestamps are stored as strings (the
    manager writes ``datetime.isoformat()`` values) and are empty until the
    corresponding lifecycle event has happened.
    """

    id: str
    # Kind of source being ingested: youtube, pdf, audio, web, markdown, kb_index
    type: str = ""
    # URL or file path of the thing to ingest
    source: str = ""
    title: str = ""
    # One of: queued, processing, downloading, transcribing, embedding,
    # completed, failed, cancelled
    status: str = "queued"
    # Percentage complete, 0-100
    progress: int = 0
    # Human-readable description of the current step
    message: str = ""
    chunks_created: int = 0
    # Path to the downloaded media file, if any
    media_path: str = ""
    error: str = ""
    created_at: str = ""
    started_at: str = ""
    completed_at: str = ""

    def to_dict(self) -> dict:
        """Return the job as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class JobManager:
|
|
35
|
+
"""SQLite-backed job queue. Thread-safe for concurrent reads."""
|
|
36
|
+
|
|
37
|
+
def __init__(self, db_path: str | Path = ""):
|
|
38
|
+
self._db_path = str(db_path) if db_path else str(Path.home() / ".arkaos" / "jobs.db")
|
|
39
|
+
Path(self._db_path).parent.mkdir(parents=True, exist_ok=True)
|
|
40
|
+
self._init_db()
|
|
41
|
+
|
|
42
|
+
def _conn(self) -> sqlite3.Connection:
|
|
43
|
+
conn = sqlite3.connect(self._db_path)
|
|
44
|
+
conn.row_factory = sqlite3.Row
|
|
45
|
+
conn.execute("PRAGMA journal_mode=WAL") # Better concurrency
|
|
46
|
+
return conn
|
|
47
|
+
|
|
48
|
+
def _init_db(self) -> None:
|
|
49
|
+
with self._conn() as conn:
|
|
50
|
+
conn.execute("""
|
|
51
|
+
CREATE TABLE IF NOT EXISTS jobs (
|
|
52
|
+
id TEXT PRIMARY KEY,
|
|
53
|
+
type TEXT DEFAULT '',
|
|
54
|
+
source TEXT DEFAULT '',
|
|
55
|
+
title TEXT DEFAULT '',
|
|
56
|
+
status TEXT DEFAULT 'queued',
|
|
57
|
+
progress INTEGER DEFAULT 0,
|
|
58
|
+
message TEXT DEFAULT '',
|
|
59
|
+
chunks_created INTEGER DEFAULT 0,
|
|
60
|
+
media_path TEXT DEFAULT '',
|
|
61
|
+
error TEXT DEFAULT '',
|
|
62
|
+
created_at TEXT DEFAULT '',
|
|
63
|
+
started_at TEXT DEFAULT '',
|
|
64
|
+
completed_at TEXT DEFAULT ''
|
|
65
|
+
)
|
|
66
|
+
""")
|
|
67
|
+
|
|
68
|
+
def create(self, source: str, source_type: str, title: str = "") -> Job:
|
|
69
|
+
job = Job(
|
|
70
|
+
id=f"job-{uuid.uuid4().hex[:8]}",
|
|
71
|
+
type=source_type,
|
|
72
|
+
source=source,
|
|
73
|
+
title=title or f"{source_type}: {source[:60]}",
|
|
74
|
+
status="queued",
|
|
75
|
+
created_at=datetime.now().isoformat(),
|
|
76
|
+
)
|
|
77
|
+
with self._conn() as conn:
|
|
78
|
+
conn.execute(
|
|
79
|
+
"INSERT INTO jobs (id, type, source, title, status, progress, message, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
|
80
|
+
(job.id, job.type, job.source, job.title, job.status, 0, "Queued", job.created_at),
|
|
81
|
+
)
|
|
82
|
+
return job
|
|
83
|
+
|
|
84
|
+
def get(self, job_id: str) -> Optional[Job]:
|
|
85
|
+
with self._conn() as conn:
|
|
86
|
+
row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone()
|
|
87
|
+
if not row:
|
|
88
|
+
return None
|
|
89
|
+
return Job(**dict(row))
|
|
90
|
+
|
|
91
|
+
def update_progress(self, job_id: str, progress: int, message: str, status: str = "processing") -> None:
|
|
92
|
+
with self._conn() as conn:
|
|
93
|
+
conn.execute(
|
|
94
|
+
"UPDATE jobs SET progress = ?, message = ?, status = ? WHERE id = ?",
|
|
95
|
+
(progress, message, status, job_id),
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def start(self, job_id: str) -> None:
|
|
99
|
+
with self._conn() as conn:
|
|
100
|
+
conn.execute(
|
|
101
|
+
"UPDATE jobs SET status = 'processing', started_at = ? WHERE id = ?",
|
|
102
|
+
(datetime.now().isoformat(), job_id),
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
def complete(self, job_id: str, chunks_created: int = 0, media_path: str = "") -> None:
|
|
106
|
+
with self._conn() as conn:
|
|
107
|
+
conn.execute(
|
|
108
|
+
"UPDATE jobs SET status = 'completed', progress = 100, message = 'Done', chunks_created = ?, media_path = ?, completed_at = ? WHERE id = ?",
|
|
109
|
+
(chunks_created, media_path, datetime.now().isoformat(), job_id),
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
def fail(self, job_id: str, error: str) -> None:
|
|
113
|
+
with self._conn() as conn:
|
|
114
|
+
conn.execute(
|
|
115
|
+
"UPDATE jobs SET status = 'failed', error = ?, completed_at = ? WHERE id = ?",
|
|
116
|
+
(error, datetime.now().isoformat(), job_id),
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
def cancel(self, job_id: str) -> bool:
|
|
120
|
+
with self._conn() as conn:
|
|
121
|
+
result = conn.execute(
|
|
122
|
+
"UPDATE jobs SET status = 'cancelled', completed_at = ? WHERE id = ? AND status = 'queued'",
|
|
123
|
+
(datetime.now().isoformat(), job_id),
|
|
124
|
+
)
|
|
125
|
+
return result.rowcount > 0
|
|
126
|
+
|
|
127
|
+
def list_all(self, limit: int = 50) -> list[Job]:
|
|
128
|
+
with self._conn() as conn:
|
|
129
|
+
rows = conn.execute(
|
|
130
|
+
"SELECT * FROM jobs ORDER BY created_at DESC LIMIT ?", (limit,)
|
|
131
|
+
).fetchall()
|
|
132
|
+
return [Job(**dict(r)) for r in rows]
|
|
133
|
+
|
|
134
|
+
def list_active(self) -> list[Job]:
|
|
135
|
+
with self._conn() as conn:
|
|
136
|
+
rows = conn.execute(
|
|
137
|
+
"SELECT * FROM jobs WHERE status IN ('queued', 'processing', 'downloading', 'transcribing', 'embedding') ORDER BY created_at ASC"
|
|
138
|
+
).fetchall()
|
|
139
|
+
return [Job(**dict(r)) for r in rows]
|
|
140
|
+
|
|
141
|
+
def list_by_status(self, status: str, limit: int = 50) -> list[Job]:
|
|
142
|
+
with self._conn() as conn:
|
|
143
|
+
rows = conn.execute(
|
|
144
|
+
"SELECT * FROM jobs WHERE status = ? ORDER BY created_at DESC LIMIT ?", (status, limit)
|
|
145
|
+
).fetchall()
|
|
146
|
+
return [Job(**dict(r)) for r in rows]
|
|
147
|
+
|
|
148
|
+
def summary(self) -> dict:
|
|
149
|
+
with self._conn() as conn:
|
|
150
|
+
total = conn.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
|
|
151
|
+
active = conn.execute("SELECT COUNT(*) FROM jobs WHERE status IN ('queued', 'processing', 'downloading', 'transcribing', 'embedding')").fetchone()[0]
|
|
152
|
+
completed = conn.execute("SELECT COUNT(*) FROM jobs WHERE status = 'completed'").fetchone()[0]
|
|
153
|
+
failed = conn.execute("SELECT COUNT(*) FROM jobs WHERE status = 'failed'").fetchone()[0]
|
|
154
|
+
total_chunks = conn.execute("SELECT COALESCE(SUM(chunks_created), 0) FROM jobs WHERE status = 'completed'").fetchone()[0]
|
|
155
|
+
return {
|
|
156
|
+
"total": total,
|
|
157
|
+
"active": active,
|
|
158
|
+
"completed": completed,
|
|
159
|
+
"failed": failed,
|
|
160
|
+
"total_chunks": total_chunks,
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
def clear_completed(self, keep_last: int = 20) -> int:
|
|
164
|
+
with self._conn() as conn:
|
|
165
|
+
rows = conn.execute(
|
|
166
|
+
"SELECT id FROM jobs WHERE status IN ('completed', 'failed', 'cancelled') ORDER BY completed_at DESC"
|
|
167
|
+
).fetchall()
|
|
168
|
+
to_delete = [r["id"] for r in rows[keep_last:]]
|
|
169
|
+
if to_delete:
|
|
170
|
+
placeholders = ",".join("?" * len(to_delete))
|
|
171
|
+
conn.execute(f"DELETE FROM jobs WHERE id IN ({placeholders})", to_delete)
|
|
172
|
+
return len(to_delete)
|
|
Binary file
|
package/core/knowledge/ingest.py
CHANGED
|
@@ -159,15 +159,33 @@ class IngestEngine:
|
|
|
159
159
|
)
|
|
160
160
|
|
|
161
161
|
def _process_youtube(self, url: str, progress: ProgressCallback) -> tuple[str, str]:
|
|
162
|
-
"""Download YouTube video and transcribe audio.
|
|
162
|
+
"""Download YouTube video and transcribe audio.
|
|
163
|
+
|
|
164
|
+
5 distinct phases with clear progress:
|
|
165
|
+
Phase 1: Fetch video info (0-5%)
|
|
166
|
+
Phase 2: Download video (5-25%)
|
|
167
|
+
Phase 3: Extract audio (25-35%)
|
|
168
|
+
Phase 4: Transcribe audio (35-65%)
|
|
169
|
+
Phase 5: Return text for chunking/indexing (handled by caller, 75-100%)
|
|
170
|
+
"""
|
|
163
171
|
try:
|
|
164
172
|
import yt_dlp
|
|
165
173
|
except ImportError:
|
|
166
174
|
raise RuntimeError("yt-dlp not installed. Run: pip install yt-dlp")
|
|
167
175
|
|
|
168
|
-
|
|
176
|
+
# === Phase 1: Fetch video info ===
|
|
177
|
+
progress(2, "Phase 1/4 — Fetching video info...")
|
|
178
|
+
try:
|
|
179
|
+
with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
|
|
180
|
+
info = ydl.extract_info(url, download=False)
|
|
181
|
+
title = info.get("title", "YouTube Video")
|
|
182
|
+
duration = info.get("duration", 0)
|
|
183
|
+
progress(5, f"Phase 1/4 — Found: {title} ({duration}s)")
|
|
184
|
+
except Exception as e:
|
|
185
|
+
raise RuntimeError(f"YouTube access failed: {str(e)[:200]}")
|
|
169
186
|
|
|
170
|
-
# Download audio
|
|
187
|
+
# === Phase 2: Download video + extract audio ===
|
|
188
|
+
progress(8, f"Phase 2/4 — Downloading video...")
|
|
171
189
|
audio_path = str(self._media_dir / "yt_audio.wav")
|
|
172
190
|
ydl_opts = {
|
|
173
191
|
"format": "bestaudio/best",
|
|
@@ -179,21 +197,50 @@ class IngestEngine:
|
|
|
179
197
|
}],
|
|
180
198
|
"quiet": True,
|
|
181
199
|
"no_warnings": True,
|
|
200
|
+
"progress_hooks": [lambda d: progress(
|
|
201
|
+
8 + int((d.get("downloaded_bytes", 0) / max(d.get("total_bytes", 1), 1)) * 17),
|
|
202
|
+
f"Phase 2/4 — Downloading... {d.get('_percent_str', '').strip()}"
|
|
203
|
+
) if d.get("status") == "downloading" else None],
|
|
182
204
|
}
|
|
183
205
|
|
|
184
|
-
progress(10, "Downloading audio...")
|
|
185
206
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
progress(
|
|
207
|
+
ydl.extract_info(url, download=True)
|
|
208
|
+
|
|
209
|
+
# === Phase 3: Extract audio (FFmpeg post-processing) ===
|
|
210
|
+
progress(28, "Phase 3/4 — Extracting audio from video...")
|
|
211
|
+
|
|
212
|
+
# Verify audio file exists
|
|
213
|
+
if not os.path.exists(audio_path):
|
|
214
|
+
# Try to find the downloaded file with different extension
|
|
215
|
+
for ext in ["wav", "m4a", "webm", "mp3", "opus"]:
|
|
216
|
+
alt = str(self._media_dir / f"yt_audio.{ext}")
|
|
217
|
+
if os.path.exists(alt):
|
|
218
|
+
audio_path = alt
|
|
219
|
+
break
|
|
220
|
+
else:
|
|
221
|
+
raise RuntimeError("Audio extraction failed — no output file found")
|
|
222
|
+
|
|
223
|
+
audio_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
|
|
224
|
+
progress(35, f"Phase 3/4 — Audio extracted ({audio_size_mb:.1f} MB)")
|
|
225
|
+
|
|
226
|
+
# === Phase 4: Transcribe audio ===
|
|
227
|
+
progress(38, "Phase 4/4 — Transcribing audio (this may take a while)...")
|
|
190
228
|
text = self._transcribe_audio(audio_path)
|
|
191
229
|
|
|
192
|
-
|
|
230
|
+
if not text or len(text.strip()) < 20:
|
|
231
|
+
raise RuntimeError("Transcription produced no usable text")
|
|
232
|
+
|
|
233
|
+
word_count = len(text.split())
|
|
234
|
+
progress(70, f"Phase 4/4 — Transcribed: {word_count} words")
|
|
235
|
+
|
|
236
|
+
# Rename audio to include title for easy identification
|
|
237
|
+
safe_title = "".join(c if c.isalnum() or c in " -_" else "" for c in title)[:50].strip()
|
|
238
|
+
final_audio = self._media_dir / f"{safe_title}.wav"
|
|
193
239
|
try:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
240
|
+
import shutil
|
|
241
|
+
shutil.move(audio_path, str(final_audio))
|
|
242
|
+
except Exception:
|
|
243
|
+
final_audio = Path(audio_path)
|
|
197
244
|
|
|
198
245
|
return text, title
|
|
199
246
|
|
package/package.json
CHANGED