arkaos 3.78.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -30
- package/VERSION +1 -1
- package/arka/SKILL.md +2 -2
- package/config/agent-allowlists/laravel.yaml +1 -0
- package/config/agent-allowlists/node.yaml +1 -0
- package/config/agent-allowlists/nuxt.yaml +1 -0
- package/config/agent-allowlists/python.yaml +1 -0
- package/core/agents/__pycache__/registry_gen.cpython-313.pyc +0 -0
- package/core/agents/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/agents/registry_gen.py +6 -1
- package/core/agents/schema.py +4 -0
- package/core/cognition/__pycache__/reorganizer.cpython-313.pyc +0 -0
- package/core/cognition/reorganizer.py +37 -7
- package/core/governance/__pycache__/design_system_lint.cpython-313.pyc +0 -0
- package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/agent_match.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/sources.cpython-313.pyc +0 -0
- package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
- package/core/knowledge/agent_match.py +114 -0
- package/core/knowledge/chunker.py +45 -0
- package/core/knowledge/ingest.py +156 -78
- package/core/knowledge/sources.py +138 -0
- package/core/knowledge/vector_store.py +52 -0
- package/core/squads/__pycache__/loader.cpython-313.pyc +0 -0
- package/core/squads/loader.py +25 -0
- package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc +0 -0
- package/core/sync/agent_provisioner.py +19 -8
- package/dashboard/app/components/KnowledgeSourcesList.vue +40 -13
- package/dashboard/app/pages/cognition.vue +9 -4
- package/dashboard/app/pages/knowledge/[id].vue +669 -0
- package/dashboard/app/pages/knowledge/index.vue +1281 -0
- package/dashboard/app/types/index.d.ts +1 -1
- package/departments/brand/agents/ux-designer.yaml +15 -1
- package/departments/brand/agents/ux-researcher.yaml +73 -0
- package/departments/brand/agents/ux-strategist.yaml +72 -0
- package/departments/dev/agents/ai-engineering/ai-engineering-lead.yaml +76 -0
- package/departments/dev/agents/architect.yaml +9 -3
- package/departments/dev/agents/backend-core/laravel-eng.yaml +76 -0
- package/departments/dev/agents/backend-core/node-ts-eng.yaml +76 -0
- package/departments/dev/agents/backend-core/python-eng.yaml +76 -0
- package/departments/dev/agents/backend-dev.yaml +10 -4
- package/departments/dev/agents/data-platform/etl-eng.yaml +74 -0
- package/departments/dev/agents/dba.yaml +7 -3
- package/departments/dev/references/backend-knowledge-and-tools.md +70 -0
- package/departments/ecom/agents/retention-manager.yaml +13 -1
- package/departments/leadership/agents/culture-coach.yaml +20 -0
- package/departments/leadership/agents/hr-specialist.yaml +18 -0
- package/departments/leadership/agents/leadership-director.yaml +10 -0
- package/departments/org/agents/chief-of-staff.yaml +76 -0
- package/departments/org/agents/coo.yaml +11 -0
- package/departments/org/agents/okr-steward.yaml +71 -0
- package/departments/org/agents/org-designer.yaml +23 -0
- package/departments/org/skills/okr-cadence/SKILL.md +34 -0
- package/departments/org/skills/principles-audit/SKILL.md +36 -0
- package/departments/pm/agents/pm-director.yaml +21 -8
- package/departments/pm/agents/product-owner.yaml +24 -2
- package/departments/pm/agents/scrum-master.yaml +21 -0
- package/departments/pm/agents/strategic-pm.yaml +72 -0
- package/departments/pm/skills/discovery-plan/SKILL.md +7 -1
- package/departments/quality/agents/cqo.yaml +8 -0
- package/departments/saas/agents/cs-manager.yaml +19 -2
- package/departments/saas/agents/growth-engineer.yaml +14 -1
- package/departments/saas/agents/metrics-analyst.yaml +17 -1
- package/departments/saas/agents/revops-lead.yaml +73 -0
- package/departments/saas/skills/leaky-bucket/SKILL.md +28 -0
- package/departments/saas/skills/voc-loop/SKILL.md +29 -0
- package/departments/sales/agents/sales-director.yaml +9 -0
- package/departments/sales/agents/sdr.yaml +72 -0
- package/departments/strategy/agents/decision-quality.yaml +72 -0
- package/departments/strategy/agents/strategy-director.yaml +13 -0
- package/departments/strategy/skills/premortem/SKILL.md +33 -0
- package/knowledge/agents-registry-v2.json +1218 -78
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
- package/scripts/bench/__init__.py +5 -0
- package/scripts/bench/__pycache__/__init__.cpython-313.pyc +0 -0
- package/scripts/bench/__pycache__/harness.cpython-313.pyc +0 -0
- package/scripts/bench/__pycache__/run.cpython-313.pyc +0 -0
- package/scripts/bench/harness.py +138 -0
- package/scripts/bench/run.py +136 -0
- package/scripts/dashboard-api.py +376 -13
- package/scripts/tools/__pycache__/docs_stats.cpython-313.pyc +0 -0
- package/scripts/tools/docs_stats.py +154 -0
- package/dashboard/app/pages/knowledge.vue +0 -918
package/package.json
CHANGED
package/pyproject.toml
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""ArkaOS benchmark harness -- core engine measurements.
|
|
2
|
+
|
|
3
|
+
Three honest measurements:
|
|
4
|
+
|
|
5
|
+
1. Synapse injection latency (engine-only, no vector store) -- cold vs warm,
|
|
6
|
+
plus per-layer compute time so the "cached layers are sub-millisecond"
|
|
7
|
+
claim can be verified against the "full engine costs N ms" reality.
|
|
8
|
+
2. Subagent handoff artifact size -- measured token estimate vs the documented
|
|
9
|
+
~379-token claim.
|
|
10
|
+
3. Routing accuracy -- DepartmentLayer keyword detection over a fixed labelled
|
|
11
|
+
prompt set.
|
|
12
|
+
|
|
13
|
+
All numbers are reproducible. Timings vary by machine; routing accuracy and
|
|
14
|
+
handoff sizes are deterministic.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import statistics
|
|
19
|
+
import sys
|
|
20
|
+
import time
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Callable
|
|
23
|
+
|
|
24
|
+
# Two directory levels above this file's folder is treated as the repository
# root. It is put at the front of sys.path (only if not already present) so
# the function-local ``core.*`` imports below resolve when this module is
# loaded from outside the repository root.
_REPO_ROOT = Path(__file__).resolve().parents[2]
if str(_REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(_REPO_ROOT))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _percentiles(samples_ms: list[float]) -> dict:
|
|
30
|
+
"""Summarise a list of millisecond samples."""
|
|
31
|
+
ordered = sorted(samples_ms)
|
|
32
|
+
return {
|
|
33
|
+
"runs": len(ordered),
|
|
34
|
+
"min": round(ordered[0], 3),
|
|
35
|
+
"p50": round(statistics.median(ordered), 3),
|
|
36
|
+
"mean": round(statistics.mean(ordered), 3),
|
|
37
|
+
"max": round(ordered[-1], 3),
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _time_call(fn: Callable[[], object]) -> float:
|
|
42
|
+
"""Time a single call, return elapsed milliseconds."""
|
|
43
|
+
start = time.perf_counter()
|
|
44
|
+
fn()
|
|
45
|
+
return (time.perf_counter() - start) * 1000.0
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def bench_synapse_latency(runs: int = 50) -> dict:
    """Measure Synapse engine injection latency (cold vs warm) plus a profile.

    Args:
        runs: Number of timed samples to collect for each of the cold and
            warm series.

    Returns:
        A dict with ``layer_count`` (read from the engine), ``cold_ms`` and
        ``warm_ms`` (summaries produced by ``_percentiles``) and
        ``injection_profile`` (``layers_computed``, ``layers_skipped`` and
        ``tokens_injected`` taken from the last entry of ``engine.metrics``,
        or ``None`` values when no metrics were recorded).
    """
    from core.synapse.engine import create_default_engine
    from core.synapse.layers import PromptContext

    engine = create_default_engine()
    ctx = PromptContext(
        user_input="fix the authentication bug in the login controller",
        cwd="/tmp/project", git_branch="feat/auth", project_name="demo",
        project_stack="laravel 11", active_agent="backend-dev",
    )

    def inject() -> object:
        return engine.inject(ctx)

    cold = []
    for _ in range(runs):
        # Clear the cache *outside* the timed call so the cold sample covers
        # the injection only, not the cost of clearing the cache.
        engine.clear_cache()
        cold.append(_time_call(inject))

    engine.inject(ctx)  # warm the cache
    warm = [_time_call(inject) for _ in range(runs)]

    # The profile reflects the most recent injection, i.e. a warm one.
    last = engine.metrics[-1] if engine.metrics else {}
    profile = {
        "layers_computed": last.get("layers_computed"),
        "layers_skipped": last.get("layers_skipped"),
        "tokens_injected": last.get("tokens_injected"),
    }
    return {"layer_count": engine.layer_count,
            "cold_ms": _percentiles(cold), "warm_ms": _percentiles(warm),
            "injection_profile": profile}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def bench_subagent_handoff() -> dict:
    """Build one sample handoff artifact and report its size.

    Returns:
        A dict with the documented token claim (379), the artifact's own
        ``estimated_tokens`` value and the character length of the prompt
        returned by ``to_prompt()``.
    """
    from core.runtime.subagent import HandoffArtifact

    files = [
        "app/Services/BillingService.php",
        "app/Http/Controllers/WebhookController.php",
        "tests/Feature/BillingTest.php",
    ]
    summary = (
        "Laravel 11 app, Cashier installed. "
        "Customer model has stripe_id. "
        "Need tiered pricing with volume discounts."
    )
    rules = [
        "SOLID + Services/Repositories",
        "Feature tests required",
        "Idempotent webhook handling",
    ]
    criteria = ["80%+ coverage", "OWASP reviewed", "Conventional commits"]

    sample = HandoffArtifact(
        task_id="task-0042",
        task_description="Implement Stripe subscription billing with idempotent webhooks",
        agent_id="backend-dev",
        agent_role="Senior Backend Developer",
        agent_disc="D:80 I:50 S:45 C:78",
        department="dev",
        relevant_files=files,
        context_summary=summary,
        constraints=rules,
        expected_output="Tested, secure billing implementation with passing suite",
        quality_criteria=criteria,
    )

    measured = sample.estimated_tokens
    prompt_length = len(sample.to_prompt())
    return {
        "documented_claim": 379,
        "measured_tokens": measured,
        "prompt_chars": prompt_length,
    }
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Fixed labelled prompt set for routing accuracy. Each entry is a pair of
# (prompt, expected_department); bench_routing_accuracy() feeds every prompt
# to the department layer and compares the detected value with the label.
_ROUTING_SET: list[tuple[str, str]] = [
    ("fix the authentication bug in the login controller", "dev"),
    ("refactor the payment service and add unit tests", "dev"),
    ("create a go-to-market plan for our new SaaS", "saas"),
    ("design a brand identity with logo and color palette", "brand"),
    ("write viral content hooks for our TikTok channel", "content"),
    ("build a high-converting landing page funnel", "landing"),
    ("audit our online store conversion rate", "ecom"),
    ("model our Q3 budget and cash flow forecast", "finance"),
    ("run a competitive analysis with Porter's Five Forces", "strategy"),
    ("plan the next sprint and groom the backlog", "pm"),
    ("set up an SEO and paid ads growth campaign", "marketing"),
    ("automate our client onboarding with an SOP", "ops"),
]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def bench_routing_accuracy() -> dict:
    """Score DepartmentLayer detection against the labelled prompt set.

    Returns:
        A dict with ``total`` prompts, ``correct`` matches, ``accuracy_pct``
        (rounded to one decimal place) and per-prompt ``details``.
    """
    from core.synapse.layers import DepartmentLayer, PromptContext

    department_layer = DepartmentLayer()
    rows = []
    for text, wanted in _ROUTING_SET:
        outcome = department_layer.compute(PromptContext(user_input=text))
        found = (outcome.content or "").strip()
        rows.append({
            "prompt": text,
            "expected": wanted,
            # An empty detection is reported with a visible placeholder.
            "detected": found or "(none)",
            "ok": found == wanted,
        })

    correct = sum(int(row["ok"]) for row in rows)
    total = len(_ROUTING_SET)
    percentage = round(100.0 * correct / total, 1)
    return {"total": total, "correct": correct,
            "accuracy_pct": percentage, "details": rows}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def run_all(runs: int = 50) -> dict:
    """Execute the three benchmarks in order and bundle their results.

    Args:
        runs: Sample count forwarded to the latency benchmark.
    """
    latency = bench_synapse_latency(runs=runs)
    handoff = bench_subagent_handoff()
    routing = bench_routing_accuracy()
    combined = {
        "synapse_latency": latency,
        "subagent_handoff": handoff,
        "routing_accuracy": routing,
    }
    return combined
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Run the ArkaOS benchmark harness and persist results.
|
|
3
|
+
|
|
4
|
+
Writes:
|
|
5
|
+
- benchmarks/results.json -- machine-readable, consumed by the wiki
|
|
6
|
+
- benchmarks/results.md -- human-readable summary table
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python scripts/bench/run.py # default 50 runs
|
|
10
|
+
python scripts/bench/run.py --runs 100
|
|
11
|
+
python scripts/bench/run.py --runs 30 --no-write # print only
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import json
|
|
17
|
+
import platform
|
|
18
|
+
import sys
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
# Two directory levels above this file's folder is treated as the repository
# root; it is put at the front of sys.path (only if not already present) so
# the absolute ``scripts.bench`` import below resolves when this file is run
# directly as a script.
_REPO_ROOT = Path(__file__).resolve().parents[2]
if str(_REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(_REPO_ROOT))

# Deliberately imported after the sys.path adjustment above, hence the
# suppression of the "module level import not at top of file" lint.
from scripts.bench import harness  # noqa: E402

# Directory that main() creates and writes results.json / results.md into.
_OUT_DIR = _REPO_ROOT / "benchmarks"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _environment() -> dict:
|
|
31
|
+
"""Capture the machine environment (numbers are machine-relative)."""
|
|
32
|
+
return {
|
|
33
|
+
"python": platform.python_version(),
|
|
34
|
+
"platform": platform.platform(),
|
|
35
|
+
"machine": platform.machine(),
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _synapse_section(sl: dict) -> list[str]:
|
|
40
|
+
"""Render the Synapse latency section."""
|
|
41
|
+
prof = sl["injection_profile"]
|
|
42
|
+
return [
|
|
43
|
+
"## Synapse context injection (engine-only, no vector store)",
|
|
44
|
+
"",
|
|
45
|
+
f"- Registered layers: **{sl['layer_count']}**",
|
|
46
|
+
f"- Cold injection (cache cleared each run): "
|
|
47
|
+
f"p50 **{sl['cold_ms']['p50']} ms**, mean {sl['cold_ms']['mean']} ms, "
|
|
48
|
+
f"min {sl['cold_ms']['min']} ms, max {sl['cold_ms']['max']} ms "
|
|
49
|
+
f"({sl['cold_ms']['runs']} runs)",
|
|
50
|
+
f"- Warm injection (cached): "
|
|
51
|
+
f"p50 **{sl['warm_ms']['p50']} ms**, mean {sl['warm_ms']['mean']} ms "
|
|
52
|
+
f"({sl['warm_ms']['runs']} runs)",
|
|
53
|
+
"- The small cold/warm delta is expected: cacheable layers are a "
|
|
54
|
+
"minority of total compute, so warming the cache saves only a few ms.",
|
|
55
|
+
f"- Representative injection: {prof['layers_computed']} layers computed, "
|
|
56
|
+
f"{prof['layers_skipped']} skipped, {prof['tokens_injected']} tokens injected",
|
|
57
|
+
"",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _handoff_section(ho: dict) -> list[str]:
|
|
62
|
+
"""Render the subagent handoff section."""
|
|
63
|
+
return [
|
|
64
|
+
"## Subagent handoff artifact",
|
|
65
|
+
"",
|
|
66
|
+
f"- Measured (representative artifact): **{ho['measured_tokens']} word-tokens** "
|
|
67
|
+
f"({ho['prompt_chars']} chars). 'word-tokens' is a whitespace-split estimate, "
|
|
68
|
+
"not a BPE tokenizer count.",
|
|
69
|
+
f"- Previously documented claim: {ho['documented_claim']}",
|
|
70
|
+
"",
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _routing_section(ra: dict) -> list[str]:
|
|
75
|
+
"""Render the routing accuracy section + table."""
|
|
76
|
+
out = [
|
|
77
|
+
"## Routing accuracy (DepartmentLayer keyword detection)",
|
|
78
|
+
"",
|
|
79
|
+
f"- **{ra['correct']}/{ra['total']} = {ra['accuracy_pct']}%** on a fixed "
|
|
80
|
+
"labelled prompt set",
|
|
81
|
+
"",
|
|
82
|
+
"| Prompt | Expected | Detected | OK |",
|
|
83
|
+
"|---|---|---|:--:|",
|
|
84
|
+
]
|
|
85
|
+
for d in ra["details"]:
|
|
86
|
+
mark = "yes" if d["ok"] else "no"
|
|
87
|
+
prompt = d["prompt"] if len(d["prompt"]) <= 48 else d["prompt"][:45] + "..."
|
|
88
|
+
out.append(f"| {prompt} | {d['expected']} | {d['detected']} | {mark} |")
|
|
89
|
+
out.append("")
|
|
90
|
+
return out
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def render_markdown(results: dict, env: dict) -> str:
    """Assemble the full human-readable benchmark report.

    Args:
        results: Combined benchmark results keyed by ``synapse_latency``,
            ``subagent_handoff`` and ``routing_accuracy``.
        env: Environment description providing ``python`` and ``platform``.

    Returns:
        The report as a single newline-joined Markdown string.
    """
    lines = [
        "# ArkaOS Benchmarks",
        "",
        "> Generated by `python scripts/bench/run.py`. Timings are machine-relative; "
        "routing accuracy and handoff size are deterministic.",
        "",
        f"**Environment:** Python {env['python']} - {env['platform']}",
        "",
    ]
    lines.extend(_synapse_section(results["synapse_latency"]))
    lines.extend(_handoff_section(results["subagent_handoff"]))
    lines.extend(_routing_section(results["routing_accuracy"]))
    return "\n".join(lines)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def main() -> int:
|
|
111
|
+
"""Entry point."""
|
|
112
|
+
parser = argparse.ArgumentParser(description="Run ArkaOS benchmarks")
|
|
113
|
+
parser.add_argument("--runs", type=int, default=50, help="Latency samples (default 50)")
|
|
114
|
+
parser.add_argument("--no-write", action="store_true", help="Print only, do not write files")
|
|
115
|
+
args = parser.parse_args()
|
|
116
|
+
|
|
117
|
+
env = _environment()
|
|
118
|
+
results = harness.run_all(runs=args.runs)
|
|
119
|
+
payload = {"environment": env, "results": results}
|
|
120
|
+
md = render_markdown(results, env)
|
|
121
|
+
|
|
122
|
+
if args.no_write:
|
|
123
|
+
print(json.dumps(payload, indent=2))
|
|
124
|
+
print("\n" + md)
|
|
125
|
+
return 0
|
|
126
|
+
|
|
127
|
+
_OUT_DIR.mkdir(exist_ok=True)
|
|
128
|
+
(_OUT_DIR / "results.json").write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
|
129
|
+
(_OUT_DIR / "results.md").write_text(md + "\n", encoding="utf-8")
|
|
130
|
+
print(f"Wrote {_OUT_DIR / 'results.json'} and {_OUT_DIR / 'results.md'}")
|
|
131
|
+
print("\n" + md)
|
|
132
|
+
return 0
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# Run the CLI only when this file is executed as a script; the integer
# returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|