arkaos 3.78.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +42 -30
  2. package/VERSION +1 -1
  3. package/arka/SKILL.md +2 -2
  4. package/config/agent-allowlists/laravel.yaml +1 -0
  5. package/config/agent-allowlists/node.yaml +1 -0
  6. package/config/agent-allowlists/nuxt.yaml +1 -0
  7. package/config/agent-allowlists/python.yaml +1 -0
  8. package/core/agents/__pycache__/registry_gen.cpython-313.pyc +0 -0
  9. package/core/agents/__pycache__/schema.cpython-313.pyc +0 -0
  10. package/core/agents/registry_gen.py +6 -1
  11. package/core/agents/schema.py +4 -0
  12. package/core/cognition/__pycache__/reorganizer.cpython-313.pyc +0 -0
  13. package/core/cognition/reorganizer.py +37 -7
  14. package/core/governance/__pycache__/design_system_lint.cpython-313.pyc +0 -0
  15. package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc +0 -0
  16. package/core/knowledge/__pycache__/agent_match.cpython-313.pyc +0 -0
  17. package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
  18. package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
  19. package/core/knowledge/__pycache__/sources.cpython-313.pyc +0 -0
  20. package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
  21. package/core/knowledge/agent_match.py +114 -0
  22. package/core/knowledge/chunker.py +45 -0
  23. package/core/knowledge/ingest.py +156 -78
  24. package/core/knowledge/sources.py +138 -0
  25. package/core/knowledge/vector_store.py +52 -0
  26. package/core/squads/__pycache__/loader.cpython-313.pyc +0 -0
  27. package/core/squads/loader.py +25 -0
  28. package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc +0 -0
  29. package/core/sync/agent_provisioner.py +19 -8
  30. package/dashboard/app/components/KnowledgeSourcesList.vue +40 -13
  31. package/dashboard/app/pages/cognition.vue +9 -4
  32. package/dashboard/app/pages/knowledge/[id].vue +669 -0
  33. package/dashboard/app/pages/knowledge/index.vue +1281 -0
  34. package/dashboard/app/types/index.d.ts +1 -1
  35. package/departments/brand/agents/ux-designer.yaml +15 -1
  36. package/departments/brand/agents/ux-researcher.yaml +73 -0
  37. package/departments/brand/agents/ux-strategist.yaml +72 -0
  38. package/departments/dev/agents/ai-engineering/ai-engineering-lead.yaml +76 -0
  39. package/departments/dev/agents/architect.yaml +9 -3
  40. package/departments/dev/agents/backend-core/laravel-eng.yaml +76 -0
  41. package/departments/dev/agents/backend-core/node-ts-eng.yaml +76 -0
  42. package/departments/dev/agents/backend-core/python-eng.yaml +76 -0
  43. package/departments/dev/agents/backend-dev.yaml +10 -4
  44. package/departments/dev/agents/data-platform/etl-eng.yaml +74 -0
  45. package/departments/dev/agents/dba.yaml +7 -3
  46. package/departments/dev/references/backend-knowledge-and-tools.md +70 -0
  47. package/departments/ecom/agents/retention-manager.yaml +13 -1
  48. package/departments/leadership/agents/culture-coach.yaml +20 -0
  49. package/departments/leadership/agents/hr-specialist.yaml +18 -0
  50. package/departments/leadership/agents/leadership-director.yaml +10 -0
  51. package/departments/org/agents/chief-of-staff.yaml +76 -0
  52. package/departments/org/agents/coo.yaml +11 -0
  53. package/departments/org/agents/okr-steward.yaml +71 -0
  54. package/departments/org/agents/org-designer.yaml +23 -0
  55. package/departments/org/skills/okr-cadence/SKILL.md +34 -0
  56. package/departments/org/skills/principles-audit/SKILL.md +36 -0
  57. package/departments/pm/agents/pm-director.yaml +21 -8
  58. package/departments/pm/agents/product-owner.yaml +24 -2
  59. package/departments/pm/agents/scrum-master.yaml +21 -0
  60. package/departments/pm/agents/strategic-pm.yaml +72 -0
  61. package/departments/pm/skills/discovery-plan/SKILL.md +7 -1
  62. package/departments/quality/agents/cqo.yaml +8 -0
  63. package/departments/saas/agents/cs-manager.yaml +19 -2
  64. package/departments/saas/agents/growth-engineer.yaml +14 -1
  65. package/departments/saas/agents/metrics-analyst.yaml +17 -1
  66. package/departments/saas/agents/revops-lead.yaml +73 -0
  67. package/departments/saas/skills/leaky-bucket/SKILL.md +28 -0
  68. package/departments/saas/skills/voc-loop/SKILL.md +29 -0
  69. package/departments/sales/agents/sales-director.yaml +9 -0
  70. package/departments/sales/agents/sdr.yaml +72 -0
  71. package/departments/strategy/agents/decision-quality.yaml +72 -0
  72. package/departments/strategy/agents/strategy-director.yaml +13 -0
  73. package/departments/strategy/skills/premortem/SKILL.md +33 -0
  74. package/knowledge/agents-registry-v2.json +1218 -78
  75. package/package.json +1 -1
  76. package/pyproject.toml +1 -1
  77. package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
  78. package/scripts/bench/__init__.py +5 -0
  79. package/scripts/bench/__pycache__/__init__.cpython-313.pyc +0 -0
  80. package/scripts/bench/__pycache__/harness.cpython-313.pyc +0 -0
  81. package/scripts/bench/__pycache__/run.cpython-313.pyc +0 -0
  82. package/scripts/bench/harness.py +138 -0
  83. package/scripts/bench/run.py +136 -0
  84. package/scripts/dashboard-api.py +376 -13
  85. package/scripts/tools/__pycache__/docs_stats.cpython-313.pyc +0 -0
  86. package/scripts/tools/docs_stats.py +154 -0
  87. package/dashboard/app/pages/knowledge.vue +0 -918
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arkaos",
3
- "version": "3.78.0",
3
+ "version": "4.0.1",
4
4
  "description": "The Operating System for AI Agent Teams",
5
5
  "type": "module",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "arkaos-core"
3
- version = "3.78.0"
3
+ version = "4.0.1"
4
4
  description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -0,0 +1,5 @@
1
+ """ArkaOS benchmark harness.
2
+
3
+ Reproducible, honest measurements of the core engine. No fabricated numbers:
4
+ every value in the docs comes from running `python scripts/bench/run.py`.
5
+ """
@@ -0,0 +1,138 @@
1
+ """ArkaOS benchmark harness -- core engine measurements.
2
+
3
+ Three honest measurements:
4
+
5
+ 1. Synapse injection latency (engine-only, no vector store) -- cold vs warm,
6
+ plus an injection profile (layers computed/skipped, tokens injected) so the
7
+ "cached layers are sub-millisecond" claim can be checked against the "full engine costs N ms" reality.
8
+ 2. Subagent handoff artifact size -- measured token estimate vs the documented
9
+ ~379-token claim.
10
+ 3. Routing accuracy -- DepartmentLayer keyword detection over a fixed labelled
11
+ prompt set.
12
+
13
+ All numbers are reproducible. Timings vary by machine; routing accuracy and
14
+ handoff sizes are deterministic.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import statistics
19
+ import sys
20
+ import time
21
+ from pathlib import Path
22
+ from typing import Callable
23
+
24
+ _REPO_ROOT = Path(__file__).resolve().parents[2]
25
+ if str(_REPO_ROOT) not in sys.path:
26
+ sys.path.insert(0, str(_REPO_ROOT))
27
+
28
+
29
def _percentiles(samples_ms: list[float]) -> dict:
    """Summarise a list of millisecond samples.

    Args:
        samples_ms: Elapsed times in milliseconds; must not be empty.

    Returns:
        A dict with ``runs`` (the sample count) plus ``min``, ``p50`` (the
        median), ``mean`` and ``max``, each rounded to 3 decimal places.

    Raises:
        ValueError: If ``samples_ms`` is empty.
    """
    if not samples_ms:
        # Without this guard the failure is an opaque IndexError on ordered[0].
        raise ValueError("cannot summarise an empty list of samples")
    ordered = sorted(samples_ms)
    return {
        "runs": len(ordered),
        "min": round(ordered[0], 3),
        "p50": round(statistics.median(ordered), 3),
        "mean": round(statistics.mean(ordered), 3),
        "max": round(ordered[-1], 3),
    }
39
+
40
+
41
def _time_call(fn: Callable[[], object]) -> float:
    """Invoke ``fn`` once and report how long the call took, in milliseconds.

    Whatever ``fn`` returns is discarded; the clock is ``time.perf_counter``.
    """
    began = time.perf_counter()
    fn()
    elapsed_seconds = time.perf_counter() - began
    return elapsed_seconds * 1000.0
46
+
47
+
48
def bench_synapse_latency(runs: int = 50) -> dict:
    """Measure Synapse engine injection latency, cold vs warm.

    A cold sample clears the engine cache and then times one ``inject`` call;
    a warm sample times ``inject`` against an already-populated cache.

    Args:
        runs: Number of timed samples per mode; must be at least 1.

    Returns:
        A dict with ``layer_count``, the ``cold_ms`` and ``warm_ms`` summaries
        produced by ``_percentiles``, and ``injection_profile`` holding
        ``layers_computed``, ``layers_skipped`` and ``tokens_injected`` taken
        from the engine's most recent metrics entry (``None`` values when the
        engine recorded no metrics).

    Raises:
        ValueError: If ``runs`` is less than 1.
    """
    from core.synapse.engine import create_default_engine
    from core.synapse.layers import PromptContext

    if runs < 1:
        # Zero samples cannot be summarised; fail before doing any work.
        raise ValueError(f"runs must be >= 1, got {runs}")

    engine = create_default_engine()
    ctx = PromptContext(
        user_input="fix the authentication bug in the login controller",
        cwd="/tmp/project", git_branch="feat/auth", project_name="demo",
        project_stack="laravel 11", active_agent="backend-dev",
    )

    def _inject() -> None:
        engine.inject(ctx)

    cold = []
    for _ in range(runs):
        # Clear the cache outside the timed region so a cold sample measures
        # the injection itself, not the cost of clearing the cache.
        engine.clear_cache()
        cold.append(_time_call(_inject))

    engine.inject(ctx)  # warm the cache
    warm = [_time_call(_inject) for _ in range(runs)]

    last = engine.metrics[-1] if engine.metrics else {}
    profile = {
        "layers_computed": last.get("layers_computed"),
        "layers_skipped": last.get("layers_skipped"),
        "tokens_injected": last.get("tokens_injected"),
    }
    return {
        "layer_count": engine.layer_count,
        "cold_ms": _percentiles(cold),
        "warm_ms": _percentiles(warm),
        "injection_profile": profile,
    }
71
+
72
+
73
def bench_subagent_handoff() -> dict:
    """Build one fixed sample handoff artifact and report its size.

    Returns:
        A dict with ``documented_claim`` (the constant 379),
        ``measured_tokens`` (the artifact's ``estimated_tokens``) and
        ``prompt_chars`` (the length of ``artifact.to_prompt()``).
    """
    from core.runtime.subagent import HandoffArtifact

    # The sample is fixed so the reported size never varies between runs.
    fields = {
        "task_id": "task-0042",
        "task_description": "Implement Stripe subscription billing with idempotent webhooks",
        "agent_id": "backend-dev",
        "agent_role": "Senior Backend Developer",
        "agent_disc": "D:80 I:50 S:45 C:78",
        "department": "dev",
        "relevant_files": [
            "app/Services/BillingService.php",
            "app/Http/Controllers/WebhookController.php",
            "tests/Feature/BillingTest.php",
        ],
        "context_summary": (
            "Laravel 11 app, Cashier installed. Customer model has "
            "stripe_id. Need tiered pricing with volume discounts."
        ),
        "constraints": [
            "SOLID + Services/Repositories",
            "Feature tests required",
            "Idempotent webhook handling",
        ],
        "expected_output": "Tested, secure billing implementation with passing suite",
        "quality_criteria": ["80%+ coverage", "OWASP reviewed", "Conventional commits"],
    }
    sample = HandoffArtifact(**fields)
    token_estimate = sample.estimated_tokens
    char_count = len(sample.to_prompt())
    return {
        "documented_claim": 379,
        "measured_tokens": token_estimate,
        "prompt_chars": char_count,
    }
95
+
96
+
97
# Labelled prompts used to score routing accuracy. Each entry pairs a prompt
# with the department it is expected to route to; the set is fixed so the
# reported accuracy is the same on every run.
_ROUTING_SET: list[tuple[str, str]] = [
    (
        "fix the authentication bug in the login controller",
        "dev",
    ),
    (
        "refactor the payment service and add unit tests",
        "dev",
    ),
    (
        "create a go-to-market plan for our new SaaS",
        "saas",
    ),
    (
        "design a brand identity with logo and color palette",
        "brand",
    ),
    (
        "write viral content hooks for our TikTok channel",
        "content",
    ),
    (
        "build a high-converting landing page funnel",
        "landing",
    ),
    (
        "audit our online store conversion rate",
        "ecom",
    ),
    (
        "model our Q3 budget and cash flow forecast",
        "finance",
    ),
    (
        "run a competitive analysis with Porter's Five Forces",
        "strategy",
    ),
    (
        "plan the next sprint and groom the backlog",
        "pm",
    ),
    (
        "set up an SEO and paid ads growth campaign",
        "marketing",
    ),
    (
        "automate our client onboarding with an SOP",
        "ops",
    ),
]
112
+
113
+
114
def bench_routing_accuracy() -> dict:
    """Score DepartmentLayer routing against the labelled prompt set.

    Each prompt in ``_ROUTING_SET`` is passed through ``DepartmentLayer.compute``
    and the stripped ``content`` of the result is compared with the expected
    department.

    Returns:
        A dict with ``total``, ``correct``, ``accuracy_pct`` (rounded to one
        decimal place) and ``details`` (one record per prompt).
    """
    from core.synapse.layers import DepartmentLayer, PromptContext

    router = DepartmentLayer()
    records = []
    for text, wanted in _ROUTING_SET:
        outcome = router.compute(PromptContext(user_input=text))
        found = (outcome.content or "").strip()
        records.append({
            "prompt": text,
            "expected": wanted,
            # An empty detection is reported with a visible placeholder.
            "detected": found if found else "(none)",
            "ok": found == wanted,
        })

    matched = sum(1 for record in records if record["ok"])
    count = len(_ROUTING_SET)
    return {
        "total": count,
        "correct": matched,
        "accuracy_pct": round(100.0 * matched / count, 1),
        "details": records,
    }
130
+
131
+
132
def run_all(runs: int = 50) -> dict:
    """Execute the three benchmarks in order and collect their results.

    Args:
        runs: Sample count forwarded to the latency benchmark.

    Returns:
        A dict keyed by ``synapse_latency``, ``subagent_handoff`` and
        ``routing_accuracy``.
    """
    combined = {}
    combined["synapse_latency"] = bench_synapse_latency(runs=runs)
    combined["subagent_handoff"] = bench_subagent_handoff()
    combined["routing_accuracy"] = bench_routing_accuracy()
    return combined
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env python3
2
+ """Run the ArkaOS benchmark harness and persist results.
3
+
4
+ Writes:
5
+ - benchmarks/results.json -- machine-readable, consumed by the wiki
6
+ - benchmarks/results.md -- human-readable summary table
7
+
8
+ Usage:
9
+ python scripts/bench/run.py # default 50 runs
10
+ python scripts/bench/run.py --runs 100
11
+ python scripts/bench/run.py --runs 30 --no-write # print only
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import json
17
+ import platform
18
+ import sys
19
+ from pathlib import Path
20
+
21
+ _REPO_ROOT = Path(__file__).resolve().parents[2]
22
+ if str(_REPO_ROOT) not in sys.path:
23
+ sys.path.insert(0, str(_REPO_ROOT))
24
+
25
+ from scripts.bench import harness # noqa: E402
26
+
27
+ _OUT_DIR = _REPO_ROOT / "benchmarks"
28
+
29
+
30
def _environment() -> dict:
    """Describe the machine the benchmarks ran on.

    Returns:
        A dict with the Python version (``python``), the platform string
        (``platform``) and the machine type (``machine``).
    """
    probes = (
        ("python", platform.python_version),
        ("platform", platform.platform),
        ("machine", platform.machine),
    )
    return {label: probe() for label, probe in probes}
37
+
38
+
39
def _synapse_section(sl: dict) -> list[str]:
    """Build the Markdown lines for the Synapse latency results.

    Args:
        sl: The latency result dict, with ``layer_count``, ``cold_ms``,
            ``warm_ms`` and ``injection_profile`` entries.

    Returns:
        The section as a list of lines, ending with a blank line.
    """
    prof = sl["injection_profile"]
    layers_line = f"- Registered layers: **{sl['layer_count']}**"

    cold = sl["cold_ms"]
    cold_line = (
        "- Cold injection (cache cleared each run): "
        f"p50 **{cold['p50']} ms**, mean {cold['mean']} ms, "
        f"min {cold['min']} ms, max {cold['max']} ms "
        f"({cold['runs']} runs)"
    )

    warm = sl["warm_ms"]
    warm_line = (
        "- Warm injection (cached): "
        f"p50 **{warm['p50']} ms**, mean {warm['mean']} ms "
        f"({warm['runs']} runs)"
    )

    delta_note = (
        "- The small cold/warm delta is expected: cacheable layers are a "
        "minority of total compute, so warming the cache saves only a few ms."
    )
    profile_line = (
        f"- Representative injection: {prof['layers_computed']} layers computed, "
        f"{prof['layers_skipped']} skipped, {prof['tokens_injected']} tokens injected"
    )

    return [
        "## Synapse context injection (engine-only, no vector store)",
        "",
        layers_line,
        cold_line,
        warm_line,
        delta_note,
        profile_line,
        "",
    ]
59
+
60
+
61
def _handoff_section(ho: dict) -> list[str]:
    """Build the Markdown lines for the subagent handoff results.

    Args:
        ho: The handoff result dict, with ``measured_tokens``,
            ``prompt_chars`` and ``documented_claim`` entries.

    Returns:
        The section as a list of lines, ending with a blank line.
    """
    measured_line = (
        f"- Measured (representative artifact): **{ho['measured_tokens']} word-tokens** "
        f"({ho['prompt_chars']} chars). 'word-tokens' is a whitespace-split estimate, "
        "not a BPE tokenizer count."
    )
    claim_line = f"- Previously documented claim: {ho['documented_claim']}"
    return ["## Subagent handoff artifact", "", measured_line, claim_line, ""]
72
+
73
+
74
def _routing_section(ra: dict) -> list[str]:
    """Render the routing accuracy section + table.

    Args:
        ra: The routing result dict, with ``correct``, ``total``,
            ``accuracy_pct`` and ``details`` (one record per prompt, each with
            ``prompt``, ``expected``, ``detected`` and ``ok``).

    Returns:
        The section as a list of Markdown lines, ending with a blank line.
        Prompts longer than 48 characters are cut to 45 plus ``...``.
    """

    def _cell(value: object) -> str:
        # A raw "|" would close the table cell early and shift every column
        # after it, so pipes are backslash-escaped.
        return str(value).replace("|", "\\|")

    out = [
        "## Routing accuracy (DepartmentLayer keyword detection)",
        "",
        f"- **{ra['correct']}/{ra['total']} = {ra['accuracy_pct']}%** on a fixed "
        "labelled prompt set",
        "",
        "| Prompt | Expected | Detected | OK |",
        "|---|---|---|:--:|",
    ]
    for d in ra["details"]:
        mark = "yes" if d["ok"] else "no"
        prompt = d["prompt"]
        if len(prompt) > 48:
            # Truncate before escaping so an escape sequence is never split.
            prompt = prompt[:45] + "..."
        out.append(
            f"| {_cell(prompt)} | {_cell(d['expected'])} "
            f"| {_cell(d['detected'])} | {mark} |"
        )
    out.append("")
    return out
91
+
92
+
93
def render_markdown(results: dict, env: dict) -> str:
    """Assemble the full Markdown benchmark report.

    Args:
        results: The combined result dict, with ``synapse_latency``,
            ``subagent_handoff`` and ``routing_accuracy`` entries.
        env: The environment dict; ``python`` and ``platform`` are shown.

    Returns:
        The report as one newline-joined string: a header followed by the
        Synapse, handoff and routing sections.
    """
    lines = [
        "# ArkaOS Benchmarks",
        "",
        "> Generated by `python scripts/bench/run.py`. Timings are "
        "machine-relative; routing accuracy and handoff size are deterministic.",
        "",
        f"**Environment:** Python {env['python']} - {env['platform']}",
        "",
    ]
    lines.extend(_synapse_section(results["synapse_latency"]))
    lines.extend(_handoff_section(results["subagent_handoff"]))
    lines.extend(_routing_section(results["routing_accuracy"]))
    return "\n".join(lines)
108
+
109
+
110
def main() -> int:
    """Entry point: run the benchmarks, print them, and optionally save them.

    Command-line options:
        --runs N     Number of latency samples (default 50, must be >= 1).
        --no-write   Print the JSON payload and Markdown only; write no files.

    Without ``--no-write`` the results are written to ``results.json`` and
    ``results.md`` under ``_OUT_DIR`` and the Markdown is echoed to stdout.

    Returns:
        0 on success. An invalid ``--runs`` value exits through argparse with
        a usage error instead of returning.
    """
    parser = argparse.ArgumentParser(description="Run ArkaOS benchmarks")
    parser.add_argument("--runs", type=int, default=50, help="Latency samples (default 50)")
    parser.add_argument("--no-write", action="store_true", help="Print only, do not write files")
    args = parser.parse_args()

    if args.runs < 1:
        # Zero or negative sample counts cannot be summarised; report a usage
        # error here rather than a traceback from inside the harness.
        parser.error("--runs must be at least 1")

    env = _environment()
    results = harness.run_all(runs=args.runs)
    payload = {"environment": env, "results": results}
    md = render_markdown(results, env)

    if args.no_write:
        print(json.dumps(payload, indent=2))
        print("\n" + md)
        return 0

    _OUT_DIR.mkdir(exist_ok=True)
    (_OUT_DIR / "results.json").write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    (_OUT_DIR / "results.md").write_text(md + "\n", encoding="utf-8")
    print(f"Wrote {_OUT_DIR / 'results.json'} and {_OUT_DIR / 'results.md'}")
    print("\n" + md)
    return 0


if __name__ == "__main__":
    sys.exit(main())