arkaos 2.73.0 → 2.75.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/config/hooks/_lib/workflow-classifier.sh +8 -1
- package/core/personas/__pycache__/builder.cpython-313.pyc +0 -0
- package/core/personas/builder.py +230 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
- package/scripts/dashboard-api.py +42 -0
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.
|
|
1
|
+
2.75.0
|
|
@@ -33,7 +33,14 @@ arka_wf_safe_session_id() {
|
|
|
33
33
|
|
|
34
34
|
# Verb + noun patterns shared with the original inline classifier in
|
|
35
35
|
# user-prompt-submit.sh. Keep in sync when adding new intent verbs.
|
|
36
|
-
|
|
36
|
+
#
|
|
37
|
+
# PR58 v2.75.0 — pattern widened with continuation + ship verbs after
|
|
38
|
+
# telemetry analysis showed 97% of prompts in a 30h continuous-build
|
|
39
|
+
# session were classified "classifier-did-not-match". Most of the
|
|
40
|
+
# missed prompts were short continuations ("continua", "força") or
|
|
41
|
+
# ship-tier verbs ("ship", "publish", "merge", "release", "deploy")
|
|
42
|
+
# that prolong existing flow-required activity.
|
|
43
|
+
ARKA_WF_VERB_PATTERN='(criar?|crie[ms]?|cria[mr]?|adicionar?|adiciona[mr]?|implementar?|implementa[mr]?|desenvolver?|desenvolve[mr]?|construir?|constru[ií]a?[mr]?|fazer?|faz[ae]?[mr]?|refactor(izar?)?|corrigir?|corrige[mr]?|consertar?|conserta[mr]?|continuar?|continua[mr]?|forçar?|força[mr]?|colocar?|coloca[mr]?|p[oô]r|melhorar?|melhora[mr]?|terminar?|termina[mr]?|acabar?|acaba[mr]?|publicar?|publica[mr]?|lançar?|lança[mr]?|create[sd]?|creating|build(s|ing)?|add(s|ed|ing)?|implement(s|ed|ing)?|develop(s|ed|ing)?|fix(es|ed|ing)?|refactor(s|ed|ing)?|make[sd]?|making|continue[sd]?|continuing|ship(s|ped|ping)?|merge[sd]?|merging|publish(es|ed|ing)?|release[sd]?|releasing|deploy(s|ed|ing)?|finish(es|ed|ing)?|improve[sd]?|improving)'
|
|
37
44
|
|
|
38
45
|
# Classify: returns "true" if the prompt looks like a creation/
|
|
39
46
|
# implementation/modification request, "false" otherwise.
|
|
Binary file
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""AI-powered persona builder (PR57 v2.74.0).
|
|
2
|
+
|
|
3
|
+
Generates a draft Persona from already-indexed content in the vector
|
|
4
|
+
store. The user ingests sources (YouTube transcripts, articles, PDFs)
|
|
5
|
+
via the knowledge dashboard, then the builder:
|
|
6
|
+
|
|
7
|
+
1. Searches the vector store for chunks about the target person/topic.
|
|
8
|
+
2. Sends those chunks to the configured LLM via the multi-backend
|
|
9
|
+
`LLMProvider` (Claude Code subagent / Anthropic API / Ollama local).
|
|
10
|
+
3. Parses the LLM's JSON response into a `Persona` draft for the
|
|
11
|
+
operator to review and edit before saving.
|
|
12
|
+
|
|
13
|
+
The builder NEVER writes to the database — that's the existing
|
|
14
|
+
`PersonaManager.create()` path. The builder produces a draft; the
|
|
15
|
+
operator owns the persist decision (per the project memory's
|
|
16
|
+
"Generated persona presented for review" step).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
import re
|
|
23
|
+
import uuid
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from datetime import datetime, timezone
|
|
26
|
+
|
|
27
|
+
from core.knowledge.vector_store import VectorStore
|
|
28
|
+
from core.personas.schema import (
|
|
29
|
+
Persona,
|
|
30
|
+
PersonaBigFive,
|
|
31
|
+
PersonaCommunication,
|
|
32
|
+
PersonaDISC,
|
|
33
|
+
PersonaEnneagram,
|
|
34
|
+
)
|
|
35
|
+
from core.runtime.llm_provider import LLMProvider, get_llm_provider
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
_PERSONA_SYSTEM_PROMPT = """You build behavioural-DNA personas from quotes
|
|
39
|
+
and writings of real people. Read the supplied content carefully, then
|
|
40
|
+
emit a single JSON object that follows this exact schema. Use ONLY the
|
|
41
|
+
JSON keys listed — no prose, no markdown fences, no extra fields.
|
|
42
|
+
|
|
43
|
+
{
|
|
44
|
+
"title": "<one-line role label>",
|
|
45
|
+
"tagline": "<one-line essence>",
|
|
46
|
+
"disc": {
|
|
47
|
+
"primary": "D|I|S|C",
|
|
48
|
+
"secondary": "D|I|S|C",
|
|
49
|
+
"communication_style": "<one sentence>",
|
|
50
|
+
"under_pressure": "<one sentence>",
|
|
51
|
+
"motivator": "<one sentence>"
|
|
52
|
+
},
|
|
53
|
+
"enneagram": {
|
|
54
|
+
"type": 1-9,
|
|
55
|
+
"wing": 1-9,
|
|
56
|
+
"core_motivation": "<one sentence>",
|
|
57
|
+
"core_fear": "<one sentence>",
|
|
58
|
+
"subtype": "self-preservation|social|sexual"
|
|
59
|
+
},
|
|
60
|
+
"big_five": {
|
|
61
|
+
"openness": 0-100,
|
|
62
|
+
"conscientiousness": 0-100,
|
|
63
|
+
"extraversion": 0-100,
|
|
64
|
+
"agreeableness": 0-100,
|
|
65
|
+
"neuroticism": 0-100
|
|
66
|
+
},
|
|
67
|
+
"mbti": "<4-letter type>",
|
|
68
|
+
"mental_models": ["<model>", ...],
|
|
69
|
+
"expertise_domains": ["<domain>", ...],
|
|
70
|
+
"frameworks": ["<framework>", ...],
|
|
71
|
+
"key_quotes": ["<verbatim quote>", ...],
|
|
72
|
+
"communication": {
|
|
73
|
+
"tone": "<adjective>",
|
|
74
|
+
"vocabulary_level": "lay|specialist|expert",
|
|
75
|
+
"preferred_format": "<format hint>",
|
|
76
|
+
"avoid": ["<phrase to avoid>", ...]
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
If the content is insufficient to infer a field, use the closest neutral
|
|
81
|
+
default rather than fabricating. NEVER invent quotes — only include
|
|
82
|
+
verbatim text that appears in the content."""
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass(frozen=True)
class BuildResult:
    """Immutable record describing one persona-builder run.

    Attributes:
        persona: The draft persona parsed from the LLM response.
        chunks_used: Number of chunks returned by the vector-store search.
        provider_name: Name reported by the LLM provider that was used.
        raw_response: The LLM's response text, exactly as received.
    """

    # Field order is part of the positional constructor interface.
    persona: Persona
    chunks_used: int
    provider_name: str
    raw_response: str
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class PersonaBuildError(RuntimeError):
    """Signals that a persona draft could not be built.

    Raised by the builder for an empty name, a search that returns no
    indexed content, or an LLM response that cannot be turned into a
    Persona.
    """
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class PersonaBuilder:
    """Generate persona drafts from indexed content.

    The builder searches a vector store, sends the matching chunks to an
    LLM provider and parses the JSON answer into a ``Persona``. It only
    returns the draft; nothing in this class persists it.
    """

    # Upper bound, in characters, on the context string sent to the LLM.
    MAX_CONTEXT_CHARS = 18_000

    # Text inserted between consecutive chunks in the composed context.
    _CHUNK_SEPARATOR = "\n\n---\n\n"

    def __init__(
        self,
        store: VectorStore,
        provider: LLMProvider | None = None,
    ) -> None:
        """Bind the builder to a vector store and an LLM provider.

        Args:
            store: Vector store queried for source chunks.
            provider: LLM provider to use; when omitted (or falsy) the
                result of ``get_llm_provider()`` is used instead.
        """
        self._store = store
        self._provider = provider or get_llm_provider()

    def generate(
        self,
        name: str,
        search_query: str = "",
        top_k: int = 20,
        source_label: str = "",
    ) -> BuildResult:
        """Build a persona draft for `name`.

        Searches the vector store for `search_query` (defaults to the
        name), composes the chunks into a context of at most
        MAX_CONTEXT_CHARS characters, sends it to the configured LLM,
        parses the JSON response, and returns a draft Persona plus
        telemetry.

        Args:
            name: Person to model; must not be empty or whitespace.
            search_query: Vector-store query; falls back to `name`.
            top_k: Number of chunks requested from the store.
            source_label: Value stored as the persona's ``source``;
                falls back to `name`.

        Returns:
            A BuildResult holding the draft, the number of chunks the
            search returned, the provider name and the raw LLM text.

        Raises:
            PersonaBuildError: If `name` is empty, the search returns
                nothing, or the LLM response cannot be parsed.
        """
        if not name or not name.strip():
            raise PersonaBuildError("name must not be empty")
        query = (search_query or name).strip()
        chunks = self._store.search(query, top_k=top_k)
        if not chunks:
            raise PersonaBuildError(
                f"no indexed content matches {query!r} — "
                "ingest sources first via /api/knowledge/ingest"
            )
        context = self._compose_context(chunks)
        prompt = f"Person: {name}\n\nContent:\n{context}"
        response = self._provider.complete(
            prompt, system=_PERSONA_SYSTEM_PROMPT, max_tokens=3000,
        )
        persona = self._parse(name, source_label or name, response.text)
        return BuildResult(
            persona=persona,
            chunks_used=len(chunks),
            provider_name=self._provider.name(),
            raw_response=response.text,
        )

    def _compose_context(self, chunks: list[dict]) -> str:
        """Join chunk texts into one context string within the budget.

        Each chunk contributes its ``"text"`` value, prefixed by
        ``[heading]`` on its own line when a ``"heading"`` value is
        present. Chunks without text are skipped. Chunks are taken in
        order until the next one would push the result past
        MAX_CONTEXT_CHARS; the budget counts headings and separators, so
        the returned string never exceeds the limit.

        If the very first usable chunk is already over budget it is
        truncated rather than dropped, so the LLM is never handed an
        empty context when content exists.

        Args:
            chunks: Search hits as dicts with optional ``"text"`` and
                ``"heading"`` keys.

        Returns:
            The composed context, or ``""`` when no chunk has text.
        """
        budget = self.MAX_CONTEXT_CHARS
        separator = self._CHUNK_SEPARATOR
        parts: list[str] = []
        used = 0
        for chunk in chunks:
            text = chunk.get("text") or ""
            if not text:
                continue
            heading = chunk.get("heading") or ""
            block = f"[{heading}]\n{text}" if heading else text
            # Every block after the first also costs one separator.
            cost = len(block) + (len(separator) if parts else 0)
            if used + cost > budget:
                if not parts:
                    # A single oversized chunk: keep what fits instead
                    # of sending the LLM nothing at all.
                    parts.append(block[:budget])
                break
            parts.append(block)
            used += cost
        return separator.join(parts)

    def _parse(self, name: str, source_label: str, raw: str) -> Persona:
        """Turn the LLM's raw text into a draft Persona.

        Args:
            name: Stored as the persona's name.
            source_label: Stored as the persona's ``source``.
            raw: Raw LLM response expected to contain a JSON object.

        Returns:
            A Persona with a fresh UUID and identical UTC
            ``created_at`` / ``updated_at`` timestamps.

        Raises:
            PersonaBuildError: If no JSON object can be extracted, or
                building the Persona raises TypeError/ValueError.
        """
        data = _extract_json_object(raw)
        if data is None:
            raise PersonaBuildError(
                f"LLM did not return a JSON object; raw response: {raw[:200]!r}"
            )
        now = datetime.now(timezone.utc).isoformat()
        try:
            return Persona(
                id=str(uuid.uuid4()),
                name=name,
                title=str(data.get("title") or ""),
                tagline=str(data.get("tagline") or ""),
                source=source_label,
                # Missing or null sections fall back to the schema's own
                # defaults by passing no keyword arguments.
                disc=PersonaDISC(**(data.get("disc") or {})),
                enneagram=PersonaEnneagram(**(data.get("enneagram") or {})),
                big_five=PersonaBigFive(**(data.get("big_five") or {})),
                mbti=str(data.get("mbti") or "INTJ"),
                mental_models=_as_str_list(data.get("mental_models")),
                expertise_domains=_as_str_list(data.get("expertise_domains")),
                frameworks=_as_str_list(data.get("frameworks")),
                key_quotes=_as_str_list(data.get("key_quotes")),
                communication=PersonaCommunication(
                    **(data.get("communication") or {})
                ),
                created_at=now,
                updated_at=now,
            )
        except (TypeError, ValueError) as exc:
            raise PersonaBuildError(
                f"LLM JSON does not match Persona schema: {exc}"
            ) from exc
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
_JSON_OBJECT_RE = re.compile(r"\{.*\}", re.DOTALL)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _extract_json_object(raw: str) -> dict | None:
|
|
201
|
+
"""Parse the first JSON object in `raw`.
|
|
202
|
+
|
|
203
|
+
Tolerates models that wrap JSON in markdown fences or add a leading
|
|
204
|
+
explanation. Returns None when no parseable object is found.
|
|
205
|
+
"""
|
|
206
|
+
if not raw:
|
|
207
|
+
return None
|
|
208
|
+
candidates = [raw.strip()]
|
|
209
|
+
fence_match = re.search(
|
|
210
|
+
r"```(?:json)?\s*(\{.*?\})\s*```", raw, flags=re.DOTALL,
|
|
211
|
+
)
|
|
212
|
+
if fence_match:
|
|
213
|
+
candidates.insert(0, fence_match.group(1))
|
|
214
|
+
bare_match = _JSON_OBJECT_RE.search(raw)
|
|
215
|
+
if bare_match:
|
|
216
|
+
candidates.append(bare_match.group(0))
|
|
217
|
+
for cand in candidates:
|
|
218
|
+
try:
|
|
219
|
+
obj = json.loads(cand)
|
|
220
|
+
except json.JSONDecodeError:
|
|
221
|
+
continue
|
|
222
|
+
if isinstance(obj, dict):
|
|
223
|
+
return obj
|
|
224
|
+
return None
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _as_str_list(value: object) -> list[str]:
|
|
228
|
+
if not isinstance(value, list):
|
|
229
|
+
return []
|
|
230
|
+
return [str(item) for item in value if isinstance(item, (str, int, float))]
|
package/package.json
CHANGED
package/pyproject.toml
CHANGED
|
Binary file
|
package/scripts/dashboard-api.py
CHANGED
|
@@ -687,6 +687,48 @@ def persona_delete(persona_id: str):
|
|
|
687
687
|
return {"error": "Persona not found"}
|
|
688
688
|
|
|
689
689
|
|
|
690
|
+
@app.post("/api/personas/build")
def persona_build(body: dict):
    """PR57 v2.74.0 — AI-powered persona draft from already-indexed content.

    Body: {
        "name": "<person to model>",
        "search_query": "<optional vector search query>",
        "top_k": <optional positive integer, default 20>,
        "source_label": "<optional label, e.g. 'Alex Hormozi'>"
    }

    Returns: {persona: {...draft...}, chunks_used, provider_name}
    on success, or {"error": "<message>"} when the input is invalid or
    the builder raises PersonaBuildError.

    The draft is NOT saved — the operator reviews and calls
    POST /api/personas to persist.
    """
    # A non-string name (number, list, null) is treated as missing
    # instead of crashing on .strip().
    raw_name = body.get("name")
    name = raw_name.strip() if isinstance(raw_name, str) else ""
    if not name:
        return {"error": "name is required"}

    # Missing, null, 0 and "" all mean "use the default of 20".
    try:
        top_k = int(body.get("top_k") or 20)
    except (TypeError, ValueError):
        return {"error": "top_k must be a positive integer"}
    if top_k < 1:
        return {"error": "top_k must be a positive integer"}

    store = _get_vector_store()
    if not store:
        # No shared store available: open (creating if needed) the
        # default knowledge database under the user's home directory.
        from core.knowledge.vector_store import VectorStore
        kb_db = Path.home() / ".arkaos" / "knowledge.db"
        kb_db.parent.mkdir(parents=True, exist_ok=True)
        store = VectorStore(kb_db)

    from core.personas.builder import PersonaBuilder, PersonaBuildError
    builder = PersonaBuilder(store)
    try:
        result = builder.generate(
            name=name,
            # Explicit JSON null is normalised to the empty default.
            search_query=body.get("search_query") or "",
            top_k=top_k,
            source_label=body.get("source_label") or "",
        )
    except PersonaBuildError as exc:
        return {"error": str(exc)}
    return {
        "persona": result.persona.model_dump(),
        "chunks_used": result.chunks_used,
        "provider_name": result.provider_name,
    }
|
|
730
|
+
|
|
731
|
+
|
|
690
732
|
# --- API Keys ---
|
|
691
733
|
|
|
692
734
|
@app.get("/api/keys")
|