arkaos 2.73.0 → 2.75.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 2.73.0
1
+ 2.75.0
@@ -33,7 +33,14 @@ arka_wf_safe_session_id() {
33
33
 
34
34
  # Verb + noun patterns shared with the original inline classifier in
35
35
  # user-prompt-submit.sh. Keep in sync when adding new intent verbs.
36
- ARKA_WF_VERB_PATTERN='(criar?|crie[ms]?|cria[mr]?|adicionar?|adiciona[mr]?|implementar?|implementa[mr]?|desenvolver?|desenvolve[mr]?|construir?|constru[ií]a?[mr]?|fazer?|faz[ae][mr]?|refactor(izar?)?|corrigir?|corrige[mr]?|consertar?|conserta[mr]?|create[sd]?|creating|build(s|ing)?|add(s|ed|ing)?|implement(s|ed|ing)?|develop(s|ed|ing)?|fix(es|ed|ing)?|refactor(s|ed|ing)?|make[sd]?|making)'
36
+ #
37
+ # PR58 v2.75.0 — pattern widened with continuation + ship verbs after
38
+ # telemetry analysis showed 97% of prompts in a 30h continuous-build
39
+ # session were classified "classifier-did-not-match". Most of the
40
+ # missed prompts were short continuations ("continua", "força") or
41
+ # ship-tier verbs ("ship", "publish", "merge", "release", "deploy")
42
+ # that prolong existing flow-required activity.
43
+ ARKA_WF_VERB_PATTERN='(criar?|crie[ms]?|cria[mr]?|adicionar?|adiciona[mr]?|implementar?|implementa[mr]?|desenvolver?|desenvolve[mr]?|construir?|constru[ií]a?[mr]?|fazer?|faz[ae]?[mr]?|refactor(izar?)?|corrigir?|corrige[mr]?|consertar?|conserta[mr]?|continuar?|continua[mr]?|forçar?|força[mr]?|colocar?|coloca[mr]?|p[oô]r|melhorar?|melhora[mr]?|terminar?|termina[mr]?|acabar?|acaba[mr]?|publicar?|publica[mr]?|lançar?|lança[mr]?|create[sd]?|creating|build(s|ing)?|add(s|ed|ing)?|implement(s|ed|ing)?|develop(s|ed|ing)?|fix(es|ed|ing)?|refactor(s|ed|ing)?|make[sd]?|making|continue[sd]?|continuing|ship(s|ped|ping)?|merge[sd]?|merging|publish(es|ed|ing)?|release[sd]?|releasing|deploy(s|ed|ing)?|finish(es|ed|ing)?|improve[sd]?|improving)'
37
44
 
38
45
  # Classify: returns "true" if the prompt looks like a creation/
39
46
  # implementation/modification request, "false" otherwise.
@@ -0,0 +1,230 @@
1
+ """AI-powered persona builder (PR57 v2.74.0).
2
+
3
+ Generates a draft Persona from already-indexed content in the vector
4
+ store. The user ingests sources (YouTube transcripts, articles, PDFs)
5
+ via the knowledge dashboard, then the builder:
6
+
7
+ 1. Searches the vector store for chunks about the target person/topic.
8
+ 2. Sends those chunks to the configured LLM via the multi-backend
9
+ `LLMProvider` (Claude Code subagent / Anthropic API / Ollama local).
10
+ 3. Parses the LLM's JSON response into a `Persona` draft for the
11
+ operator to review and edit before saving.
12
+
13
+ The builder NEVER writes to the database — that's the existing
14
+ `PersonaManager.create()` path. The builder produces a draft; the
15
+ operator owns the persist decision (per the project memory's
16
+ "Generated persona presented for review" step).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import re
23
+ import uuid
24
+ from dataclasses import dataclass
25
+ from datetime import datetime, timezone
26
+
27
+ from core.knowledge.vector_store import VectorStore
28
+ from core.personas.schema import (
29
+ Persona,
30
+ PersonaBigFive,
31
+ PersonaCommunication,
32
+ PersonaDISC,
33
+ PersonaEnneagram,
34
+ )
35
+ from core.runtime.llm_provider import LLMProvider, get_llm_provider
36
+
37
+
38
+ _PERSONA_SYSTEM_PROMPT = """You build behavioural-DNA personas from quotes
39
+ and writings of real people. Read the supplied content carefully, then
40
+ emit a single JSON object that follows this exact schema. Use ONLY the
41
+ JSON keys listed — no prose, no markdown fences, no extra fields.
42
+
43
+ {
44
+ "title": "<one-line role label>",
45
+ "tagline": "<one-line essence>",
46
+ "disc": {
47
+ "primary": "D|I|S|C",
48
+ "secondary": "D|I|S|C",
49
+ "communication_style": "<one sentence>",
50
+ "under_pressure": "<one sentence>",
51
+ "motivator": "<one sentence>"
52
+ },
53
+ "enneagram": {
54
+ "type": 1-9,
55
+ "wing": 1-9,
56
+ "core_motivation": "<one sentence>",
57
+ "core_fear": "<one sentence>",
58
+ "subtype": "self-preservation|social|sexual"
59
+ },
60
+ "big_five": {
61
+ "openness": 0-100,
62
+ "conscientiousness": 0-100,
63
+ "extraversion": 0-100,
64
+ "agreeableness": 0-100,
65
+ "neuroticism": 0-100
66
+ },
67
+ "mbti": "<4-letter type>",
68
+ "mental_models": ["<model>", ...],
69
+ "expertise_domains": ["<domain>", ...],
70
+ "frameworks": ["<framework>", ...],
71
+ "key_quotes": ["<verbatim quote>", ...],
72
+ "communication": {
73
+ "tone": "<adjective>",
74
+ "vocabulary_level": "lay|specialist|expert",
75
+ "preferred_format": "<format hint>",
76
+ "avoid": ["<phrase to avoid>", ...]
77
+ }
78
+ }
79
+
80
+ If the content is insufficient to infer a field, use the closest neutral
81
+ default rather than fabricating. NEVER invent quotes — only include
82
+ verbatim text that appears in the content."""
83
+
84
+
85
@dataclass(frozen=True)
class BuildResult:
    """Output of a persona-builder run.

    Immutable bundle returned by `PersonaBuilder.generate()`: the draft
    persona together with telemetry about how it was produced.
    """

    # Draft persona parsed from the LLM response; the builder does not save it.
    persona: Persona
    # Number of chunks the vector-store search returned for the query.
    chunks_used: int
    # Value reported by `name()` on the LLM provider that answered.
    provider_name: str
    # Unparsed text of the LLM response, kept for inspection.
    raw_response: str
93
+
94
+
95
class PersonaBuildError(RuntimeError):
    """Raised when a persona draft cannot be built.

    `PersonaBuilder` raises this for an empty name, for a search that
    matches no indexed content, for an LLM response that contains no
    parseable JSON object, and for JSON that does not fit the Persona
    schema.
    """
97
+
98
+
99
class PersonaBuilder:
    """Generate persona drafts from indexed content.

    The builder searches the vector store, sends the matching chunks to
    an LLM and parses the JSON reply into a `Persona`. It only returns
    the draft; nothing in this class writes to storage.
    """

    # Character budget for the context block placed in the LLM prompt.
    MAX_CONTEXT_CHARS = 18_000

    # Divider inserted between chunks in the composed context.
    _CONTEXT_SEPARATOR = "\n\n---\n\n"

    def __init__(
        self,
        store: VectorStore,
        provider: LLMProvider | None = None,
    ) -> None:
        """Bind the builder to a vector store and an LLM provider.

        Args:
            store: Vector store that is searched for source chunks.
            provider: LLM backend to use; when omitted (or falsy) the
                result of `get_llm_provider()` is used instead.
        """
        self._store = store
        self._provider = provider or get_llm_provider()

    def generate(
        self,
        name: str,
        search_query: str = "",
        top_k: int = 20,
        source_label: str = "",
    ) -> BuildResult:
        """Build a persona draft for `name`.

        Searches the vector store for `search_query` (defaults to the
        name), fits the joined chunks into MAX_CONTEXT_CHARS, sends
        them to the configured LLM, parses the JSON response, and
        returns a draft Persona plus telemetry.

        Args:
            name: Person to model; must contain non-whitespace text.
            search_query: Vector search query. Empty or whitespace-only
                values fall back to `name`.
            top_k: Maximum number of chunks requested from the store.
            source_label: Label stored as the persona's `source`;
                falls back to `name` when empty.

        Returns:
            A `BuildResult` with the draft persona, the number of chunks
            returned by the search, the provider name and the raw LLM
            text.

        Raises:
            PersonaBuildError: If `name` is blank, the search returns
                nothing, or the LLM response cannot be turned into a
                Persona.
        """
        if not name or not name.strip():
            raise PersonaBuildError("name must not be empty")
        # Strip before falling back so a whitespace-only query does not
        # turn into an empty search string.
        query = (search_query or "").strip() or name.strip()
        chunks = self._store.search(query, top_k=top_k)
        if not chunks:
            raise PersonaBuildError(
                f"no indexed content matches {query!r} — "
                "ingest sources first via /api/knowledge/ingest"
            )
        context = self._compose_context(chunks)
        prompt = f"Person: {name}\n\nContent:\n{context}"
        response = self._provider.complete(
            prompt, system=_PERSONA_SYSTEM_PROMPT, max_tokens=3000,
        )
        persona = self._parse(name, source_label or name, response.text)
        return BuildResult(
            persona=persona,
            chunks_used=len(chunks),
            provider_name=self._provider.name(),
            raw_response=response.text,
        )

    def _compose_context(self, chunks: list[dict]) -> str:
        """Join chunk texts into one context string within the budget.

        Each chunk contributes its `text`, prefixed by `[heading]` on its
        own line when a heading is present; chunks without text are
        skipped. Whole chunks are added in order until the next one
        (including its heading and the separator) would exceed
        MAX_CONTEXT_CHARS. If even the first usable chunk is larger than
        the budget it is cut to fit, so the context is never empty when
        at least one chunk has text.

        Returns:
            The composed context, at most MAX_CONTEXT_CHARS characters.
        """
        separator = self._CONTEXT_SEPARATOR
        budget = self.MAX_CONTEXT_CHARS
        parts: list[str] = []
        used = 0
        for chunk in chunks:
            text = chunk.get("text") or ""
            if not text:
                continue
            heading = chunk.get("heading") or ""
            block = f"[{heading}]\n{text}" if heading else text
            # The separator only costs characters from the second part on.
            overhead = len(separator) if parts else 0
            remaining = budget - used - overhead
            if len(block) > remaining:
                if not parts and remaining > 0:
                    # A single oversize chunk: send a prefix rather than
                    # an empty context.
                    parts.append(block[:remaining])
                break
            parts.append(block)
            used += overhead + len(block)
        return separator.join(parts)

    def _parse(self, name: str, source_label: str, raw: str) -> Persona:
        """Turn the raw LLM text into a `Persona` draft.

        Args:
            name: Name assigned to the persona.
            source_label: Value stored in the persona's `source` field.
            raw: Text returned by the LLM.

        Returns:
            A new `Persona` with a fresh UUID and matching
            `created_at` / `updated_at` UTC timestamps.

        Raises:
            PersonaBuildError: If no JSON object can be extracted from
                `raw`, or constructing the schema objects raises
                `TypeError` / `ValueError`.
        """
        data = _extract_json_object(raw)
        if data is None:
            raise PersonaBuildError(
                f"LLM did not return a JSON object; raw response: {raw[:200]!r}"
            )
        now = datetime.now(timezone.utc).isoformat()
        try:
            return Persona(
                id=str(uuid.uuid4()),
                name=name,
                title=str(data.get("title") or ""),
                tagline=str(data.get("tagline") or ""),
                source=source_label,
                # Missing or null sections fall back to the schema defaults.
                disc=PersonaDISC(**(data.get("disc") or {})),
                enneagram=PersonaEnneagram(**(data.get("enneagram") or {})),
                big_five=PersonaBigFive(**(data.get("big_five") or {})),
                mbti=str(data.get("mbti") or "INTJ"),
                mental_models=_as_str_list(data.get("mental_models")),
                expertise_domains=_as_str_list(data.get("expertise_domains")),
                frameworks=_as_str_list(data.get("frameworks")),
                key_quotes=_as_str_list(data.get("key_quotes")),
                communication=PersonaCommunication(
                    **(data.get("communication") or {})
                ),
                created_at=now,
                updated_at=now,
            )
        except (TypeError, ValueError) as exc:
            raise PersonaBuildError(
                f"LLM JSON does not match Persona schema: {exc}"
            ) from exc
195
+
196
+
197
# Greedy span from the first "{" to the last "}" in the text.
_JSON_OBJECT_RE = re.compile(r"\{.*\}", re.DOTALL)


def _extract_json_object(raw: str) -> dict | None:
    """Parse the first JSON object in `raw`.

    Tolerates models that wrap JSON in markdown fences, add a leading
    explanation, or append trailing prose after the object. Candidates
    are tried in this order: the content of a fenced block, the whole
    stripped text, the span from the first "{" to the last "}", and
    finally a decode anchored at the first "{" that ignores whatever
    follows the object.

    Args:
        raw: Text returned by the LLM.

    Returns:
        The decoded object as a dict, or None when no candidate parses
        to a JSON object.
    """
    if not raw:
        return None
    candidates = [raw.strip()]
    fence_match = re.search(
        r"```(?:json)?\s*(\{.*?\})\s*```", raw, flags=re.DOTALL,
    )
    if fence_match:
        # Fenced content takes priority over the unfenced text.
        candidates.insert(0, fence_match.group(1))
    bare_match = _JSON_OBJECT_RE.search(raw)
    if bare_match:
        candidates.append(bare_match.group(0))
    for cand in candidates:
        try:
            obj = json.loads(cand)
        except json.JSONDecodeError:
            continue
        if isinstance(obj, dict):
            return obj
    # The greedy span breaks when prose after the object contains braces.
    # Decode from the first "{" and stop where that object ends. Only the
    # first "{" is tried so a truncated response is still rejected rather
    # than reduced to one of its nested objects.
    start = raw.find("{")
    if start != -1:
        try:
            obj, _end = json.JSONDecoder().raw_decode(raw, start)
        except json.JSONDecodeError:
            return None
        if isinstance(obj, dict):
            return obj
    return None
225
+
226
+
227
def _as_str_list(value: object) -> list[str]:
    """Coerce a decoded JSON array into a list of strings.

    Anything that is not a list yields an empty list. Within a list,
    only str / int / float items are kept (each converted with `str`);
    every other item is dropped.
    """
    if isinstance(value, list):
        scalar_types = (str, int, float)
        kept = filter(lambda entry: isinstance(entry, scalar_types), value)
        return list(map(str, kept))
    return []
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arkaos",
3
- "version": "2.73.0",
3
+ "version": "2.75.0",
4
4
  "description": "The Operating System for AI Agent Teams",
5
5
  "type": "module",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "arkaos-core"
3
- version = "2.73.0"
3
+ version = "2.75.0"
4
4
  description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -687,6 +687,48 @@ def persona_delete(persona_id: str):
687
687
  return {"error": "Persona not found"}
688
688
 
689
689
 
690
@app.post("/api/personas/build")
def persona_build(body: dict):
    """PR57 v2.74.0 — AI-powered persona draft from already-indexed content.

    Body: {
        "name": "<person to model>",
        "search_query": "<optional vector search query>",
        "top_k": <optional, default 20>,
        "source_label": "<optional label, e.g. 'Alex Hormozi'>"
    }

    Returns: {persona: {...draft...}, chunks_used, provider_name}
    The draft is NOT saved — the operator reviews and calls
    POST /api/personas to persist.

    Invalid input (missing name, non-integer or non-positive top_k) and
    any PersonaBuildError are reported as {"error": "<message>"}.
    """
    # Coerce to str so a non-string JSON value cannot break .strip().
    name = str(body.get("name") or "").strip()
    if not name:
        return {"error": "name is required"}
    # Validate before touching the store so bad input has no side effects.
    try:
        top_k = int(body.get("top_k") or 20)
    except (TypeError, ValueError):
        return {"error": "top_k must be a positive integer"}
    if top_k < 1:
        return {"error": "top_k must be a positive integer"}
    store = _get_vector_store()
    if not store:
        # No shared store available: open one at the default location.
        from core.knowledge.vector_store import VectorStore
        kb_db = Path.home() / ".arkaos" / "knowledge.db"
        kb_db.parent.mkdir(parents=True, exist_ok=True)
        store = VectorStore(kb_db)
    from core.personas.builder import PersonaBuilder, PersonaBuildError
    builder = PersonaBuilder(store)
    try:
        result = builder.generate(
            name=name,
            search_query=str(body.get("search_query") or ""),
            top_k=top_k,
            source_label=str(body.get("source_label") or ""),
        )
    except PersonaBuildError as exc:
        return {"error": str(exc)}
    return {
        "persona": result.persona.model_dump(),
        "chunks_used": result.chunks_used,
        "provider_name": result.provider_name,
    }
730
+
731
+
690
732
  # --- API Keys ---
691
733
 
692
734
  @app.get("/api/keys")