npm - arkaos - Versions diffs - 2.72.0 → 2.74.0 - Mend

arkaos 2.72.0 → 2.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/VERSION +1 -1
package/core/personas/__pycache__/builder.cpython-313.pyc +0 -0
package/core/personas/builder.py +230 -0
package/dashboard/app/pages/knowledge.vue +52 -3
package/package.json +1 -1
package/pyproject.toml +1 -1
package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
package/scripts/__pycache__/marketplace_export.cpython-313.pyc +0 -0
package/scripts/dashboard-api.py +84 -0

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 2.72.0
1	+ 2.74.0

package/core/personas/__pycache__/builder.cpython-313.pyc ADDED Viewed

Binary file

package/core/personas/builder.py ADDED Viewed

@@ -0,0 +1,230 @@
+"""AI-powered persona builder (PR57 v2.74.0).
+Generates a draft Persona from already-indexed content in the vector
+store. The user ingests sources (YouTube transcripts, articles, PDFs)
+via the knowledge dashboard, then the builder:
+1. Searches the vector store for chunks about the target person/topic.
+2. Sends those chunks to the configured LLM via the multi-backend
+   `LLMProvider` (Claude Code subagent / Anthropic API / Ollama local).
+3. Parses the LLM's JSON response into a `Persona` draft for the
+   operator to review and edit before saving.
+The builder NEVER writes to the database — that's the existing
+`PersonaManager.create()` path. The builder produces a draft; the
+operator owns the persist decision (per the project memory's
+"Generated persona presented for review" step).
+"""
+from __future__ import annotations
+import json
+import re
+import uuid
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from core.knowledge.vector_store import VectorStore
+from core.personas.schema import (
+    Persona,
+    PersonaBigFive,
+    PersonaCommunication,
+    PersonaDISC,
+    PersonaEnneagram,
+)
+from core.runtime.llm_provider import LLMProvider, get_llm_provider
+# System prompt passed as `system=` on every persona-build completion.
+# It pins the JSON shape that `PersonaBuilder._parse` reads back (title,
+# tagline, disc, enneagram, big_five, mbti, the four list fields and
+# communication) and instructs the model not to fabricate quotes.
+_PERSONA_SYSTEM_PROMPT = """You build behavioural-DNA personas from quotes
+and writings of real people. Read the supplied content carefully, then
+emit a single JSON object that follows this exact schema. Use ONLY the
+JSON keys listed — no prose, no markdown fences, no extra fields.
+{
+  "title": "<one-line role label>",
+  "tagline": "<one-line essence>",
+  "disc": {
+    "primary": "D|I|S|C",
+    "secondary": "D|I|S|C",
+    "communication_style": "<one sentence>",
+    "under_pressure": "<one sentence>",
+    "motivator": "<one sentence>"
+  },
+  "enneagram": {
+    "type": 1-9,
+    "wing": 1-9,
+    "core_motivation": "<one sentence>",
+    "core_fear": "<one sentence>",
+    "subtype": "self-preservation|social|sexual"
+  },
+  "big_five": {
+    "openness": 0-100,
+    "conscientiousness": 0-100,
+    "extraversion": 0-100,
+    "agreeableness": 0-100,
+    "neuroticism": 0-100
+  },
+  "mbti": "<4-letter type>",
+  "mental_models": ["<model>", ...],
+  "expertise_domains": ["<domain>", ...],
+  "frameworks": ["<framework>", ...],
+  "key_quotes": ["<verbatim quote>", ...],
+  "communication": {
+    "tone": "<adjective>",
+    "vocabulary_level": "lay|specialist|expert",
+    "preferred_format": "<format hint>",
+    "avoid": ["<phrase to avoid>", ...]
+  }
+}
+If the content is insufficient to infer a field, use the closest neutral
+default rather than fabricating. NEVER invent quotes — only include
+verbatim text that appears in the content."""
+@dataclass(frozen=True)
+class BuildResult:
+    """Output of a persona-builder run.
+    Immutable (frozen) bundle returned by `PersonaBuilder.generate`.
+    """
+    # Draft persona parsed from the LLM response.
+    persona: Persona
+    # Number of chunks returned by the vector-store search (`len(chunks)`).
+    chunks_used: int
+    # Value of `name()` on the LLM provider that produced the response.
+    provider_name: str
+    # Unparsed LLM response text, returned alongside the draft.
+    raw_response: str
+class PersonaBuildError(RuntimeError):
+    """Raised when a persona draft cannot be built.
+    Covers an empty `name`, a vector-store search with no hits, an LLM
+    response that contains no JSON object, and JSON that does not fit the
+    Persona schema.
+    """
+class PersonaBuilder:
+    """Generate persona drafts from indexed content.
+    Reads chunks from the vector store, asks the LLM provider for a JSON
+    persona description and parses it into a `Persona`. Nothing is saved
+    here; callers decide whether to persist the returned draft.
+    """
+    # Upper bound (characters) for the context block sent to the LLM,
+    # counting headings and chunk separators as well as chunk text.
+    MAX_CONTEXT_CHARS = 18_000
+    # Divider inserted between consecutive chunks in the context block.
+    _CHUNK_SEPARATOR = "\n\n---\n\n"
+    def __init__(
+        self,
+        store: VectorStore,
+        provider: LLMProvider | None = None,
+    ) -> None:
+        """Bind the builder to a vector store and an LLM provider.
+        Args:
+            store: Vector store that is searched for source chunks.
+            provider: LLM backend; when omitted (or falsy) the result of
+                `get_llm_provider()` is used.
+        """
+        self._store = store
+        self._provider = provider or get_llm_provider()
+    def generate(
+        self,
+        name: str,
+        search_query: str = "",
+        top_k: int = 20,
+        source_label: str = "",
+    ) -> BuildResult:
+        """Build a persona draft for `name`.
+        Searches the vector store for `search_query` (defaults to the
+        name), fits the chunks into MAX_CONTEXT_CHARS, sends them to the
+        configured LLM, parses the JSON response, and returns a draft
+        Persona plus telemetry.
+        Args:
+            name: Person to model; must contain non-whitespace text.
+            search_query: Vector-search query; falls back to `name`.
+            top_k: Number of chunks requested from the store.
+            source_label: Stored as the persona's `source`; falls back
+                to `name`.
+        Returns:
+            BuildResult with the draft, the number of chunks the search
+            returned, the provider name and the raw LLM text.
+        Raises:
+            PersonaBuildError: empty name, no matching content, matching
+                content without usable text, or an unparseable response.
+        """
+        if not name or not name.strip():
+            raise PersonaBuildError("name must not be empty")
+        query = (search_query or name).strip()
+        chunks = self._store.search(query, top_k=top_k)
+        if not chunks:
+            raise PersonaBuildError(
+                f"no indexed content matches {query!r} — "
+                "ingest sources first via /api/knowledge/ingest"
+            )
+        context = self._compose_context(chunks)
+        if not context:
+            # Hits whose text is empty would otherwise produce a prompt
+            # with no content, inviting the model to fabricate a persona.
+            raise PersonaBuildError(
+                f"indexed content matching {query!r} has no usable text"
+            )
+        prompt = f"Person: {name}\n\nContent:\n{context}"
+        response = self._provider.complete(
+            prompt, system=_PERSONA_SYSTEM_PROMPT, max_tokens=3000,
+        )
+        persona = self._parse(name, source_label or name, response.text)
+        return BuildResult(
+            persona=persona,
+            chunks_used=len(chunks),
+            provider_name=self._provider.name(),
+            raw_response=response.text,
+        )
+    def _compose_context(self, chunks: list[dict]) -> str:
+        """Join chunk texts into one context string within the budget.
+        Chunks are taken in the given order; chunks without text are
+        skipped. The budget counts the optional `[heading]` prefix and
+        the separators, so the result never exceeds MAX_CONTEXT_CHARS.
+        Collection stops at the first chunk that does not fit; if that is
+        the very first chunk, it is truncated to the budget so an
+        oversized top hit still yields context instead of nothing.
+        """
+        separator = self._CHUNK_SEPARATOR
+        parts: list[str] = []
+        total = 0
+        for chunk in chunks:
+            text = chunk.get("text") or ""
+            if not text:
+                continue
+            heading = chunk.get("heading") or ""
+            block = f"[{heading}]\n{text}" if heading else text
+            # A separator is only emitted between blocks, not before the first.
+            cost = len(block) + (len(separator) if parts else 0)
+            if total + cost > self.MAX_CONTEXT_CHARS:
+                if not parts:
+                    parts.append(block[: self.MAX_CONTEXT_CHARS])
+                break
+            parts.append(block)
+            total += cost
+        return separator.join(parts)
+    def _parse(self, name: str, source_label: str, raw: str) -> Persona:
+        """Turn the raw LLM text into a `Persona` draft.
+        A fresh UUID and a single UTC timestamp (used for both
+        `created_at` and `updated_at`) are assigned here. Missing
+        sections fall back to the schema classes' own defaults; a missing
+        `mbti` falls back to "INTJ".
+        Raises:
+            PersonaBuildError: no JSON object in `raw`, or the schema
+                constructors raise TypeError/ValueError.
+        """
+        data = _extract_json_object(raw)
+        if data is None:
+            raise PersonaBuildError(
+                f"LLM did not return a JSON object; raw response: {raw[:200]!r}"
+            )
+        now = datetime.now(timezone.utc).isoformat()
+        try:
+            return Persona(
+                id=str(uuid.uuid4()),
+                name=name,
+                title=str(data.get("title") or ""),
+                tagline=str(data.get("tagline") or ""),
+                source=source_label,
+                disc=PersonaDISC(**(data.get("disc") or {})),
+                enneagram=PersonaEnneagram(**(data.get("enneagram") or {})),
+                big_five=PersonaBigFive(**(data.get("big_five") or {})),
+                mbti=str(data.get("mbti") or "INTJ"),
+                mental_models=_as_str_list(data.get("mental_models")),
+                expertise_domains=_as_str_list(data.get("expertise_domains")),
+                frameworks=_as_str_list(data.get("frameworks")),
+                key_quotes=_as_str_list(data.get("key_quotes")),
+                communication=PersonaCommunication(
+                    **(data.get("communication") or {})
+                ),
+                created_at=now,
+                updated_at=now,
+            )
+        except (TypeError, ValueError) as exc:
+            # A non-mapping section makes `**` raise TypeError; both cases
+            # are reported as a schema mismatch.
+            raise PersonaBuildError(
+                f"LLM JSON does not match Persona schema: {exc}"
+            ) from exc
+# Greedy first-"{"-to-last-"}" matcher. No longer used by
+# `_extract_json_object`; kept so the module-level name stays available.
+_JSON_OBJECT_RE = re.compile(r"\{.*\}", re.DOTALL)
+# JSON object wrapped in a markdown code fence, with an optional `json` tag.
+_FENCED_JSON_RE = re.compile(r"```(?:json)?\s*(\{.*?\})\s*```", re.DOTALL)
+def _extract_json_object(raw: str) -> dict | None:
+    """Parse the first JSON object in `raw`.
+    Tolerates models that wrap JSON in markdown fences or surround it
+    with explanation text. Tries, in order: the fenced object, the whole
+    stripped response, then the object starting at the first "{".
+    The last step uses an incremental decode, so trailing prose (even
+    prose containing braces) does not defeat the parse.
+    Returns None when no parseable object is found.
+    """
+    if not raw:
+        return None
+    candidates = [raw.strip()]
+    fence_match = _FENCED_JSON_RE.search(raw)
+    if fence_match:
+        candidates.insert(0, fence_match.group(1))
+    for cand in candidates:
+        try:
+            obj = json.loads(cand)
+        except json.JSONDecodeError:
+            continue
+        if isinstance(obj, dict):
+            return obj
+    start = raw.find("{")
+    if start == -1:
+        return None
+    try:
+        # raw_decode stops at the end of the first complete value and
+        # ignores whatever follows it.
+        obj, _end = json.JSONDecoder().raw_decode(raw, start)
+    except json.JSONDecodeError:
+        return None
+    return obj if isinstance(obj, dict) else None
+def _as_str_list(value: object) -> list[str]:
+    """Coerce an LLM-supplied value into a list of strings.
+    Anything that is not a list yields an empty list. Inside a list only
+    str, int and float items are kept (stringified); other items are
+    dropped.
+    """
+    if isinstance(value, list):
+        kept: list[str] = []
+        for entry in value:
+            if isinstance(entry, (str, int, float)):
+                kept.append(str(entry))
+        return kept
+    return []

package/dashboard/app/pages/knowledge.vue CHANGED Viewed

@@ -16,16 +16,27 @@ const ingestError = ref<string | null>(null)
 const isDragging = ref(false)
 const pasteText = ref('')
 const pasteTitle = ref('')
+// PR56 v2.73.0 — bulk URL ingest mode. Paste a list of URLs (one per
+// line) and the backend queues one job per source.
+const bulkUrls = ref('')
-const activeInputMode = ref<'url' | 'file' | 'text' | 'research'>('url')
+const activeInputMode = ref<'url' | 'file' | 'text' | 'research' | 'bulk'>('url')
 const inputModes = [
   { label: 'URL', value: 'url' as const, icon: 'i-lucide-link' },
+  { label: 'Bulk', value: 'bulk' as const, icon: 'i-lucide-list' },
   { label: 'File', value: 'file' as const, icon: 'i-lucide-upload' },
   { label: 'Text', value: 'text' as const, icon: 'i-lucide-type' },
   { label: 'Research', value: 'research' as const, icon: 'i-lucide-search' },
 ]
+const bulkUrlCount = computed(() =>
+  bulkUrls.value
+    .split('\n')
+    .map((s) => s.trim())
+    .filter((s) => s.length > 0).length
+)
 function handleDrop(e: DragEvent) {
   isDragging.value = false
   const file = e.dataTransfer?.files?.[0]
@@ -87,6 +98,7 @@ function clearFile() {
 }
 const canIngest = computed(() => {
+  if (activeInputMode.value === 'bulk') return bulkUrlCount.value > 0
   return detectedType.value !== null
 })
@@ -178,7 +190,11 @@ onUnmounted(() => {
 })
 async function handleIngest() {
-  if (!detectedType.value && activeInputMode.value !== 'text') return
+  if (
+    !detectedType.value
+    && activeInputMode.value !== 'text'
+    && activeInputMode.value !== 'bulk'
+  ) return
   ingestError.value = null
@@ -199,6 +215,17 @@ async function handleIngest() {
         body: { source: pasteText.value.slice(0, 100), type: 'markdown', text: pasteText.value, title: pasteTitle.value },
       })
     }
+    // Bulk URL paste — one job per non-blank line, server caps at 50
+    else if (activeInputMode.value === 'bulk' && bulkUrlCount.value > 0) {
+      const sources = bulkUrls.value
+        .split('\n')
+        .map((s) => s.trim())
+        .filter((s) => s.length > 0)
+      await $fetch(`${apiBase}/api/knowledge/ingest-bulk`, {
+        method: 'POST',
+        body: { sources },
+      })
+    }
     // URL or Research — standard ingest
     else {
       const source = ingestUrl.value.trim()
@@ -215,6 +242,7 @@ async function handleIngest() {
     clearFile()
     pasteText.value = ''
     pasteTitle.value = ''
+    bulkUrls.value = ''
     // Refresh jobs table + connect WebSocket
     fetchJobs()
@@ -415,6 +443,23 @@ function formatScore(score: number): string {
               />
             </div>
+            <!-- Mode: Bulk URLs (PR56 v2.73.0) -->
+            <div v-if="activeInputMode === 'bulk'" class="space-y-3">
+              <UTextarea
+                v-model="bulkUrls"
+                placeholder="Paste one URL per line. Up to 50 sources per batch.&#10;&#10;https://www.youtube.com/watch?v=...&#10;https://example.com/article&#10;https://example.com/paper.pdf"
+                :rows="8"
+                size="lg"
+                class="w-full font-mono text-sm"
+              />
+              <div class="flex items-center justify-between text-xs text-muted">
+                <span>{{ bulkUrlCount }} source{{ bulkUrlCount === 1 ? '' : 's' }} detected</span>
+                <!-- The API rejects the whole request, not just the overflow, and counts sources after de-duplication. -->
+                <span v-if="bulkUrlCount > 50" class="text-red-400">
+                  Over the 50-source cap — the whole batch is rejected if more than 50 unique sources remain.
+                </span>
+              </div>
+            </div>
             <!-- Mode: Research -->
             <div v-if="activeInputMode === 'research'" class="space-y-3">
               <UInput
@@ -447,7 +492,11 @@ function formatScore(score: number): string {
               </div>
               <UButton
-                :label="activeInputMode === 'research' ? 'Research & Index' : 'Ingest'"
+                :label="
+                  activeInputMode === 'research' ? 'Research & Index'
+                  : activeInputMode === 'bulk' ? `Ingest ${bulkUrlCount} source${bulkUrlCount === 1 ? '' : 's'}`
+                  : 'Ingest'
+                "
                 icon="i-lucide-zap"
                 size="md"
                 :disabled="!canIngest && !(activeInputMode === 'text' && pasteText.length > 50)"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "arkaos",
-  "version": "2.72.0",
+  "version": "2.74.0",
   "description": "The Operating System for AI Agent Teams",
   "type": "module",
   "bin": {

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "arkaos-core"
-version = "2.72.0"
+version = "2.74.0"
 description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
 readme = "README.md"
 license = {text = "MIT"}

package/scripts/__pycache__/dashboard-api.cpython-313.pyc CHANGED Viewed

Binary file

package/scripts/__pycache__/marketplace_export.cpython-313.pyc CHANGED Viewed

Binary file

package/scripts/dashboard-api.py CHANGED Viewed

@@ -487,6 +487,48 @@ def knowledge_ingest(body: dict):
     return {"job_id": job.id, "source_type": source_type, "status": "queued"}
+@app.post("/api/knowledge/ingest-bulk")
+def knowledge_ingest_bulk(body: dict):
+    """PR56 v2.73.0 — bulk URL ingest.
+    Accepts ``{"sources": ["url1", "url2", ...]}`` and submits each
+    source through ``knowledge_ingest``, one call per source. Returns
+    ``{"jobs": [{...}, ...], "count": N}``; a source whose submission
+    failed carries an ``error`` key instead of a ``job_id``.
+    Non-string entries and empty / whitespace-only lines are dropped, and
+    duplicates are removed here (first occurrence wins, order kept).
+    The request is rejected as a whole with ``{"error": ...}`` when
+    ``sources`` is not a list, when nothing valid remains, or when more
+    than 50 unique sources remain.
+    """
+    submitted = body.get("sources") or []
+    if not isinstance(submitted, list):
+        return {"error": "sources must be a list"}
+    # A dict keeps insertion order, giving an ordered de-duplication.
+    unique: dict[str, None] = {}
+    for entry in submitted:
+        if isinstance(entry, str):
+            trimmed = entry.strip()
+            if trimmed:
+                unique.setdefault(trimmed, None)
+    sources = list(unique)
+    if not sources:
+        return {"error": "no valid sources provided"}
+    if len(sources) > 50:
+        return {"error": "bulk ingest is capped at 50 sources per request"}
+    jobs = []
+    for src in sources:
+        outcome = knowledge_ingest({"source": src})
+        if "error" not in outcome:
+            entry = {
+                "source": src,
+                "job_id": outcome["job_id"],
+                "source_type": outcome.get("source_type"),
+                "status": outcome.get("status", "queued"),
+            }
+        else:
+            # Per-source failures are reported inline; the rest still run.
+            entry = {"source": src, "error": outcome["error"]}
+        jobs.append(entry)
+    return {"jobs": jobs, "count": len(jobs)}
 @app.get("/api/tasks/{task_id}")
 def task_detail(task_id: str):
     """Get a single task by ID. Also checks jobs."""
@@ -645,6 +687,48 @@ def persona_delete(persona_id: str):
     return {"error": "Persona not found"}
+@app.post("/api/personas/build")
+def persona_build(body: dict):
+    """PR57 v2.74.0 — AI-powered persona draft from already-indexed content.
+    Body: {
+        "name": "<person to model>",
+        "search_query": "<optional vector search query>",
+        "top_k": <optional, default 20>,
+        "source_label": "<optional label, e.g. 'Alex Hormozi'>"
+    }
+    Returns: {persona: {...draft...}, chunks_used, provider_name}
+    or {"error": "<message>"} when the input is invalid or the build fails.
+    The draft is NOT saved — the operator reviews and calls
+    POST /api/personas to persist.
+    """
+    raw_name = body.get("name") or ""
+    # A non-string name used to crash on .strip(); treat it as missing.
+    if not isinstance(raw_name, str) or not raw_name.strip():
+        return {"error": "name is required"}
+    name = raw_name.strip()
+    search_query = body.get("search_query") or ""
+    source_label = body.get("source_label") or ""
+    if not isinstance(search_query, str) or not isinstance(source_label, str):
+        return {"error": "search_query and source_label must be strings"}
+    # Missing, null and 0 keep falling back to the default of 20; anything
+    # that is not a positive integer is reported instead of raising.
+    try:
+        top_k = int(body.get("top_k", 20) or 20)
+    except (TypeError, ValueError, OverflowError):
+        return {"error": "top_k must be a positive integer"}
+    if top_k < 1:
+        return {"error": "top_k must be a positive integer"}
+    store = _get_vector_store()
+    if not store:
+        # No shared store available: open the default knowledge DB.
+        from core.knowledge.vector_store import VectorStore
+        kb_db = Path.home() / ".arkaos" / "knowledge.db"
+        kb_db.parent.mkdir(parents=True, exist_ok=True)
+        store = VectorStore(kb_db)
+    from core.personas.builder import PersonaBuilder, PersonaBuildError
+    builder = PersonaBuilder(store)
+    try:
+        result = builder.generate(
+            name=name,
+            search_query=search_query,
+            top_k=top_k,
+            source_label=source_label,
+        )
+    except PersonaBuildError as exc:
+        return {"error": str(exc)}
+    return {
+        "persona": result.persona.model_dump(),
+        "chunks_used": result.chunks_used,
+        "provider_name": result.provider_name,
+    }
 # --- API Keys ---
 @app.get("/api/keys")