PyPI - memuron - Versions diffs - 0.1.1__py3-none-any.whl - Mend

memuron 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

memuron/__init__.py +3 -0
memuron/actions/__init__.py +12 -0
memuron/actions/context.py +63 -0
memuron/actions/helpers.py +88 -0
memuron/actions/memory.py +340 -0
memuron/actions/memory_write.py +290 -0
memuron/actions/nodes.py +340 -0
memuron/actions/registry.py +5 -0
memuron/actions/runtime.py +37 -0
memuron/actions/spaces_documents.py +720 -0
memuron/actions/sync.py +155 -0
memuron/application/__init__.py +1 -0
memuron/application/api.py +206 -0
memuron/application/app.py +103 -0
memuron/application/capabilities.py +82 -0
memuron/application/cli.py +35 -0
memuron/application/config.py +176 -0
memuron/application/mcp.py +44 -0
memuron/application/mcp_oauth.py +290 -0
memuron/application/registry.py +52 -0
memuron/context.py +532 -0
memuron/documents/__init__.py +1 -0
memuron/documents/link_guardian.py +192 -0
memuron/documents/linking.py +292 -0
memuron/documents/parser.py +1152 -0
memuron/documents/storage.py +151 -0
memuron/documents/url_ingest.py +375 -0
memuron/domain/__init__.py +1 -0
memuron/domain/decoders.py +1 -0
memuron/domain/encoders.py +185 -0
memuron/domain/lifecycles.py +8 -0
memuron/domain/limits.py +6 -0
memuron/domain/representations.py +56 -0
memuron/domain/schemas.py +581 -0
memuron/domain/scope_filter.py +104 -0
memuron/graphfs/__init__.py +1 -0
memuron/graphfs/manual.py +635 -0
memuron/graphfs/projection.py +578 -0
memuron/graphfs/query.py +1782 -0
memuron/graphfs/read_model.py +574 -0
memuron/ingest/__init__.py +1 -0
memuron/ingest/guardian.py +213 -0
memuron/ingest/jobs.py +424 -0
memuron/ingest/prompts.py +147 -0
memuron/memory/__init__.py +1 -0
memuron/memory/engine.py +35 -0
memuron/memory/projections.py +452 -0
memuron/memory/recipes.py +3247 -0
memuron/persistence/__init__.py +1 -0
memuron/persistence/db_pool.py +57 -0
memuron/persistence/identity_store.py +918 -0
memuron/persistence/store_helpers.py +16 -0
memuron/search/__init__.py +1 -0
memuron/search/fulltext.py +110 -0
memuron/search/hybrid.py +284 -0
memuron/search/pgvector.py +252 -0
memuron/security/__init__.py +1 -0
memuron/security/auth.py +143 -0
memuron/security/auth_provider.py +119 -0
memuron/security/authorization.py +53 -0
memuron/security/clerk_scopes.py +94 -0
memuron/security/clerk_webhooks.py +61 -0
memuron/security/jwt_tokens.py +53 -0
memuron/security/passwords.py +38 -0
memuron/security/tenant.py +58 -0
memuron/spaces/__init__.py +1 -0
memuron/spaces/model.py +35 -0
memuron/spaces/service.py +155 -0
memuron/sync/__init__.py +25 -0
memuron/sync/folder.py +828 -0
memuron-0.1.1.dist-info/METADATA +242 -0
memuron-0.1.1.dist-info/RECORD +74 -0
memuron-0.1.1.dist-info/WHEEL +4 -0
memuron-0.1.1.dist-info/entry_points.txt +4 -0

memuron/context.py ADDED Viewed

@@ -0,0 +1,532 @@
+"""Deterministic prompt context and profile assembly."""
+from __future__ import annotations
+from collections import Counter
+from typing import Any
+from artha_engine import ArthaEngine
+from memuron.graphfs.read_model import (
+    containing_collection,
+    get_nodes,
+    list_entries,
+    node_edges,
+    space_edges,
+)
+from memuron.memory.recipes import collection_members, get_memory, search_memories
+from memuron.security.tenant import org_scope_token
+# Character budget used by _char_budget when the caller passes neither a
+# character budget nor a token budget.
+DEFAULT_CONTEXT_CHAR_BUDGET = 8_000
+# Factor that turns a token budget into a character budget; assemble_context
+# also divides by it (rounding up) to report a token estimate.
+TOKEN_TO_CHAR_RATIO = 4
+# _semantic_links stops collecting once this many links are gathered per node.
+MAX_LINKS_PER_ITEM = 6
+# _collection_breadcrumbs performs at most this many parent lookups per node.
+MAX_BREADCRUMB_DEPTH = 12
+def _compact(text: object) -> str:
+    """Render *text* as a single line with whitespace runs collapsed.
+
+    Falsy input (``None``, ``""``, ``0``...) yields the empty string.
+    """
+    words = str(text or "").split()
+    return " ".join(words)
+def _space_token(scope: list[str]) -> str:
+    """Return the first token in *scope* that starts with "space.", else ""."""
+    for candidate in scope:
+        if candidate.startswith("space."):
+            return candidate
+    return ""
+def _char_budget(*, token_budget: int | None, char_budget: int | None) -> int:
+    """Resolve the prompt budget, expressed in characters.
+
+    An explicit ``char_budget`` wins. Otherwise ``token_budget`` is multiplied
+    by ``TOKEN_TO_CHAR_RATIO``. With neither given, the module default
+    ``DEFAULT_CONTEXT_CHAR_BUDGET`` is returned.
+    """
+    if char_budget is None and token_budget is None:
+        return DEFAULT_CONTEXT_CHAR_BUDGET
+    if char_budget is None:
+        return token_budget * TOKEN_TO_CHAR_RATIO
+    return char_budget
+def _truncate(text: str, limit: int) -> tuple[str, bool]:
+    """Shorten *text* so that it occupies at most *limit* characters.
+
+    Args:
+        text: The string to fit.
+        limit: Maximum number of characters allowed in the result.
+
+    Returns:
+        A ``(result, was_truncated)`` pair. Text that already fits is returned
+        unchanged with ``False``. For limits of 16 or less the text is cut
+        hard; for larger limits the cut text is right-stripped and ends with a
+        "[truncated]" marker line while staying within the limit.
+    """
+    if limit <= 0:
+        # A negative slice bound would keep almost all of the text instead of
+        # discarding it, so an exhausted budget must produce nothing. Empty
+        # text loses nothing and is therefore not reported as truncated.
+        return "", bool(text)
+    if len(text) <= limit:
+        return text, False
+    if limit <= 16:
+        # Too little room for the marker line; cut without annotation.
+        return text[:limit], True
+    return text[: limit - 15].rstrip() + "\n[truncated]", True
+def _node_space_token(memory: dict[str, Any], preferred_space_token: str | None) -> str | None:
+    """Pick the space token to use for *memory*.
+
+    The preferred token is used when the memory's scope contains it. Failing
+    that, the first "space." token found in the scope is used, and when the
+    scope has none the preferred token (possibly ``None``) is returned.
+    """
+    tokens = [str(entry) for entry in memory.get("scope") or []]
+    preferred_present = bool(preferred_space_token) and preferred_space_token in tokens
+    if preferred_present:
+        return preferred_space_token
+    detected = _space_token(tokens)
+    return detected if detected else preferred_space_token
+def _collection_breadcrumbs(
+    engine: ArthaEngine,
+    *,
+    node_id: str,
+    org_token: str,
+    space_token: str | None,
+) -> list[dict[str, str]]:
+    """Collect the chain of collections above *node_id*, outermost first.
+
+    Each step asks ``containing_collection`` for the parent of the current
+    node and ``get_nodes`` for that parent's display text. The walk ends when
+    no parent is reported, when a parent was already visited, or after
+    ``MAX_BREADCRUMB_DEPTH`` lookups.
+
+    Returns:
+        A list of ``{"id", "display", "path"}`` dicts; empty when
+        *space_token* is falsy.
+    """
+    trail: list[dict[str, str]] = []
+    if not space_token:
+        return trail
+    visited = {node_id}
+    cursor = node_id
+    lookups = 0
+    while lookups < MAX_BREADCRUMB_DEPTH:
+        lookups += 1
+        ancestor_id = containing_collection(
+            engine.store,
+            cursor,
+            org_token=org_token,
+            space_token=space_token,
+        )
+        # Stop at the top of the chain, or when the chain loops back on itself.
+        if not ancestor_id or ancestor_id in visited:
+            break
+        visited.add(ancestor_id)
+        fetched = get_nodes(
+            engine.store,
+            [ancestor_id],
+            org_token=org_token,
+            space_token=space_token,
+        )
+        # Fall back to the raw id when the node is missing or has no display.
+        label = ancestor_id
+        if fetched:
+            label = str(fetched[0].get("display") or ancestor_id)
+        trail.append(
+            {
+                "id": ancestor_id,
+                "display": label,
+                "path": f"/spaces/{space_token}/collections/{ancestor_id}",
+            }
+        )
+        cursor = ancestor_id
+    # Entries were gathered nearest-parent first; callers get them reversed.
+    return trail[::-1]
+def _semantic_links(
+    engine: ArthaEngine,
+    *,
+    node_id: str,
+    org_token: str,
+    space_token: str | None,
+) -> list[dict[str, str]]:
+    """Summarise the non-placement edges that ``node_edges`` yields for *node_id*.
+
+    Each summary records the edge id, both endpoints, whether the edge is
+    "outbound" (the node is the source) or "inbound", the id of the other
+    endpoint, and the edge description. At most ``MAX_LINKS_PER_ITEM``
+    summaries are returned; a falsy *space_token* yields an empty list.
+    """
+    if not space_token:
+        return []
+    candidate_edges = node_edges(
+        engine.store,
+        {node_id},
+        org_token=org_token,
+        space_token=space_token,
+    )
+    collected: list[dict[str, str]] = []
+    for edge in candidate_edges:
+        # Placement edges are skipped; only the remaining edge types count.
+        if str(edge.get("edge_type")) == "placement":
+            continue
+        source_id = str(edge.get("source_id"))
+        target_id = str(edge.get("target_id"))
+        is_outbound = source_id == node_id
+        if is_outbound:
+            direction, other_id = "outbound", target_id
+        else:
+            direction, other_id = "inbound", source_id
+        collected.append(
+            {
+                "id": str(edge.get("edge_id")),
+                "source_id": source_id,
+                "target_id": target_id,
+                "direction": direction,
+                "other_id": other_id,
+                "description": str(edge.get("description") or ""),
+            }
+        )
+        if len(collected) >= MAX_LINKS_PER_ITEM:
+            break
+    return collected
+def _memory_item(
+    engine: ArthaEngine,
+    *,
+    result: dict[str, Any],
+    citation_id: str,
+    org_token: str,
+    preferred_space_token: str | None,
+    include_breadcrumbs: bool,
+    include_links: bool,
+) -> dict[str, Any] | None:
+    """Build the context item for one memory search hit.
+
+    The full memory is loaded with ``get_memory``. Breadcrumbs and links are
+    only looked up when the matching ``include_*`` flag is set.
+
+    Returns:
+        A dict with ``kind == "memory"``, or ``None`` when the hit carries no
+        id or ``get_memory`` raises ``KeyError`` for it.
+    """
+    memory_id = str(result.get("id") or "")
+    if memory_id == "":
+        return None
+    try:
+        record = get_memory(engine, memory_id)
+    except KeyError:
+        return None
+    space_token = _node_space_token(record, preferred_space_token)
+    graph_args = {
+        "node_id": memory_id,
+        "org_token": org_token,
+        "space_token": space_token,
+    }
+    breadcrumbs: list[dict[str, str]] = []
+    if include_breadcrumbs:
+        breadcrumbs = _collection_breadcrumbs(engine, **graph_args)
+    links: list[dict[str, str]] = []
+    if include_links:
+        links = _semantic_links(engine, **graph_args)
+    metadata = {
+        "node_type": record.get("node_type") or "text",
+        "encoding": record.get("encoding") or "memory",
+        "payload": record.get("payload") or {},
+        "timestamp": record.get("timestamp"),
+    }
+    return {
+        "kind": "memory",
+        "citation_id": citation_id,
+        "memory_id": memory_id,
+        "score": float(result.get("semantic_score") or 0.0),
+        "type": record.get("type") or record.get("node_type") or "text",
+        "content": str(record.get("content") or ""),
+        "scope": list(record.get("scope") or []),
+        "metadata": metadata,
+        "breadcrumbs": breadcrumbs,
+        "links": links,
+    }
+def _edge_item(
+    *,
+    result: dict[str, Any],
+    citation_id: str,
+) -> dict[str, Any]:
+    """Build the context item for one relationship search hit.
+
+    The ``source`` and ``target`` entries of *result* are used only when they
+    are dicts; anything else is treated as an empty endpoint. Missing values
+    default to empty strings, an empty scope list and a score of ``0.0``.
+    """
+
+    def _endpoint(raw: object) -> dict[str, Any]:
+        # Normalise one side of the relationship to id/content/scope.
+        data = raw if isinstance(raw, dict) else {}
+        return {
+            "id": str(data.get("id") or ""),
+            "content": str(data.get("content") or ""),
+            "scope": list(data.get("scope") or []),
+        }
+
+    return {
+        "kind": "relationship",
+        "citation_id": citation_id,
+        "link_id": str(result.get("id") or ""),
+        "score": float(result.get("semantic_score") or 0.0),
+        "description": str(result.get("description") or ""),
+        "source": _endpoint(result.get("source")),
+        "target": _endpoint(result.get("target")),
+    }
+def _citation(item: dict[str, Any]) -> dict[str, Any]:
+    """Reduce a context item to its citation record.
+
+    Relationship items keep the link id, score, both endpoint ids and the
+    description. Every other item is reported as a memory and keeps its
+    memory id, score, scope, breadcrumbs, links and metadata.
+    """
+    if item["kind"] != "relationship":
+        record: dict[str, Any] = {
+            "citation_id": item["citation_id"],
+            "kind": "memory",
+        }
+        # These fields are copied through unchanged, in this order.
+        for key in ("memory_id", "score", "scope", "breadcrumbs", "links", "metadata"):
+            record[key] = item[key]
+        return record
+    return {
+        "citation_id": item["citation_id"],
+        "kind": "relationship",
+        "link_id": item["link_id"],
+        "score": item["score"],
+        "source_id": item["source"]["id"],
+        "target_id": item["target"]["id"],
+        "description": item["description"],
+    }
+def _item_block(item: dict[str, Any]) -> str:
+    """Render one context item as the text block placed in the prompt.
+
+    Relationship items produce four lines: heading, description, and a
+    220-character single-line preview of each endpoint. Memory items produce
+    a heading and type line, an optional collection path, one line per link,
+    and finally the content verbatim.
+    """
+    if item["kind"] != "relationship":
+        rendered = [
+            f"[{item['citation_id']}] Memory {item['memory_id']} score={item['score']:.4f}",
+            f"Type: {item['type']}",
+        ]
+        trail = item["breadcrumbs"]
+        if trail:
+            joined = " > ".join(str(part["display"]) for part in trail)
+            rendered.append(f"Collection path: {joined}")
+        # A falsy links value contributes no lines.
+        for link in item["links"] or ():
+            summary = _compact(link["description"])
+            rendered.append(f"Link {link['direction']} {link['other_id']}: {summary}")
+        rendered += ["Content:", str(item["content"])]
+        return "\n".join(rendered)
+    source = item["source"]
+    target = item["target"]
+    source_preview = _compact(source["content"])[:220]
+    target_preview = _compact(target["content"])[:220]
+    title = f"[{item['citation_id']}] Relationship {item['link_id']} " f"score={item['score']:.4f}"
+    body = (
+        title,
+        f"Description: {_compact(item['description'])}",
+        f"Source {source['id']}: {source_preview}",
+        f"Target {target['id']}: {target_preview}",
+    )
+    return "\n".join(body)
+def _bounded_prompt(
+    *,
+    query: str,
+    items: list[dict[str, Any]],
+    char_budget: int,
+) -> tuple[str, dict[str, Any], list[dict[str, Any]]]:
+    """Assemble the prompt text from *items* without exceeding *char_budget*.
+
+    Items are appended in order, each preceded by a blank line. The first
+    item that does not fit is cut with ``_truncate`` and ends the prompt;
+    everything after it is counted as omitted.
+
+    Returns:
+        ``(prompt_text, truncation, included)``. ``truncation`` holds
+        ``is_truncated``, ``omitted_items`` and ``truncated_items`` (citation
+        ids). ``included`` holds copies of the items that made it into the
+        prompt, each with a ``prompt_truncated`` flag.
+    """
+    header = (
+        "Memuron context block\n"
+        f"Query: {query}\n"
+        "Use bracketed citation IDs when citing this context.\n"
+    )
+    if not items:
+        empty_text, header_cut = _truncate(
+            header + "\nNo matching Memuron context found.",
+            char_budget,
+        )
+        summary = {
+            "is_truncated": header_cut,
+            "omitted_items": 0,
+            "truncated_items": [],
+        }
+        return empty_text, summary, []
+    pieces = [header.rstrip()]
+    used = len(pieces[0])
+    total = len(items)
+    kept: list[dict[str, Any]] = []
+    clipped_ids: list[str] = []
+    skipped = 0
+    for position, item in enumerate(items):
+        rendered = "\n\n" + _item_block(item)
+        room = char_budget - used
+        if room <= 0:
+            # Budget already spent: this item and all later ones are dropped.
+            skipped = total - position
+            break
+        fits = len(rendered) <= room
+        if not fits:
+            rendered, _was_cut = _truncate(rendered, room)
+            clipped_ids.append(str(item["citation_id"]))
+            skipped = total - position - 1
+        pieces.append(rendered)
+        used += len(rendered)
+        kept.append({**item, "prompt_truncated": not fits})
+        if not fits:
+            # A partially rendered item is always the last one in the prompt.
+            break
+    prompt = "".join(pieces)
+    truncation = {
+        "is_truncated": bool(skipped or clipped_ids),
+        "omitted_items": skipped,
+        "truncated_items": clipped_ids,
+    }
+    # The header alone may exceed the budget, hence the final hard cut.
+    return prompt[:char_budget], truncation, kept
+def assemble_context(
+    engine: ArthaEngine,
+    *,
+    query: str,
+    k: int,
+    scope: list[str] | None,
+    org_id: str,
+    preferred_space_token: str | None = None,
+    token_budget: int | None = None,
+    char_budget: int | None = None,
+    include_links: bool = True,
+    include_breadcrumbs: bool = True,
+) -> dict[str, Any]:
+    """Search memories for *query* and package the hits as a bounded prompt.
+
+    Hits of type "relationship_edge" become relationship items cited as
+    ``L1, L2, ...``; all other hits become memory items cited as
+    ``M1, M2, ...``. Memory hits that ``_memory_item`` rejects are skipped
+    without consuming a citation number.
+
+    Returns:
+        A dict with the query, the number of included items, the scope
+        returned by ``search_memories``, budget accounting, the prompt text,
+        citations, the included items and the truncation summary.
+    """
+    hits, resolved_scope = search_memories(
+        engine,
+        query,
+        k=k,
+        scope=scope,
+        include_links=include_links,
+    )
+    org_token = org_scope_token(org_id)
+    gathered: list[dict[str, Any]] = []
+    memories_cited = 0
+    links_cited = 0
+    for hit in hits:
+        if hit.get("type") == "relationship_edge":
+            links_cited += 1
+            gathered.append(_edge_item(result=hit, citation_id=f"L{links_cited}"))
+            continue
+        candidate = _memory_item(
+            engine,
+            result=hit,
+            citation_id=f"M{memories_cited + 1}",
+            org_token=org_token,
+            preferred_space_token=preferred_space_token,
+            include_breadcrumbs=include_breadcrumbs,
+            include_links=include_links,
+        )
+        if candidate is not None:
+            gathered.append(candidate)
+            memories_cited += 1
+    budget_chars = _char_budget(token_budget=token_budget, char_budget=char_budget)
+    prompt_text, truncation, included_items = _bounded_prompt(
+        query=query,
+        items=gathered,
+        char_budget=budget_chars,
+    )
+    used_chars = len(prompt_text)
+    # Round the character count up to whole tokens, reporting at least one.
+    rounded_up = (used_chars + TOKEN_TO_CHAR_RATIO - 1) // TOKEN_TO_CHAR_RATIO
+    budget = {
+        "char_budget": budget_chars,
+        "token_budget": token_budget,
+        "used_chars": used_chars,
+        "token_estimate": max(1, rounded_up),
+    }
+    citations = [_citation(entry) for entry in included_items]
+    return {
+        "query": query,
+        "count": len(included_items),
+        "scope": resolved_scope,
+        "budget": budget,
+        "prompt_text": prompt_text,
+        "citations": citations,
+        "items": included_items,
+        "truncated": truncation,
+    }
+def space_profile(
+    engine: ArthaEngine,
+    *,
+    space: dict[str, Any],
+    org_id: str,
+    limit: int = 1000,
+) -> dict[str, Any]:
+    """Summarise a space as structured data plus a short prompt text.
+
+    Up to *limit* entries are requested from ``list_entries``. The profile
+    reports counts per type, the first ten collection entries, the first ten
+    non-collection entries, and the number of edges that ``space_edges``
+    returns with placements excluded.
+
+    Returns:
+        ``{"profile": ..., "prompt_text": ...}``.
+    """
+    token = str(space["token"])
+    org_token = org_scope_token(org_id)
+    entries = list_entries(
+        engine.store,
+        org_token=org_token,
+        space_token=token,
+        limit=limit,
+    )
+    counts: Counter = Counter()
+    for entry in entries:
+        counts[str(entry.get("type") or entry.get("node_type") or "text")] += 1
+    collection_rows = []
+    other_rows = []
+    for entry in entries:
+        if entry.get("node_type") == "collection":
+            collection_rows.append(
+                {
+                    "id": entry["id"],
+                    "display": entry.get("display"),
+                    "preview": entry.get("preview"),
+                    "path": f"/spaces/{token}/collections/{entry['id']}",
+                }
+            )
+        else:
+            other_rows.append(
+                {
+                    "id": entry["id"],
+                    "type": entry.get("node_type"),
+                    "display": entry.get("display"),
+                    "preview": entry.get("preview"),
+                }
+            )
+    # Only the first ten of each kind are surfaced in the profile.
+    collections = collection_rows[:10]
+    previews = other_rows[:10]
+    edges = space_edges(
+        engine.store,
+        org_token=org_token,
+        space_token=token,
+        include_placements=False,
+    )
+    node_total = len(entries)
+    link_total = len(edges)
+    profile = {
+        "space_token": token,
+        "name": space.get("name"),
+        "description": space.get("description") or "",
+        "guardian_prompt": space.get("guardian_prompt") or "",
+        "counts": {
+            "nodes": node_total,
+            "semantic_links": link_total,
+            "by_type": dict(sorted(counts.items())),
+        },
+        "collections": collections,
+        "previews": previews,
+    }
+    prompt_lines = [
+        f"Space profile: {space.get('name')} ({token})",
+        f"Description: {space.get('description') or '(none)'}",
+        f"Nodes: {node_total}; semantic links: {link_total}",
+    ]
+    if collections:
+        prompt_lines.append("Collections:")
+        for row in collections:
+            prompt_lines.append(f"- {row['display']} ({row['id']})")
+    if previews:
+        prompt_lines.append("Representative memories:")
+        for row in previews:
+            prompt_lines.append(f"- [{row['id']}] {row['preview']}")
+    return {"profile": profile, "prompt_text": "\n".join(prompt_lines)}
+def collection_profile(
+    engine: ArthaEngine,
+    *,
+    collection_id: str,
+    org_id: str,
+) -> dict[str, Any]:
+    """Summarise one collection as structured data plus a short prompt text.
+
+    The profile carries the collection's name, summary, scope, parent
+    breadcrumbs, member counts per node type and previews of the first twenty
+    members returned by ``collection_members``.
+
+    Returns:
+        ``{"profile": ..., "prompt_text": ...}``.
+
+    Raises:
+        ValueError: If the node loaded for *collection_id* does not have
+            ``node_type == "collection"``.
+    """
+    collection = get_memory(engine, collection_id)
+    if collection.get("node_type") != "collection":
+        raise ValueError("collection_id must refer to a collection node")
+    org_token = org_scope_token(org_id)
+    scope_tokens = [str(token) for token in collection.get("scope") or []]
+    token = _space_token(scope_tokens)
+    breadcrumbs = _collection_breadcrumbs(
+        engine,
+        node_id=collection_id,
+        org_token=org_token,
+        space_token=token,
+    )
+    members = collection_members(engine, collection_id)
+    counts: Counter = Counter()
+    for member in members:
+        counts[str(member["node"].get("node_type") or "text")] += 1
+    member_previews = []
+    for member in members[:20]:
+        member_previews.append(
+            {
+                "id": member["node"]["id"],
+                "type": member["node"].get("node_type"),
+                "name": member["placement"].get("name"),
+                "preview": member["node"].get("preview"),
+            }
+        )
+    member_total = len(members)
+    # Prefer the payload's name; otherwise fall back to the node preview.
+    display_name = (collection.get("payload") or {}).get("name") or collection.get("preview")
+    profile = {
+        "id": collection_id,
+        "name": display_name,
+        "summary": collection.get("content") or "",
+        "scope": collection.get("scope") or [],
+        "breadcrumbs": breadcrumbs,
+        "counts": {"members": member_total, "by_type": dict(sorted(counts.items()))},
+        "members": member_previews,
+    }
+    prompt_lines = [
+        f"Collection profile: {profile['name']} ({collection_id})",
+        f"Summary: {_compact(profile['summary']) or '(none)'}",
+        f"Members: {member_total}",
+    ]
+    if breadcrumbs:
+        parent_path = " > ".join(part["display"] for part in breadcrumbs)
+        prompt_lines.append("Parent path: " + parent_path)
+    if member_previews:
+        prompt_lines.append("Direct members:")
+        for row in member_previews:
+            prompt_lines.append(f"- [{row['id']}] {row['name']}: {row['preview']}")
+    return {"profile": profile, "prompt_text": "\n".join(prompt_lines)}

memuron/documents/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Document parsing, source-object storage, and document-link planning."""