PyPI - mllang-protocol - Versions diffs - 0.1.2__tar.gz - Mend

mllang-protocol 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

mllang_protocol-0.1.2/PKG-INFO +21 -0
mllang_protocol-0.1.2/mllang/__init__.py +40 -0
mllang_protocol-0.1.2/mllang/embed.py +91 -0
mllang_protocol-0.1.2/mllang/halt.py +39 -0
mllang_protocol-0.1.2/mllang/operators.py +29 -0
mllang_protocol-0.1.2/mllang/packet.py +361 -0
mllang_protocol-0.1.2/mllang/sanitize.py +270 -0
mllang_protocol-0.1.2/mllang/slots.py +36 -0
mllang_protocol-0.1.2/mllang_protocol.egg-info/PKG-INFO +21 -0
mllang_protocol-0.1.2/mllang_protocol.egg-info/SOURCES.txt +12 -0
mllang_protocol-0.1.2/mllang_protocol.egg-info/dependency_links.txt +1 -0
mllang_protocol-0.1.2/mllang_protocol.egg-info/top_level.txt +1 -0
mllang_protocol-0.1.2/pyproject.toml +35 -0
mllang_protocol-0.1.2/setup.cfg +4 -0

mllang_protocol-0.1.2/PKG-INFO ADDED Viewed

@@ -0,0 +1,21 @@
+Metadata-Version: 2.4
+Name: mllang-protocol
+Version: 0.1.2
+Summary: MLLANG v0.1 reference parser — compact text-surface protocol for AI-agent state, lives inside markdown fenced blocks
+Author: Jake Liu
+License: Apache-2.0
+Project-URL: Homepage, https://github.com/jakeliu/mllang
+Project-URL: Repository, https://github.com/jakeliu/mllang
+Project-URL: Documentation, https://mllang.com
+Project-URL: Issues, https://github.com/jakeliu/mllang/issues
+Keywords: mllang,ai,agents,multi-agent,llm,protocol
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.9

mllang_protocol-0.1.2/mllang/__init__.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""MLLANG v0.1 reference parser — pure stdlib.
+Usage:
+    from mllang import Packet, parse, compose, extract_from_markdown
+    p = parse("V:0.1.r1; I:demo; G:{task=test}; S:{x=1}; N:@K -> classify; H:<=>; P:0.85;")
+    print(p.next_agent)   # @K
+    print(p.halt)         # <=>
+    print(p.confidence)   # 0.85
+    text = compose(p)
+    packets = extract_from_markdown(open("task.md").read())
+"""
+from .packet import Packet, parse, compose, extract_from_markdown, validate
+from .slots import SLOT_ORDER, REQUIRED_SLOTS
+from .halt import HALT_ENUM
+from .operators import OPERATORS
+from .sanitize import sanitize, sanitize_to_json, VALID_LEVELS as TELEMETRY_LEVELS
+from .embed import embed_in_markdown, extract_summary_and_packet
+__version__ = "0.1.2"
+__all__ = [
+    "Packet",
+    "parse",
+    "compose",
+    "extract_from_markdown",
+    "validate",
+    "sanitize",
+    "sanitize_to_json",
+    "TELEMETRY_LEVELS",
+    "embed_in_markdown",
+    "extract_summary_and_packet",
+    "SLOT_ORDER",
+    "REQUIRED_SLOTS",
+    "HALT_ENUM",
+    "OPERATORS",
+    "__version__",
+]

mllang_protocol-0.1.2/mllang/embed.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""Markdown-embed helpers for MLLANG packets.
+Default pattern (summary mode):
+    short workflow summary + fenced ```mllang block.
+Rationale: when the consumer is an AI, the packet already carries the
+state, and the `EN:` shadow line is the human-skim summary. Long prose
+duplicates the packet content for a human reader who is rarely going to
+read it. Keep the surrounding text tight; let `EN:` do the dual-channel.
+Opt-in `mode="verbose"` keeps a long-prose channel above the packet for
+human-authored docs (PRs, issues, design notes) where readers do want
+the narrative.
+"""
+from __future__ import annotations
+from typing import Optional, Union
+from .packet import Packet, compose, parse
+VALID_MODES = {"summary", "verbose", "packet_only"}
+DEFAULT_FENCE = "mllang"
+def embed_in_markdown(
+    packet: Union[Packet, str],
+    summary: str = "",
+    prose: Optional[str] = None,
+    mode: str = "summary",
+    title: Optional[str] = None,
+    fence: str = DEFAULT_FENCE,
+) -> str:
+    """Wrap a packet in a fenced markdown block, optionally preceded by text.
+    Args:
+        packet: a Packet (serialized with compose) or raw MLLANG text
+            (used as given, minus surrounding whitespace).
+        summary: text placed above the block when mode is "summary".
+        prose: text placed above the block when mode is "verbose".
+        mode: one of "summary", "verbose" or "packet_only".
+        title: when non-empty, emitted first as a `# ` heading.
+        fence: language tag written right after the opening fence.
+    Returns:
+        The sections joined by blank lines, terminated by one newline.
+    Raises:
+        ValueError: if mode is not a member of VALID_MODES.
+    """
+    if mode not in VALID_MODES:
+        raise ValueError(f"mode must be one of {sorted(VALID_MODES)}; got {mode!r}")
+    body = compose(packet) if isinstance(packet, Packet) else packet.strip()
+    # Pick the lead-in text that belongs to the requested mode (if any).
+    lead = None
+    if mode == "summary":
+        lead = summary
+    elif mode == "verbose":
+        lead = prose
+    sections = [f"# {title}"] if title else []
+    if lead:
+        sections.append(lead.strip())
+    sections.append(f"```{fence}\n{body}\n```")
+    return "\n\n".join(sections) + "\n"
+def extract_summary_and_packet(md_text: str, fence: str = DEFAULT_FENCE) -> tuple[str, Optional[Packet]]:
+    """Split markdown into (summary text, parsed packet or None).
+    The summary is whatever precedes the first fenced block tagged with
+    `fence`, minus any leading heading/blank lines. When no such block
+    exists the whole stripped text is returned with None. A block that
+    parse() rejects with ValueError also yields None for the packet.
+    """
+    import re
+    pattern = rf"```{re.escape(fence)}\s*\n(.*?)\n```"
+    found = re.search(pattern, md_text, re.DOTALL)
+    if found is None:
+        return md_text.strip(), None
+    summary = md_text[: found.start()].strip()
+    if summary.startswith("#"):
+        rows = summary.splitlines()
+        # Index of the first row that is neither a heading nor blank;
+        # everything before it is dropped.
+        keep_from = len(rows)
+        for idx, row in enumerate(rows):
+            if row.strip() and not row.startswith("#"):
+                keep_from = idx
+                break
+        summary = "\n".join(rows[keep_from:]).strip()
+    try:
+        packet = parse(found.group(1))
+    except ValueError:
+        packet = None
+    return summary, packet

mllang_protocol-0.1.2/mllang/halt.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""MLLANG halt enum (9-way)."""
+import re
+# 9-way halt enum
+HALT_ENUM = {
+    "accept",
+    "repair",
+    "regen",
+    "escalate@H",
+    "test=pass",
+    "test=fail",
+    "risk!high",
+    "<=>",
+}
+# after-N-rounds is a pattern, not a literal — handled separately
+_AFTER_N_ROUNDS_RE = re.compile(r"^after-\d+-rounds$|^after-N-rounds$")
+def is_valid_halt(halt_str: str) -> bool:
+    """Tell whether every `|`-separated piece of halt_str is a known halt.
+    A piece is accepted when it is a HALT_ENUM literal or matches the
+    after-N-rounds pattern. An empty/None halt_str is invalid.
+    """
+    if not halt_str:
+        return False
+    pieces = (chunk.strip() for chunk in halt_str.split("|"))
+    return all(
+        piece in HALT_ENUM or _AFTER_N_ROUNDS_RE.match(piece) is not None
+        for piece in pieces
+    )
+def halt_categories(halt_str: str) -> list:
+    """Split a halt string on `|` and return the whitespace-trimmed pieces.
+    An empty/None halt_str gives an empty list; pieces are not validated.
+    """
+    return [] if not halt_str else [chunk.strip() for chunk in halt_str.split("|")]

mllang_protocol-0.1.2/mllang/operators.py ADDED Viewed

@@ -0,0 +1,29 @@
+"""MLLANG operator definitions."""
+# Operator table from v0.1 spec section 3 (21 entries here, counting the ";" and "," separators)
+OPERATORS = {
+    "=": "is / equals",
+    ":=": "assign",
+    "==": "confirmed equal (verified)",
+    "?": "unknown / open",
+    "!": "assertion / must",
+    "*": "important / pinned",
+    "~": "approximate / loose",
+    "^": "parent / prior round (e.g. ^r1)",
+    "->": "leads to / next step",
+    "=>": "implies / therefore",
+    "<=>": "agreed by all parties (halt value)",
+    "&": "and",
+    "|": "or",
+    "^!": "reserved compound (never appears bare)",
+    "#": "tag / topic",
+    "$": "tool-invocation shorthand (inside Y: slot only)",
+    "[]": "list / set",
+    "{}": "map / struct",
+    "()": "group",
+    ";": "slot separator",
+    ",": "item separator",
+}
+# Agent codes
+AGENT_CODES = {"@C", "@X", "@K", "@G", "@M", "@H", "@?"}

mllang_protocol-0.1.2/mllang/packet.py ADDED Viewed

@@ -0,0 +1,361 @@
+"""MLLANG Packet parser/composer/validator.
+Reference implementation, pure stdlib, ~200 LOC core.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+from typing import Optional, Union, List, Dict
+from .slots import SLOT_ORDER, REQUIRED_SLOTS, MAP_SLOTS, LIST_SLOTS
+from .halt import is_valid_halt
+from .operators import AGENT_CODES
+# ── Regex helpers ───────────────────────────────────────────────────────
+# Match a fenced ```mllang block in markdown
+_MARKDOWN_FENCE_RE = re.compile(r"```mllang\s*\n(.*?)\n```", re.DOTALL)
+# Match a slot: KEY:value;
+# Slot value continues until next ; that's not inside brackets/braces/quotes
+_SLOT_KEY_RE = re.compile(r"(?:^|;)\s*([A-Z]):\s*", re.MULTILINE)
+# Match version: V:MAJOR.MINOR.rROUND
+_VERSION_RE = re.compile(r"^\s*(\d+)\.(\d+)\.r(\d+)\s*$")
+# Match next agent: @C -> verb
+_NEXT_AGENT_RE = re.compile(r"@([CXKGMH?])\s*->\s*(\S+)")
+# ── Packet dataclass ────────────────────────────────────────────────────
+@dataclass
+class Packet:
+    """In-memory form of one MLLANG v0.1 packet.
+    Every field mirrors one slot letter (noted beside it); `en_shadow`
+    holds the optional `EN:` line, which is not a slot. The methods are
+    thin wrappers over the module-level parse/compose/validate functions.
+    """
+    # -- required slots --
+    version: str = ""            # V, e.g. "0.1.r1"
+    goal: Dict[str, str] = field(default_factory=dict)  # G map
+    state: Dict[str, str] = field(default_factory=dict)  # S map
+    next_agent: str = ""         # N, kept as the full "@K -> verb" string
+    halt: str = ""               # H, e.g. "<=>" or "test=pass | after-3-rounds"
+    # -- optional slots --
+    thread_id: str = ""          # I
+    decisions: List[str] = field(default_factory=list)  # D list
+    evidence: List[str] = field(default_factory=list)   # E list
+    unknowns: List[str] = field(default_factory=list)   # U list
+    risks: List[str] = field(default_factory=list)      # R list
+    test: Dict[str, str] = field(default_factory=dict)  # T map
+    files: List[str] = field(default_factory=list)      # F list
+    tool_calls: List[str] = field(default_factory=list) # Y list
+    budget: Dict[str, str] = field(default_factory=dict)  # B map
+    confidence: float = 0.0      # P
+    assumptions: List[str] = field(default_factory=list)  # A list
+    # -- dual-channel EN: shadow line (tracked, but not a slot) --
+    en_shadow: str = ""
+    @classmethod
+    def parse(cls, text: str) -> "Packet":
+        """Build a Packet from MLLANG text (delegates to module-level parse)."""
+        return parse(text)
+    def compose(self) -> str:
+        """Render this Packet as MLLANG text (delegates to module-level compose)."""
+        return compose(self)
+    def validate(self) -> List[str]:
+        """Collect validation error messages; an empty list means valid."""
+        return validate(self)
+    @property
+    def next_agent_code(self) -> str:
+        """The `@X` agent code found in the N slot, or "" when none matches."""
+        found = _NEXT_AGENT_RE.search(self.next_agent)
+        if found is None:
+            return ""
+        return f"@{found.group(1)}"
+    @property
+    def next_agent_verb(self) -> str:
+        """The verb following `->` in the N slot, or "" when none matches."""
+        found = _NEXT_AGENT_RE.search(self.next_agent)
+        if found is None:
+            return ""
+        return found.group(2)
+    def __str__(self) -> str:
+        return self.compose()
+# ── Parser ──────────────────────────────────────────────────────────────
+def _strip_en_shadow(text: str) -> tuple[str, str]:
+    """Separate `EN:` shadow lines from the packet body.
+    Any line whose first non-blank characters are `EN:` is removed from
+    the packet wherever it appears; if several exist the last one wins.
+    The remaining lines are joined with single spaces.
+    Returns:
+        (packet_text, en_text) with en_text == "" when no EN: line exists.
+    """
+    en = ""
+    packet_lines = []
+    for line in text.strip().splitlines():
+        # Ignore indentation: an indented "EN:" line left in the packet
+        # text would later be mis-read by the slot splitter as an N: slot.
+        candidate = line.lstrip()
+        if candidate.startswith("EN:"):
+            en = candidate[len("EN:"):].strip()
+        else:
+            packet_lines.append(line)
+    return " ".join(packet_lines).strip(), en
+def _split_slots(packet_text: str) -> List[tuple[str, str]]:
+    """Split packet text into [(slot_key, value), ...] in order of appearance.
+    A slot starts at an uppercase letter immediately followed by `:` and
+    runs to the next `;` that is outside double quotes and outside any
+    bracket/brace/paren nesting (or to end of text). Values are returned
+    stripped. Characters that do not start a slot are skipped one at a time.
+    """
+    slots = []
+    text = packet_text.strip()
+    end = len(text)
+    i = 0
+    while i < end:
+        # Skip separators and whitespace between slots.
+        while i < end and text[i] in " \t;\n":
+            i += 1
+        if i >= end:
+            break
+        if not (i + 1 < end and text[i].isupper() and text[i + 1] == ":"):
+            # Not a slot key: skip this character and rescan.
+            i += 1
+            continue
+        key = text[i]
+        i += 2  # step over "K:"
+        start = i
+        depth = 0
+        in_quote = False
+        while i < end:
+            c = text[i]
+            if c == '"' and text[i - 1] != "\\":
+                in_quote = not in_quote
+            elif not in_quote:
+                if c in "[{(":
+                    depth += 1
+                elif c in "]})":
+                    # Clamp at zero: a stray closer must not push depth
+                    # negative, or no later ';' would ever end this slot.
+                    depth = max(depth - 1, 0)
+                elif c == ";" and depth == 0:
+                    break
+            i += 1
+        slots.append((key, text[start:i].strip()))
+    return slots
+def _parse_map(value: str) -> Dict[str, str]:
+    """Turn `{k=v, k=v}` text into a dict of stripped keys and values.
+    Surrounding braces are optional. Entries are split on top-level
+    commas; an entry is split on its first `=`. An entry without `=`
+    maps to "", and blank entries are ignored.
+    """
+    body = value.strip()
+    if body.startswith("{") and body.endswith("}"):
+        body = body[1:-1].strip()
+    result = {}
+    if not body:
+        return result
+    for raw in _split_top_level(body, ","):
+        entry = raw.strip()
+        if not entry:
+            continue
+        name, eq, val = entry.partition("=")
+        result[name.strip()] = val.strip() if eq else ""
+    return result
+def _parse_list(value: str) -> List[str]:
+    """Turn `[a, b, c]` text into a list of stripped items.
+    Surrounding brackets are optional and items are split on top-level
+    commas. Blank items (as in `[a, , b]` or `[a, ]`) are dropped, the
+    same way _parse_map ignores blank entries, so they neither round-trip
+    through compose nor inflate item counts.
+    """
+    value = value.strip()
+    if value.startswith("[") and value.endswith("]"):
+        value = value[1:-1].strip()
+    if not value:
+        return []
+    trimmed = (piece.strip() for piece in _split_top_level(value, ","))
+    return [item for item in trimmed if item]
+def _split_top_level(text: str, sep: str) -> List[str]:
+    """Split text on the single character `sep` where it is not nested.
+    A separator counts only outside double quotes and outside any
+    bracket/brace/paren nesting. Pieces are returned unstripped; an empty
+    trailing piece is omitted, so "" yields [].
+    """
+    out = []
+    cur = []
+    depth = 0
+    in_quote = False
+    for i, c in enumerate(text):
+        if c == '"' and (i == 0 or text[i - 1] != "\\"):
+            in_quote = not in_quote
+        elif not in_quote:
+            if c in "[{(":
+                depth += 1
+            elif c in "]})":
+                # Clamp at zero so a stray closer cannot disable every
+                # later separator by leaving depth negative.
+                depth = max(depth - 1, 0)
+            elif c == sep and depth == 0:
+                out.append("".join(cur))
+                cur = []
+                continue  # the separator itself is not part of any piece
+        cur.append(c)
+    if cur:
+        out.append("".join(cur))
+    return out
+def parse(text: str) -> Packet:
+    """Build a Packet from MLLANG packet text.
+    An optional EN: shadow line is split off first and stored in
+    `en_shadow`. Slot letters that are not recognised are ignored; when
+    a letter repeats, the later occurrence overwrites the earlier one.
+    A P value that is not a float leaves `confidence` unchanged.
+    Raises:
+        ValueError: if text is empty or only whitespace.
+    """
+    if not text or not text.strip():
+        raise ValueError("Empty packet")
+    body, shadow = _strip_en_shadow(text)
+    # Slot letter -> Packet attribute, grouped by how the value is decoded.
+    scalar_attrs = {"V": "version", "I": "thread_id", "N": "next_agent", "H": "halt"}
+    map_attrs = {"G": "goal", "S": "state", "T": "test", "B": "budget"}
+    list_attrs = {
+        "D": "decisions",
+        "E": "evidence",
+        "U": "unknowns",
+        "R": "risks",
+        "F": "files",
+        "Y": "tool_calls",
+        "A": "assumptions",
+    }
+    packet = Packet()
+    packet.en_shadow = shadow
+    for letter, raw in _split_slots(body):
+        if letter in scalar_attrs:
+            setattr(packet, scalar_attrs[letter], raw.strip())
+        elif letter in map_attrs:
+            setattr(packet, map_attrs[letter], _parse_map(raw))
+        elif letter in list_attrs:
+            setattr(packet, list_attrs[letter], _parse_list(raw))
+        elif letter == "P":
+            try:
+                packet.confidence = float(raw.strip())
+            except ValueError:
+                # Best effort: keep the current confidence on a bad number.
+                pass
+    return packet
+def compose(p: Packet) -> str:
+    """Render a Packet as one line of MLLANG text, plus an EN: line if set.
+    Slots are written in a fixed order (V I G S D E U R T F Y B N H P A),
+    joined by "; " and closed with ";". Empty/falsy slots are omitted and
+    P is written, with two decimals, only when confidence is above zero.
+    """
+    def render_map(mapping: Dict[str, str]) -> str:
+        return "{" + ", ".join(f"{k}={v}" for k, v in mapping.items()) + "}"
+    def render_list(items: List[str]) -> str:
+        return "[" + ", ".join(items) + "]"
+    def render_text(value: str) -> str:
+        return f"{value}"
+    # (slot letter, current value, renderer) for every slot written before P.
+    layout = [
+        ("V", p.version, render_text),
+        ("I", p.thread_id, render_text),
+        ("G", p.goal, render_map),
+        ("S", p.state, render_map),
+        ("D", p.decisions, render_list),
+        ("E", p.evidence, render_list),
+        ("U", p.unknowns, render_list),
+        ("R", p.risks, render_list),
+        ("T", p.test, render_map),
+        ("F", p.files, render_list),
+        ("Y", p.tool_calls, render_list),
+        ("B", p.budget, render_map),
+        ("N", p.next_agent, render_text),
+        ("H", p.halt, render_text),
+    ]
+    segments = [f"{letter}:{render(value)}" for letter, value, render in layout if value]
+    if p.confidence > 0:
+        segments.append(f"P:{p.confidence:.2f}")
+    if p.assumptions:
+        segments.append(f"A:{render_list(p.assumptions)}")
+    text = "; ".join(segments) + ";"
+    if p.en_shadow:
+        text += f"\nEN: {p.en_shadow}"
+    return text
+def validate(p: Packet) -> List[str]:
+    """Check a Packet and return the list of problems found (empty = valid).
+    Checks, in reporting order: V present and MAJOR.MINOR.rROUND shaped;
+    G and S non-empty; N present and '@<agent> -> <verb>' shaped; H present
+    and accepted by is_valid_halt; a non-zero P within 0.0-1.0; and the
+    agent code in N being a member of AGENT_CODES.
+    """
+    problems: List[str] = []
+    report = problems.append
+    if p.version:
+        if not _VERSION_RE.match(p.version):
+            report(f"V slot must be MAJOR.MINOR.rROUND format, got: {p.version!r}")
+    else:
+        report("missing required slot V (version)")
+    if not p.goal:
+        report("missing required slot G (goal)")
+    if not p.state:
+        report("missing required slot S (state)")
+    # One regex search serves both the format check and the agent check.
+    agent_match = _NEXT_AGENT_RE.search(p.next_agent) if p.next_agent else None
+    if not p.next_agent:
+        report("missing required slot N (next agent)")
+    elif agent_match is None:
+        report(f"N slot must be '@<agent> -> <verb>' format, got: {p.next_agent!r}")
+    if not p.halt:
+        report("missing required slot H (halt)")
+    elif not is_valid_halt(p.halt):
+        report(f"H slot has invalid value(s): {p.halt!r}")
+    # A confidence of 0 is treated as "not set" and is not range-checked.
+    if p.confidence and not (0.0 <= p.confidence <= 1.0):
+        report(f"P slot must be 0.00-1.00, got: {p.confidence}")
+    if agent_match is not None:
+        agent = f"@{agent_match.group(1)}"
+        if agent not in AGENT_CODES:
+            report(f"unknown agent code in N: {agent}")
+    return problems
+def extract_from_markdown(md_text: str) -> List[Packet]:
+    """Parse every fenced ```mllang block in md_text, in document order.
+    Blocks that parse() rejects with ValueError are skipped silently.
+    """
+    packets = []
+    for block in _MARKDOWN_FENCE_RE.finditer(md_text):
+        try:
+            packets.append(parse(block.group(1)))
+        except ValueError:
+            continue
+    return packets

mllang_protocol-0.1.2/mllang/sanitize.py ADDED Viewed

@@ -0,0 +1,270 @@
+"""MLLANG telemetry sanitization.
+Strips IP from packets BEFORE telemetry leaves user's machine. Slot
+SHAPES are public; slot VALUES stay private.
+Public API:
+    sanitize(packet, level=None, reject_leaks=True) -> dict | None
+Levels:
+    off        — return None (nothing sent). Default.
+    shape      — slot presence, halt, confidence, next-agent code, operators.
+    structured — shape + map keys (no values) + verb names + counts.
+    full       — structured + redacted map values + assumption prefixes.
+Set via MLLANG_TELEMETRY env var or explicit level=... kwarg.
+Per-packet override always wins over env var.
+Defense in depth: by default, sanitize() refuses to ship a payload that
+still matches any leak detector (email/path/api-key/long-quote) and
+returns None instead.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import os
+import re
+from typing import Any, Dict, List, Optional, Union
+from .halt import halt_categories
+from .operators import OPERATORS
+from .packet import Packet, compose, parse
+VALID_LEVELS = {"off", "shape", "structured", "full"}
+DEFAULT_LEVEL = "off"
+# Operators excluded from telemetry: structural / too common to be a signal.
+_TRIVIAL_OPS = {";", ",", "=", "[]", "{}", "()"}
+# Leak detectors — last-line defense before payload ships.
+_EMAIL_RE = re.compile(r"\b[\w.+-]+@[\w-]+\.[\w.-]+\b")
+_PATH_RE = re.compile(r"(?:/[A-Za-z0-9._-]+){2,}")
+_API_KEY_RE = re.compile(
+    r"\b(?:sk|pk|api|key|token|secret|bearer)[-_=][A-Za-z0-9_-]{16,}",
+    re.IGNORECASE,
+)
+_LONG_QUOTE_RE = re.compile(r'"[^"]{60,}"')
+_LEAK_DETECTORS = [
+    ("email", _EMAIL_RE),
+    ("path", _PATH_RE),
+    ("api_key", _API_KEY_RE),
+    ("long_quote", _LONG_QUOTE_RE),
+]
+_TOOL_VERB_RE = re.compile(r"\$?([A-Za-z_][A-Za-z0-9_]*)")
+_AGENT_CODE_RE = re.compile(r"@[CXKGMH?]")
+def _level_from_env(explicit: Optional[str]) -> str:
+    """Resolve the telemetry level to one of VALID_LEVELS.
+    An explicit (non-None) level takes precedence; otherwise the
+    MLLANG_TELEMETRY environment variable is read. Both sources are
+    normalised the same way (trimmed, lower-cased), so level="SHAPE"
+    behaves like MLLANG_TELEMETRY=SHAPE. Anything unrecognised, including
+    a non-string explicit value, resolves to DEFAULT_LEVEL.
+    """
+    if explicit is not None:
+        raw = explicit
+    else:
+        raw = os.environ.get("MLLANG_TELEMETRY", DEFAULT_LEVEL)
+    if not isinstance(raw, str):
+        return DEFAULT_LEVEL
+    level = raw.strip().lower()
+    if level not in VALID_LEVELS:
+        level = DEFAULT_LEVEL
+    return level
+def _hash_thread_id(thread_id: str) -> str:
+    """Return `<I:hash:XXXXXXXXXXXX>` for a thread id, or "" if it is empty.
+    The twelve hex digits are the start of the SHA-256 digest of the
+    UTF-8 encoded id.
+    """
+    if thread_id:
+        short = hashlib.sha256(thread_id.encode("utf-8")).hexdigest()[:12]
+        return "<I:hash:" + short + ">"
+    return ""
+def _detect_operators(packet_text: str) -> List[str]:
+    """Return the non-trivial operators that occur in the raw packet text.
+    Operators are tried longest first, and every occurrence of a match is
+    blanked out before shorter operators are tried. That is what keeps a
+    multi-character operator from also being counted as its own
+    substrings (`<=>` is not additionally reported as `=>`, nor `^!` as
+    `^` and `!`). Operators listed in _TRIVIAL_OPS are never reported.
+    The result keeps the longest-first order.
+    """
+    found: List[str] = []
+    remaining = packet_text
+    for op in sorted(OPERATORS.keys(), key=lambda o: -len(o)):
+        if op in _TRIVIAL_OPS:
+            continue
+        if op in remaining:
+            found.append(op)
+            # Consume the matched text; no operator contains a space, so
+            # the padding cannot create a new match.
+            remaining = remaining.replace(op, " " * len(op))
+    return found
+def _redact_map(m: Dict[str, str]) -> Dict[str, str]:
+    """Keep each key but replace its value with a `<REDACTED:N-chars>` marker.
+    N is the length of the original value.
+    """
+    redacted = {}
+    for name, secret in m.items():
+        redacted[name] = f"<REDACTED:{len(secret)}-chars>"
+    return redacted
+def _verb_names(tool_calls: List[str]) -> List[str]:
+    """Return the first identifier of each tool call (the verb), in order.
+    Only the text captured by _TOOL_VERB_RE is returned, never the
+    arguments; calls with no identifier contribute nothing.
+    """
+    matches = (_TOOL_VERB_RE.search(call) for call in tool_calls)
+    return [hit.group(1) for hit in matches if hit]
+def _agent_code(next_agent: str) -> str:
+    """Return the first `@X` agent code in next_agent, or "" if none (or empty)."""
+    found = _AGENT_CODE_RE.search(next_agent if next_agent else "")
+    if found is None:
+        return ""
+    return found.group(0)
+def _slots_present(p: Packet) -> List[str]:
+    """List the slot letters whose Packet field is truthy, in canonical order.
+    Order is V I G S D E U R T F Y B N H P A. A confidence of 0 counts as
+    absent, as do empty strings, maps and lists.
+    """
+    checks = (
+        ("V", p.version),
+        ("I", p.thread_id),
+        ("G", p.goal),
+        ("S", p.state),
+        ("D", p.decisions),
+        ("E", p.evidence),
+        ("U", p.unknowns),
+        ("R", p.risks),
+        ("T", p.test),
+        ("F", p.files),
+        ("Y", p.tool_calls),
+        ("B", p.budget),
+        ("N", p.next_agent),
+        ("H", p.halt),
+        ("P", p.confidence),
+        ("A", p.assumptions),
+    )
+    return [letter for letter, content in checks if content]
+def _walk_strings(value: Any):
+    """Yield the strings found in a nested str/dict/list structure, depth first.
+    A str yields itself; a dict yields each key followed by whatever its
+    value contains; a list yields what its items contain. Values of any
+    other type yield nothing.
+    """
+    if isinstance(value, str):
+        yield value
+        return
+    if isinstance(value, dict):
+        for name, child in value.items():
+            yield name
+            yield from _walk_strings(child)
+        return
+    if isinstance(value, list):
+        for child in value:
+            yield from _walk_strings(child)
+def _detect_leaks(payload: Dict[str, Any]) -> List[str]:
+    """Scan an outgoing payload and return `field:detector` hits.
+    Every string reachable under each top-level field (except
+    `thread_hash`) is tested against _LEAK_DETECTORS in order; a string
+    contributes at most one hit, for the first detector that matches.
+    """
+    hits: List[str] = []
+    for field_name, field_value in payload.items():
+        if field_name == "thread_hash":
+            continue  # hash output is not scanned
+        for text in _walk_strings(field_value):
+            label = next(
+                (name for name, rx in _LEAK_DETECTORS if rx.search(text)),
+                None,
+            )
+            if label is not None:
+                hits.append(f"{field_name}:{label}")
+    return hits
+def sanitize(
+    packet: Union[Packet, str],
+    level: Optional[str] = None,
+    reject_leaks: bool = True,
+) -> Optional[Dict[str, Any]]:
+    """Sanitize an MLLANG packet for telemetry.
+    Args:
+        packet: a Packet object or raw MLLANG text.
+        level: "off" | "shape" | "structured" | "full".
+            None = read MLLANG_TELEMETRY env var (default "off").
+        reject_leaks: when True (default), return None if the raw packet
+            text matches any leak detector, or if the finished payload
+            still does.
+    Returns:
+        dict telemetry payload, or None if the resolved level is "off",
+        the text cannot be parsed, the packet has no V slot, or a leak
+        detector fired.
+    """
+    resolved = _level_from_env(level)
+    if resolved == "off":
+        return None
+    # Obtain both the parsed packet and its text form: a Packet is
+    # re-serialized, a string is parsed and kept as given.
+    if isinstance(packet, Packet):
+        p = packet
+        raw_text = compose(p)
+    else:
+        try:
+            p = parse(packet)
+        except ValueError:
+            return None
+        raw_text = packet
+    # A packet without a version slot is never reported.
+    if not p.version:
+        return None
+    # First gate: scan the ENTIRE packet text, not just what would be sent.
+    # NOTE(review): this rejects any packet whose text contains e.g. a
+    # path-like or e-mail-like string, even in slots whose values are
+    # never copied into the payload — confirm that this is intended.
+    if reject_leaks:
+        for _name, rx in _LEAK_DETECTORS:
+            if rx.search(raw_text):
+                return None
+    # Base payload, included at every level other than "off".
+    payload: Dict[str, Any] = {
+        "v": p.version,
+        "slots_present": _slots_present(p),
+        "operators_used": _detect_operators(raw_text),
+        "halt": p.halt,
+        "halt_categories": halt_categories(p.halt),
+        "confidence": p.confidence,
+        "next_agent": _agent_code(p.next_agent),
+        "thread_hash": _hash_thread_id(p.thread_id),
+        "level": resolved,
+    }
+    # "structured" and "full": add map keys, pass/fail test results, tool
+    # verb names, and per-slot item counts.
+    if resolved in ("structured", "full"):
+        payload.update(
+            {
+                "goal_keys": list(p.goal.keys()),
+                "state_keys": list(p.state.keys()),
+                "test_results": {
+                    k: v for k, v in p.test.items() if v in ("pass", "fail")
+                },
+                "tool_verbs": _verb_names(p.tool_calls),
+                "decisions_count": len(p.decisions),
+                "evidence_count": len(p.evidence),
+                "unknowns_count": len(p.unknowns),
+                "risks_count": len(p.risks),
+                "files_count": len(p.files),
+                "tool_calls_count": len(p.tool_calls),
+                "assumptions_count": len(p.assumptions),
+                "en_shadow_length": len(p.en_shadow),
+            }
+        )
+    # "full" only: length-only redactions of G/S values and the first 20
+    # characters of each assumption.
+    if resolved == "full":
+        payload.update(
+            {
+                "goal_values_redacted": _redact_map(p.goal),
+                "state_values_redacted": _redact_map(p.state),
+                "assumptions_prefix": [a[:20] for a in p.assumptions],
+            }
+        )
+    # Second gate: re-scan the strings actually present in the payload.
+    if reject_leaks:
+        leaks = _detect_leaks(payload)
+        if leaks:
+            return None
+    return payload
+def sanitize_to_json(
+    packet: Union[Packet, str],
+    level: Optional[str] = None,
+    reject_leaks: bool = True,
+) -> Optional[str]:
+    """Run sanitize() and encode its payload as compact JSON.
+    Returns None whenever sanitize() returns None; otherwise a JSON string
+    written without spaces after "," and ":".
+    """
+    payload = sanitize(packet, level=level, reject_leaks=reject_leaks)
+    if payload is None:
+        return None
+    return json.dumps(payload, separators=(",", ":"))

mllang_protocol-0.1.2/mllang/slots.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""MLLANG slot definitions."""
+# Canonical slot order (16 slots)
+SLOT_ORDER = ["V", "I", "G", "S", "D", "E", "U", "R", "T", "F", "Y", "B", "N", "H", "P", "A"]
+# Required slots (must be present for valid packet)
+REQUIRED_SLOTS = ["V", "G", "S", "N", "H"]
+# Slot meanings
+SLOT_DESCRIPTIONS = {
+    "V": "version + round (e.g. V:0.1.r1)",
+    "I": "thread-id",
+    "G": "goal-map {key=value, ...}",
+    "S": "state-map {key=value, ...}",
+    "D": "decisions [item, item, ...]",
+    "E": "evidence [path, citation, ...]",
+    "U": "unknowns [?question, ...]",
+    "R": "risks [risk, ...]",
+    "T": "test result {check=pass|fail, ...}",
+    "F": "files [path, ...]",
+    "Y": "tool-calls [$verb(args), ...]",
+    "B": "budget cap {tokens=N, time=Ns, money=N}",
+    "N": "next @agent -> verb",
+    "H": "halt (see HALT_ENUM)",
+    "P": "confidence float 0.00-1.00",
+    "A": "assumptions [item, ...]",
+}
+# Slots that contain maps (key=value)
+MAP_SLOTS = {"G", "S", "T", "B"}
+# Slots that contain lists
+LIST_SLOTS = {"D", "E", "U", "R", "F", "Y", "A"}
+# Slots with scalar values
+SCALAR_SLOTS = {"V", "I", "N", "H", "P"}

mllang_protocol-0.1.2/mllang_protocol.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,21 @@
+Metadata-Version: 2.4
+Name: mllang-protocol
+Version: 0.1.2
+Summary: MLLANG v0.1 reference parser — compact text-surface protocol for AI-agent state, lives inside markdown fenced blocks
+Author: Jake Liu
+License: Apache-2.0
+Project-URL: Homepage, https://github.com/jakeliu/mllang
+Project-URL: Repository, https://github.com/jakeliu/mllang
+Project-URL: Documentation, https://mllang.com
+Project-URL: Issues, https://github.com/jakeliu/mllang/issues
+Keywords: mllang,ai,agents,multi-agent,llm,protocol
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.9

mllang_protocol-0.1.2/mllang_protocol.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,12 @@
+pyproject.toml
+mllang/__init__.py
+mllang/embed.py
+mllang/halt.py
+mllang/operators.py
+mllang/packet.py
+mllang/sanitize.py
+mllang/slots.py
+mllang_protocol.egg-info/PKG-INFO
+mllang_protocol.egg-info/SOURCES.txt
+mllang_protocol.egg-info/dependency_links.txt
+mllang_protocol.egg-info/top_level.txt

mllang_protocol-0.1.2/mllang_protocol.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

mllang_protocol-0.1.2/mllang_protocol.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ mllang

mllang_protocol-0.1.2/pyproject.toml ADDED Viewed

@@ -0,0 +1,35 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "mllang-protocol"
+version = "0.1.2"
+description = "MLLANG v0.1 reference parser — compact text-surface protocol for AI-agent state, lives inside markdown fenced blocks"
+license = {text = "Apache-2.0"}
+requires-python = ">=3.9"
+authors = [
+    {name = "Jake Liu"}
+]
+keywords = ["mllang", "ai", "agents", "multi-agent", "llm", "protocol"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Software Development :: Libraries",
+]
+dependencies = []
+[project.urls]
+Homepage = "https://github.com/jakeliu/mllang"
+Repository = "https://github.com/jakeliu/mllang"
+Documentation = "https://mllang.com"
+Issues = "https://github.com/jakeliu/mllang/issues"
+[tool.setuptools.packages.find]
+include = ["mllang*"]

mllang_protocol-0.1.2/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0