PyPI - memocode - Versions diffs - 0.2.2__tar.gz → 0.3.0__tar.gz - Mend

memocode 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

{memocode-0.2.2 → memocode-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: memocode
-Version: 0.2.2
+Version: 0.3.0
 Summary: Personal AI coding agent with memory, tool execution, and safety controls
 Author: AssassinCHN
 Requires-Python: >=3.11
@@ -14,7 +14,6 @@ Requires-Dist: certifi>=2024.0
 Personal AI coding agent CLI with memory, tool execution, and safety controls.
-**Author:** AssassinCHN
 ## Features

{memocode-0.2.2 → memocode-0.3.0}/README.md RENAMED Viewed

@@ -2,7 +2,6 @@
 Personal AI coding agent CLI with memory, tool execution, and safety controls.
-**Author:** AssassinCHN
 ## Features

{memocode-0.2.2 → memocode-0.3.0}/control/brain.py RENAMED Viewed

@@ -68,6 +68,8 @@ def _tool_hint(name: str, args: dict) -> str:
         return dim(args.get("pattern", ""))
     if name == "web_fetch":
         return dim(args.get("url", ""))
+    if name == "memory_search":
+        return dim(args.get("query", ""))
     return ""
@@ -93,42 +95,106 @@ def _print_tool_header_simple(name: str, args: dict):
                 print(dim(f"   {extra}"))
-def _resolve_task_paths(tasks: list, work_dir: str) -> list:
-    """Legacy helper kept for any remaining callers."""
-    result = []
-    for t in tasks:
-        path_args = {
-            "file_read": ["path"], "file_write": ["path"],
-        }.get(t.tool, [])
-        if not path_args:
-            result.append(t)
-            continue
-        args = dict(t.args)
-        for arg_name in path_args:
-            p = args.get(arg_name, "")
-            if (isinstance(p, str) and p
-                    and not os.path.isabs(p)
-                    and not p.startswith("~")
-                    and "task_" not in p):   # skip {{task_N_output}} placeholders
-                args[arg_name] = os.path.join(work_dir, p)
-        result.append(Task(
-            id=t.id, tool=t.tool, args=args,
-            description=t.description, depends_on=t.depends_on,
-            reversible=t.reversible, backup_paths=t.backup_paths,
-        ))
-    return result
+# Path of the JSON file holding the project-memory dimension table.
+_PROJECT_MEMORY_JSON = os.path.join(_DATA_DIR, "project_memory.json")
+# Default dimension table for project memory, keyed by dimension name.
+# Each entry carries a human-readable "label" and a numeric "stability".
+# NOTE(review): the semantics of "stability" are defined by chatmem's
+# DimensionConfig, which is not visible here — confirm before tuning values.
+_DEFAULT_PROJECT_MEMORY = {
+    "dimensions": {
+        "architecture": {"label": "Tech stack, modules, structure",             "stability": 40.0},
+        "decisions":    {"label": "Confirmed design decisions and rationale",   "stability": 35.0},
+        "conventions":  {"label": "Naming, interface, format conventions",      "stability": 35.0},
+        "context":      {"label": "Project background, goals, constraints",     "stability": 30.0},
+        "progress":     {"label": "Current phase, pending key tasks",           "stability": 20.0},
+        "completed":    {"label": "Completed tasks and milestones",             "stability": 25.0},
+    },
+}
+def _build_project_judge_prompt(dimensions: dict) -> str:
+    """Build the English judge-prompt template for project memory.
+    Each value in *dimensions* must expose a ``label`` attribute. The returned
+    text still contains the literal placeholders ``{existing}`` and ``{text}``,
+    left for the consumer to fill in.
+    """
+    # NOTE(review): the JSON example in the result contains single literal braces;
+    # if the consumer fills the placeholders with str.format() those braces would
+    # be parsed as fields — confirm how CoreMemory substitutes the template.
+    bullet_lines = "\n".join([f"- {name}: {cfg.label}" for name, cfg in dimensions.items()])
+    allowed_names = ", ".join([f'"{name}"' for name in dimensions])
+    template = f"""You are a project memory editor. Review the conversation and update the project's knowledge base.
+Project memory dimensions:
+{bullet_lines}
+Existing project memory:
+{{existing}}
+New conversation:
+{{text}}
+Rules:
+- One entry per dimension. Key must equal the dimension name (e.g. key="decisions").
+- For each dimension, choose one action:
+  SKIP       — no new project-relevant information for this dimension.
+  SUPPLEMENT — new info extends existing (merge into one updated value).
+  UPDATE     — existing info is clearly superseded or corrected.
+- Special rules by dimension:
+  completed: always SUPPLEMENT (append new completions, never erase old ones).
+  progress:  UPDATE freely when the current phase or pending tasks change.
+             ONLY record what the user explicitly stated. Do NOT infer, extrapolate,
+             or add typical/expected follow-up tasks not mentioned in the conversation.
+  decisions: SUPPLEMENT by default; UPDATE only if the user makes an explicit statement reversing a prior decision
+             (e.g. "we're switching from X to Y", "decided to drop X"). Do NOT update based on the assistant
+             discussing alternatives, or the user asking hypothetical questions about other options.
+  architecture/conventions: SUPPLEMENT by default; UPDATE on explicit refactor or migration.
+- Return ONLY entries that changed. Omit SKIPped ones.
+- Exclude: user personal preferences, one-time questions, reasoning processes.
+- Never fabricate or infer information not present in the conversation.
+Return a JSON array (empty if nothing changed):
+[{{"key": "<dimension_name>", "value": "<merged or new value>", "dimension": "<dimension_name>"}}]
+Dimension name must be one of: {allowed_names}
+Output JSON only:"""
+    return template
+def _build_project_judge_prompt_zh(dimensions: dict) -> str:
+    """Build the Chinese judge-prompt template for project memory.
+    Each value in *dimensions* must expose a ``label`` attribute. The returned
+    text still contains the literal placeholders ``{existing}`` and ``{text}``,
+    left for the consumer to fill in.
+    """
+    # NOTE(review): the JSON example in the result contains single literal braces;
+    # if the consumer fills the placeholders with str.format() those braces would
+    # be parsed as fields — confirm how CoreMemory substitutes the template.
+    bullet_lines = "\n".join([f"- {name}: {cfg.label}" for name, cfg in dimensions.items()])
+    allowed_names = ", ".join([f'"{name}"' for name in dimensions])
+    template = f"""你是一个项目记忆编辑器。审查对话内容，更新项目知识库。
+项目记忆维度：
+{bullet_lines}
+当前项目记忆：
+{{existing}}
+新对话：
+{{text}}
+规则：
+- 每个维度只有一条记录，key 等于维度名（如 key="decisions"）。
+- 对每个维度，选择以下操作之一：
+  跳过 — 该维度没有新的项目相关信息。
+  补充 — 新信息扩展了已有内容（合并为一个更新后的 value）。
+  修正 — 已有信息被明确推翻或替换。
+- 各维度特殊规则：
+  completed：始终补充（追加新完成内容，不删除旧记录）。
+  progress：当前阶段或待办任务变化时，直接修正。
+            只记录用户明确说出的内容，严禁推断、外推或添加"典型后续任务"。
+            用户未提及的待办事项一律不写。
+  decisions：默认补充；仅当用户明确陈述推翻先前决策时才修正
+             （如"我们从X换成Y了"、"决定放弃X"）。不得因为
+             AI 讨论替代方案、或用户询问假设性问题而修正。
+  architecture/conventions：默认补充；仅在明确重构或迁移时修正。
+- 只返回发生变化的条目，跳过的不返回。
+- 不包括：用户个人偏好、一次性问题、推理过程。
+- 严禁捏造或推断对话中未出现的信息。
+返回JSON数组（无变化则返回空数组）：
+[{{"key": "<维度名>", "value": "<合并后或新的内容>", "dimension": "<维度名>"}}]
+维度名必须是以下之一：{allowed_names}
+只输出JSON："""
+    return template
 _DEFAULT_CHATMEM = {
+    # Default chatmem.json payload: empty "llm" settings plus the memory dimensions below.
     "llm": {},
     "dimensions": {
+        # Each dimension maps to a display "label" and a numeric "stability".
-        "identity":      {"label": "Who I am",         "stability": 50.0},
-        "values":        {"label": "What I believe",   "stability": 40.0},
-        "goals":         {"label": "What I want",      "stability": 30.0},
-        "preferences":   {"label": "Likes / dislikes", "stability": 25.0},
-        "capabilities":  {"label": "What I can do",    "stability": 30.0},
-        "emotional":     {"label": "How I react",      "stability": 20.0},
-        "autobiography": {"label": "Key experiences",  "stability": 35.0},
+        "communication": {"label": "Expression style, language preference, tone, detail level",                                        "stability": 45.0},
+        "autonomy":      {"label": "How much decision authority delegated to LLM: executes directly vs presents options vs validates", "stability": 40.0},
     },
 }
@@ -170,11 +236,11 @@ _DEFAULT_AGENT = {
 }
-def _init_data_dir() -> tuple[str, str]:
+def _init_data_dir() -> tuple[str, str, str]:
     """
-    Ensure ~/.mcode/ exists with agent.json and chatmem.json.
+    Ensure ~/.mcode/ exists with agent.json, chatmem.json, project_memory.json.
     Migrates from control/ if old files exist and new ones don't.
-    Returns (agent_json_path, chatmem_json_path).
+    Returns (agent_json_path, chatmem_json_path, project_memory_json_path).
     """
     import json as _json
     import shutil as _shutil
@@ -184,6 +250,7 @@ def _init_data_dir() -> tuple[str, str]:
     agent_path = os.path.join(_DATA_DIR, "agent.json")
     chatmem_path = os.path.join(_DATA_DIR, "chatmem.json")
+    project_memory_path = _PROJECT_MEMORY_JSON
     # Migrate agent.json from old location
     old_agent = os.path.join(_CONTROL, "agent.json")
@@ -205,7 +272,13 @@ def _init_data_dir() -> tuple[str, str]:
                 _json.dump(_DEFAULT_CHATMEM, f, indent=2, ensure_ascii=False)
                 f.write("\n")
-    return agent_path, chatmem_path
+    # Create project_memory.json with default project dimensions
+    if not os.path.exists(project_memory_path):
+        with open(project_memory_path, "w") as f:
+            _json.dump(_DEFAULT_PROJECT_MEMORY, f, indent=2, ensure_ascii=False)
+            f.write("\n")
+    return agent_path, chatmem_path, project_memory_path
 def _sync_active_model(agent_cfg: dict, chatmem_json_path: str | None):
@@ -227,7 +300,7 @@ def _sync_active_model(agent_cfg: dict, chatmem_json_path: str | None):
                 profile = model_cfg
                 agent_cfg["active_model"] = active
                 # Save the updated active_model back to agent.json
-                agent_path, _ = _init_data_dir()
+                agent_path, *_ = _init_data_dir()
                 with open(agent_path) as f:
                     import json as _json
                     data = _json.load(f)
@@ -257,9 +330,10 @@ class Brain:
         config_path: str | None = None,
         verbose: bool | None = None,
         project: str | None = None,
+        no_lock: bool = False,
     ):
         # Ensure ~/.mcode/ exists; migrate old files if needed
-        agent_path, chatmem_path = _init_data_dir()
+        agent_path, chatmem_path, project_memory_path = _init_data_dir()
         self._agent_cfg_path = agent_path
         self._config_path = config_path or chatmem_path
@@ -319,12 +393,25 @@ class Brain:
         else:
             self.projects.find_or_create_for_cwd()
+        # Project memory — per-project CoreMemory in the same project DB
+        self.project_memory = self._make_project_memory(
+            cfg, _api_key, project_memory_path
+        )
         # Memory layer — one DB per project, core DB shared
         self.memory = ContextManager.create(
             config=cfg,
             api_key=_api_key,
             db_path=self.projects.project_db_path,
             core_db_path=self.projects.core_db_path,
+            no_lock=no_lock,
+            extra_context_fn=(
+                self.project_memory.to_context_string if self.project_memory else None
+            ),
+            on_compress_fn=(
+                self._run_project_judge if self.project_memory else None
+            ),
+            auto_recall=False,
         )
         # Audit log
@@ -339,6 +426,7 @@ class Brain:
         self.registry.register(GLOB_TOOL)
         self.registry.register(GREP_TOOL)
         self.registry.register(WEB_FETCH_TOOL)
+        self.registry.register(self._make_memory_search_tool())
         load_external_tools(self.registry)
         self._max_tool_iter: int = agent_cfg.get("max_tool_iter", 40)
@@ -625,7 +713,8 @@ class Brain:
             return True
         if self.auto_mode:
-            whitelist = self._agent_cfg.get("auto_whitelist", None)
+            user_wl = self._agent_cfg.get("auto_whitelist", None)
+            whitelist = list(set(_safety.DEFAULT_AUTO_WHITELIST) | set(user_wl)) if user_wl else None
             result = _safety.check_auto(task, work_dir=work_dir, whitelist=whitelist)
             if result.violation:
                 print(red(f"\n🛑  AUTO-BLOCKED  {result.reason}"))
@@ -787,20 +876,111 @@ class Brain:
     # ------------------------------------------------------------------
     def switch_project(self, name: str):
+        """End the current session, switch to project *name*, then rebuild the memory layer."""
-        self.memory.end_session()
+        self.end_session()
         self.projects.switch_project(name)
         self._reinit_memory()
+    def _make_memory_search_tool(self):
+        """Create the ``memory_search`` Tool backed by ``self.memory.recent_memory``.
+        The recent-memory object is captured when the tool is built, so the
+        returned tool keeps searching that object even if ``self.memory`` is
+        replaced afterwards.
+        """
+        from tools.registry import Tool, ToolSchema
+        store = self.memory.recent_memory
+        def _search(query: str, top_k: int = 5) -> str:
+            # min_score=0.0 is passed straight through to the store's search().
+            hits = store.search(query, top_k=top_k, min_score=0.0)
+            return "\n\n---\n\n".join(hits) if hits else "No relevant memory found."
+        tool_description = (
+            "Search compressed summaries of past conversation sessions for relevant context. "
+            "Call this when the user references something that may have been discussed in a previous session "
+            "and you don't have that information in the current context. "
+            "Use key terms from the user's question as the query."
+        )
+        tool_parameters = {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "Key terms to search for (e.g. '8080 端口', 'authentication design')",
+                },
+                "top_k": {
+                    "type": "integer",
+                    "description": "Max results to return (default 5)",
+                    "default": 5,
+                },
+            },
+            "required": ["query"],
+        }
+        schema = ToolSchema(
+            name="memory_search",
+            description=tool_description,
+            parameters=tool_parameters,
+        )
+        return Tool(schema=schema, fn=_search)
+    def _make_project_memory(self, cfg: "ContextConfig", api_key: str, project_memory_path: str):
+        """Instantiate a CoreMemory for project-scoped knowledge.
+        The dimension table is read from the JSON file at *project_memory_path*.
+        Loading is best-effort: if the file cannot be read or parsed, a warning is
+        logged and project memory is disabled.
+        Returns the configured CoreMemory, or None when no dimensions are available.
+        """
+        import json as _json
+        import logging as _logging
+        from chatmem.memory.core_memory import CoreMemory
+        from chatmem.config import DimensionConfig
+        try:
+            # JSON is read as UTF-8 explicitly so non-ASCII labels do not depend
+            # on the platform's locale encoding.
+            with open(project_memory_path, encoding="utf-8") as f:
+                pm_data = _json.load(f)
+            dims = {
+                k: DimensionConfig(label=v["label"], stability=float(v["stability"]))
+                for k, v in pm_data.get("dimensions", {}).items()
+            }
+        except Exception as exc:
+            # Still best-effort (never crash start-up), but leave a trace so a
+            # broken config file does not silently switch project memory off.
+            _logging.getLogger("chatmem").warning(
+                "[project] could not load dimensions from %s: %s",
+                project_memory_path, exc,
+            )
+            dims = {}
+        if not dims:
+            return None
+        compress_model = cfg.llm.compress_model or cfg.llm.model
+        # Reasoning/thinking switches are removed from extra_body for judge calls.
+        _THINKING_KEYS = {"reasoning_split", "thinking", "reasoning_effort"}
+        extra_body = None
+        if cfg.llm.extra_body:
+            # An extra_body left empty after filtering is passed as None.
+            extra_body = {
+                k: v for k, v in cfg.llm.extra_body.items() if k not in _THINKING_KEYS
+            } or None
+        provider = getattr(cfg.llm, "provider", "openai") or "openai"
+        pm = CoreMemory(
+            db_path=self.projects.project_db_path,
+            api_key=api_key,
+            model=compress_model,
+            base_url=cfg.llm.base_url,
+            dimensions=dims,
+            extra_body=extra_body,
+            provider=provider,
+            context_label="Project Memory",
+        )
+        # Override judge prompts with project-specific framing
+        pm._judge_prompt_en = _build_project_judge_prompt(dims)
+        pm._judge_prompt_zh = _build_project_judge_prompt_zh(dims)
+        return pm
     def _reinit_memory(self):
+        """Close the current memory layer (if any) and rebuild project memory, the ContextManager and the memory_search tool."""
         if getattr(self, "memory", None) is not None:
             self.memory.close()
         cfg = ContextConfig.from_json(self._config_path) if self._config_path else ContextConfig()
+        api_key = cfg.llm.resolve_api_key()
+        self.project_memory = self._make_project_memory(cfg, api_key, _PROJECT_MEMORY_JSON)
+        # NOTE(review): unlike the ContextManager.create call in __init__, no
+        # no_lock argument is forwarded here — confirm this is intended.
         self.memory = ContextManager.create(
             config=cfg,
-            api_key=cfg.llm.resolve_api_key(),
+            api_key=api_key,
             db_path=self.projects.project_db_path,
             core_db_path=self.projects.core_db_path,
+            extra_context_fn=(
+                self.project_memory.to_context_string if self.project_memory else None
+            ),
+            on_compress_fn=(
+                self._run_project_judge if self.project_memory else None
+            ),
+            auto_recall=False,
         )
+        # The search tool captures self.memory.recent_memory when built, so a fresh
+        # one is registered after self.memory has been replaced.
+        self.registry.register(self._make_memory_search_tool())
     def _save_agent_cfg(self):
         """Persist current _agent_cfg to agent.json."""
@@ -813,5 +993,32 @@ class Brain:
     # Session lifecycle
     # ------------------------------------------------------------------
+    def _run_project_judge(self, text: str):
+        """Feed *text* to the project-memory judge and log what it wrote.
+        Does nothing when project memory is disabled. Exceptions raised while
+        judging or logging are reported as a warning instead of propagating.
+        """
+        if not self.project_memory:
+            return
+        import logging as _logging
+        log = _logging.getLogger("chatmem")
+        try:
+            entries = self.project_memory.judge_and_update(text)
+            if not entries:
+                log.info("[project] judge returned empty")
+                return
+            for entry in entries:
+                # Values are truncated to 60 characters in the log line.
+                log.info("[project] judge wrote  key=%r  dim=%s  value=%r",
+                         entry["key"], entry.get("dimension"), entry["value"][:60])
+        except Exception as exc:
+            log.warning("[project] judge error: %s", exc)
     def end_session(self):
+        """End the memory session, then run the project judge on the session's verbatim turns in the background."""
+        # Capture verbatim history before end_session clears it
+        verbatim = [m for m in self.memory._history if not m.get("_is_summary")]
+        conv_text = "\n".join(
+            f"{m['role']}: {m.get('content', '')}" for m in verbatim
+        ) if verbatim else ""
         self.memory.end_session()
+        # The judge is started on a daemon thread so it does not block the caller.
+        # NOTE(review): _run_project_judge reads self.project_memory when it runs;
+        # switch_project calls _reinit_memory (which rebinds self.project_memory)
+        # right after this method returns, so the text may end up judged against
+        # the next project's memory — confirm, and consider binding the instance here.
+        if conv_text:
+            threading.Thread(
+                target=self._run_project_judge, args=(conv_text,), daemon=True
+            ).start()

memocode-0.3.0/control/chatmem/compressor.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""
+Compression strategy: LLM structured summarization.
+Captures decisions, rationale, and context — not code or config values.
+Compatible with any OpenAI-compatible API.
+"""
+import re
+from typing import Any
+from openai import OpenAI
+# ---------------------------------------------------------------------------
+# Unified LLM completion (OpenAI-compatible or Anthropic native)
+# ---------------------------------------------------------------------------
+def _llm_complete(
+    messages: list[dict],
+    max_tokens: int,
+    api_key: str,
+    model: str,
+    base_url: str | None = None,
+    extra_body: dict | None = None,
+    provider: str = "openai",
+) -> str:
+    """Run one chat completion and return the reply text without ``<think>`` blocks.
+    With ``provider == "anthropic"`` the native Anthropic client is used: system
+    messages are lifted out of *messages* and the content of the last one is sent
+    as the ``system`` argument (earlier ones are dropped); *extra_body* is not
+    forwarded on this path. Any other provider value goes through the
+    OpenAI-compatible client, which receives *extra_body* when it is truthy.
+    """
+    if provider == "anthropic":
+        import anthropic
+        system_contents = [m["content"] for m in messages if m["role"] == "system"]
+        chat_turns = [m for m in messages if m["role"] != "system"]
+        request: dict = {"model": model, "max_tokens": max_tokens, "messages": chat_turns}
+        if system_contents and system_contents[-1]:
+            request["system"] = system_contents[-1]
+        reply = anthropic.Anthropic(api_key=api_key, base_url=base_url).messages.create(**request)
+        text_parts = [block.text for block in reply.content if block.type == "text"]
+        raw = "".join(text_parts).strip()
+    else:
+        request = {"model": model, "messages": messages, "max_tokens": max_tokens}
+        if extra_body:
+            request["extra_body"] = extra_body
+        reply = OpenAI(api_key=api_key, base_url=base_url).chat.completions.create(**request)
+        raw = (reply.choices[0].message.content or "").strip()
+    # Reasoning wrapped in <think> tags is stripped from the returned answer.
+    return re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _extract_json_array(raw: str) -> list:
+    """Extract the first parseable JSON array from raw LLM output.
+    ``<think>...</think>`` reasoning blocks are removed first. Parsing is then
+    attempted from each ``[`` in turn, so stray brackets in surrounding prose
+    (e.g. "[1]" footnotes or a trailing "(see [docs])") no longer prevent the
+    real array from being found.
+    Returns the decoded list, or ``[]`` when no JSON array can be parsed.
+    """
+    import json
+    clean = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
+    decoder = json.JSONDecoder()
+    start = clean.find("[")
+    while start != -1:
+        try:
+            # raw_decode stops at the end of the first complete JSON value, so
+            # text after the array is ignored instead of breaking the parse.
+            value, _end = decoder.raw_decode(clean, start)
+        except json.JSONDecodeError:
+            start = clean.find("[", start + 1)
+            continue
+        return value
+    return []
+def _is_chinese(text: str, threshold: float = 0.3) -> bool:
+    """
+    Return True if Chinese characters make up >= threshold of non-whitespace chars.
+    threshold=0.3 catches mixed Chinese+code conversations while excluding pure English.
+    All whitespace (spaces, newlines, tabs, carriage returns, full-width spaces)
+    is excluded from the total, so indentation-heavy text does not dilute the ratio.
+    Only code points U+4E00–U+9FFF are counted as Chinese. Empty or
+    whitespace-only text yields False.
+    """
+    # str.split() with no argument splits on any whitespace run.
+    visible = "".join(text.split())
+    total_chars = len(visible)
+    chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", visible))
+    return total_chars > 0 and (chinese_chars / total_chars) >= threshold
+# ---------------------------------------------------------------------------
+# Structured session context summarization
+# ---------------------------------------------------------------------------
+# English summarization template; the {text} placeholder is filled with the
+# conversation via str.format() in compress_session_context().
+_SESSION_CONTEXT_PROMPT = """You are a conversation summarization assistant. Compress the following conversation into a structured summary for restoring context in subsequent conversations.
+Rules:
+- Focus on decisions, rationale, context, and outcomes — not implementation details
+- EXCLUDE: code snippets, config file contents, file paths, specific values that can be looked up locally in the project
+- INCLUDE: why decisions were made, what was agreed on, what problems were encountered
+- Use concise bullet points, no long paragraphs
+- Output only the following structure, omit sections with no content:
+  [Topics] Main topics and tasks covered
+  [Decisions/Conclusions] Confirmed plans, choices, conclusions — include rationale
+  [Progress] What was completed, current stage
+  [Pending] Unresolved issues, next steps
+  [Key Context] Background, constraints, and non-obvious facts not findable in local files
+- Omit small talk, transitional content, and anything already in project files
+- Goal: preserve knowledge that would otherwise be lost between sessions
+Conversation:
+{text}
+Structured summary:"""
+# Chinese counterpart of the template above, selected when _is_chinese(text)
+# is true; it uses the same {text} placeholder.
+_SESSION_CONTEXT_PROMPT_ZH = """你是一个对话摘要助手。请将以下对话内容压缩为结构化摘要，用于后续对话的上下文恢复。
+规则：
+- 聚焦于决策、理由、背景和结论——不是实现细节
+- 排除：代码片段、配置文件内容、文件路径、可以在本地项目中查到的具体数值
+- 包含：决策的原因、达成的共识、遇到的问题
+- 用简洁要点，不要长段落
+- 只输出以下结构，没有内容的section直接省略：
+  【话题】涉及的主要话题和任务
+  【决策/结论】已确定的方案、选择、结论——包含理由
+  【进展】完成了什么、进行到哪一步
+  【待解决】未解决的问题、下一步计划
+  【关键背景】背景信息、约束条件、在本地文件里找不到的非显而易见的事实
+- 忽略闲聊、过渡性内容、以及项目文件里已有的内容
+- 目标：保留跨会话后否则会丢失的知识
+对话内容：
+{text}
+结构化摘要："""
+def compress_session_context(
+    text: str,
+    api_key: str,
+    model: str = "gpt-4o-mini",
+    base_url: str | None = None,
+    extra_body: dict | None = None,
+    provider: str = "openai",
+) -> str:
+    """
+    Summarize *text* into the structured session-context format.
+    Blank or whitespace-only input is returned unchanged without calling the LLM.
+    The Chinese template is used when _is_chinese(text) is true, the English one
+    otherwise; the completion is capped at 800 tokens.
+    """
+    if text.strip() == "":
+        return text
+    template = _SESSION_CONTEXT_PROMPT
+    if _is_chinese(text):
+        template = _SESSION_CONTEXT_PROMPT_ZH
+    request = [{"role": "user", "content": template.format(text=text)}]
+    return _llm_complete(
+        request,
+        max_tokens=800,
+        api_key=api_key,
+        model=model,
+        base_url=base_url,
+        extra_body=extra_body,
+        provider=provider,
+    )
+class Compressor:
+    """
+    Holds LLM connection settings and exposes session-context compression.
+    compress_for_session_context() produces the structured summary (capped at
+    800 completion tokens) used for in-session history compression and for
+    cross-session recent_memory storage. The prompt language follows the
+    language of the content.
+    """
+    def __init__(
+        self,
+        api_key: str,
+        model: str = "gpt-4o-mini",
+        base_url: str | None = None,
+        extra_body: dict | None = None,
+        provider: str = "openai",
+    ):
+        # Settings are stored as-is and forwarded on every compression call.
+        self.api_key, self.model = api_key, model
+        self.base_url, self.extra_body = base_url, extra_body
+        self.provider = provider
+    def compress_for_session_context(self, text: str) -> str:
+        """Return the structured summary of *text* using this instance's LLM settings."""
+        return compress_session_context(
+            text,
+            api_key=self.api_key,
+            model=self.model,
+            base_url=self.base_url,
+            extra_body=self.extra_body,
+            provider=self.provider,
+        )

memocode 0.2.2__tar.gz → 0.3.0__tar.gz

memocode 0.2.2__tar.gz → 0.3.0__tar.gz