npm - @researai/deepscientist - Versions diffs - 1.5.9 → 1.5.12 - Mend

@researai/deepscientist 1.5.9 → 1.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (165) hide show

package/src/deepscientist/quest/stage_views.py CHANGED Viewed

@@ -153,8 +153,68 @@ class QuestStageViewBuilder:
                 return candidate
         return self.quest_root
+    def _infer_stage_from_branch_name(self) -> str | None:
+        normalized = str(self.branch_name or "").strip().lower()
+        if not normalized:
+            return None
+        if normalized.startswith("analysis/"):
+            return "analysis"
+        if normalized.startswith("run/"):
+            return "experiment"
+        if normalized.startswith("idea/"):
+            return "idea"
+        if normalized.startswith("paper/") or normalized.startswith("write/"):
+            return "paper"
+        if normalized.startswith("baseline/"):
+            return "baseline"
+        return None
+    def _has_paper_state(self) -> bool:
+        paper_root = self._paper_root()
+        return bool(
+            self._paper_candidates()
+            or (paper_root / "selected_outline.json").exists()
+            or (paper_root / "draft.md").exists()
+            or self._paper_bundle_manifest()
+        )
+    def _resolve_effective_stage_key(self) -> str:
+        normalized = normalize_stage_key(self.stage_key)
+        if normalized in {"baseline", "idea", "experiment", "analysis", "paper"}:
+            return normalized
+        if normalized != "general":
+            return normalized
+        inferred = self._infer_stage_from_branch_name()
+        if inferred:
+            return inferred
+        if self._analysis_stage_items(None):
+            return "analysis"
+        if self._experiment_stage_items():
+            return "experiment"
+        if self._idea_stage_items():
+            return "idea"
+        if self._has_paper_state():
+            return "paper"
+        if self._baseline_stage_items():
+            return "baseline"
+        return normalized
+    @staticmethod
+    def _artifact_detail(item: dict[str, Any] | None, payload: dict[str, Any]) -> dict[str, Any] | None:
+        if not isinstance(payload, dict) or not payload:
+            return None
+        record = dict(item or {})
+        return {
+            "artifact_id": payload.get("artifact_id") or payload.get("id"),
+            "artifact_kind": payload.get("kind"),
+            "artifact_path": record.get("path"),
+            "payload": payload,
+        }
     def build(self) -> dict[str, Any]:
         selection_type = str(self.selection.get("selection_type") or "").strip()
+        self.stage_key = self._resolve_effective_stage_key()
         if selection_type == "branch_node" and self.stage_key not in {"experiment", "analysis", "paper"}:
             return self._build_branch()
         if self.stage_key == "baseline":
@@ -855,7 +915,8 @@ class QuestStageViewBuilder:
                     "draft_markdown": draft_markdown,
                     "literature_files": literature_files,
                     "decision_reason": payload.get("reason"),
-                }
+                },
+                "latest_artifact": self._artifact_detail(latest, payload),
             },
             lineage_intent=lineage_intent,
             idea_draft_path=draft_md_rel_path,
@@ -1084,7 +1145,8 @@ class QuestStageViewBuilder:
                     else None,
                     "analysis_summary_path": self._relative_path_or_raw(analysis_summary_path),
                     "analysis_summary_markdown": analysis_summary_markdown,
-                }
+                },
+                "latest_artifact": self._artifact_detail(latest_experiment_item or latest_idea_item, latest_experiment_payload or latest_idea_payload),
             },
             lineage_intent=lineage_intent,
             idea_draft_path=idea_draft_rel_path,
@@ -1198,7 +1260,8 @@ class QuestStageViewBuilder:
                     "trace_summary": trace_summary,
                     "trace_markdown": trace_markdown,
                     "trace_actions": self._recent_trace_actions(),
-                }
+                },
+                "latest_artifact": self._artifact_detail(latest, payload),
             },
         )
@@ -1388,10 +1451,12 @@ class QuestStageViewBuilder:
                     "todo_manifest_markdown": self._markdown_body_for_path(manifest.get("todo_manifest_path")),
                     "summary_path": self._relative_path_or_raw(summary_path) if summary_path else None,
                     "summary_markdown": summary_markdown,
+                    "manifest_payload": manifest,
                     "trace_summary": trace_summary,
                     "trace_markdown": self._trace_markdown(),
                     "trace_actions": self._recent_trace_actions(),
-                }
+                },
+                "latest_artifact": self._artifact_detail(latest, latest_payload),
             },
         )
@@ -1532,6 +1597,7 @@ class QuestStageViewBuilder:
                         "latex_root_path": latex_root_rel,
                         "main_tex_path": main_tex_rel,
                     },
-                }
+                },
+                "latest_artifact": self._artifact_detail(paper_items[-1] if paper_items else None, self._payload(paper_items[-1] if paper_items else {})),
             },
         )

package/src/deepscientist/runners/codex.py CHANGED Viewed

@@ -19,6 +19,11 @@ from ..shared import append_jsonl, ensure_dir, generate_id, read_yaml, resolve_r
 from ..web_search import extract_web_search_payload
 from .base import RunRequest, RunResult
+# Character limit passed to _structured_text for bash_exec tool arguments.
+_TOOL_EVENT_ARGS_TEXT_LIMIT = 8_000
+# Character limit passed to _structured_text for bash_exec tool output; also
+# the fallback limit inside _structured_text when the caller gives none.
+_TOOL_EVENT_OUTPUT_TEXT_LIMIT = 16_000
+# Size cap, in UTF-8 bytes of the JSON encoding, above which
+# _compact_tool_event_payload starts shrinking an event.
+_MAX_QUEST_EVENT_JSON_BYTES = 2_000_000
+# Character limit for the output preview kept in an oversized event.
+_OVERSIZED_EVENT_PREVIEW_TEXT_LIMIT = 12_000
 def _compact_text(value: object, *, limit: int = 1200) -> str:
     if value is None:
@@ -35,6 +40,96 @@ def _compact_text(value: object, *, limit: int = 1200) -> str:
     return text[: limit - 1].rstrip() + "…"
+def _truncate_leaf_text(text: str, *, limit: int) -> str:
+    if limit <= 0 or len(text) <= limit:
+        return text
+    head = max(int(limit * 0.7), 256)
+    tail = max(limit - head - 64, 128)
+    omitted = max(len(text) - head - tail, 0)
+    return f"{text[:head].rstrip()}\n...[truncated {omitted} chars]...\n{text[-tail:].lstrip()}"
+def _truncate_structured_value(value: object, *, string_limit: int) -> object:
+    if isinstance(value, str):
+        return _truncate_leaf_text(value.strip(), limit=string_limit)
+    if isinstance(value, list):
+        return [_truncate_structured_value(item, string_limit=string_limit) for item in value[:200]]
+    if isinstance(value, dict):
+        truncated: dict[object, object] = {}
+        for index, (key, item) in enumerate(value.items()):
+            if index >= 200:
+                truncated["__truncated__"] = f"truncated remaining {len(value) - 200} item(s)"
+                break
+            truncated[key] = _truncate_structured_value(item, string_limit=string_limit)
+        return truncated
+    return value
+def _structured_text(value: object, *, limit: int | None = None) -> str:
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return _truncate_leaf_text(value.strip(), limit=limit or len(value))
+    normalized_value = _truncate_structured_value(value, string_limit=max(limit or _TOOL_EVENT_OUTPUT_TEXT_LIMIT, 512))
+    try:
+        return json.dumps(normalized_value, ensure_ascii=False, indent=2)
+    except TypeError:
+        return _truncate_leaf_text(str(value), limit=limit or _TOOL_EVENT_OUTPUT_TEXT_LIMIT)
+def _encoded_json_size(value: object) -> int:
+    try:
+        return len(json.dumps(value, ensure_ascii=False).encode("utf-8"))
+    except Exception:
+        return len(str(value).encode("utf-8", errors="ignore"))
+def _compact_tool_event_payload(payload: dict[str, Any]) -> dict[str, Any]:
+    if _encoded_json_size(payload) <= _MAX_QUEST_EVENT_JSON_BYTES:
+        return payload
+    compacted = dict(payload)
+    output_text = str(compacted.get("output") or "")
+    if output_text:
+        compacted["output_bytes"] = len(output_text.encode("utf-8", errors="ignore"))
+        compacted["output"] = _truncate_leaf_text(
+            output_text,
+            limit=_OVERSIZED_EVENT_PREVIEW_TEXT_LIMIT,
+        )
+        compacted["output_truncated"] = True
+    args_text = str(compacted.get("args") or "")
+    if args_text and _encoded_json_size(compacted) > _MAX_QUEST_EVENT_JSON_BYTES:
+        compacted["args"] = _truncate_leaf_text(args_text, limit=4_000)
+        compacted["args_truncated"] = True
+    if _encoded_json_size(compacted) > _MAX_QUEST_EVENT_JSON_BYTES:
+        metadata = compacted.get("metadata")
+        if isinstance(metadata, dict):
+            allowed_keys = {
+                "mcp_server",
+                "mcp_tool",
+                "bash_id",
+                "status",
+                "command",
+                "workdir",
+                "cwd",
+                "started_at",
+                "finished_at",
+                "exit_code",
+                "stop_reason",
+                "log_path",
+            }
+            compacted["metadata"] = {
+                key: metadata.get(key)
+                for key in allowed_keys
+                if key in metadata
+            }
+            compacted["metadata_truncated"] = True
+    if _encoded_json_size(compacted) > _MAX_QUEST_EVENT_JSON_BYTES:
+        compacted["output"] = _compact_text(compacted.get("output"), limit=2_000)
+        compacted["output_truncated"] = True
+    return compacted
 def _iter_event_texts(event: dict[str, Any]) -> list[str]:
     texts: list[str] = []
     for key in ("text", "content", "message"):
@@ -184,7 +279,24 @@ def _tool_name(event: dict[str, Any], item: dict[str, Any]) -> str:
     return "tool"
+def _is_bash_exec_item(event: dict[str, Any], item: dict[str, Any]) -> bool:
+    server = str(item.get("server") or event.get("server") or "").strip()
+    tool = str(item.get("tool") or event.get("tool") or "").strip()
+    return server == "bash_exec" and tool == "bash_exec"
 def _tool_args(event: dict[str, Any], item: dict[str, Any]) -> str:
+    if _is_bash_exec_item(event, item):
+        for value in (
+            item.get("arguments"),
+            event.get("arguments"),
+            item.get("input"),
+            event.get("input"),
+        ):
+            text = _structured_text(value, limit=_TOOL_EVENT_ARGS_TEXT_LIMIT)
+            if text:
+                return text
+        return ""
     for value in (
         item.get("command"),
         item.get("query"),
@@ -204,6 +316,21 @@ def _tool_args(event: dict[str, Any], item: dict[str, Any]) -> str:
 def _tool_output(event: dict[str, Any], item: dict[str, Any]) -> str:
+    if _is_bash_exec_item(event, item):
+        for value in (
+            item.get("result"),
+            item.get("output"),
+            item.get("content"),
+            event.get("result"),
+            event.get("output"),
+            event.get("content"),
+            item.get("aggregated_output"),
+            event.get("aggregated_output"),
+        ):
+            text = _structured_text(value, limit=_TOOL_EVENT_OUTPUT_TEXT_LIMIT)
+            if text:
+                return text
+        return ""
     for value in (
         item.get("aggregated_output"),
         item.get("changes"),
@@ -253,10 +380,12 @@ def _mcp_tool_metadata(
             metadata["workdir"] = arguments.get("workdir")
         if isinstance(arguments.get("mode"), str):
             metadata["mode"] = arguments.get("mode")
-        if isinstance(arguments.get("timeout_seconds"), int):
+        if arguments.get("timeout_seconds") is not None:
             metadata["timeout_seconds"] = arguments.get("timeout_seconds")
         if "comment" in arguments:
             metadata["comment"] = arguments.get("comment")
+        if server == "bash_exec" and tool == "bash_exec" and isinstance(arguments.get("id"), str):
+            metadata["bash_id"] = arguments.get("id")
     metadata["session_id"] = f"quest:{quest_id}"
     metadata["agent_id"] = "pi"
     metadata["agent_instance_id"] = run_id
@@ -266,12 +395,18 @@ def _mcp_tool_metadata(
         for key in (
             "bash_id",
             "status",
+            "command",
+            "workdir",
+            "cwd",
+            "kind",
+            "comment",
             "started_at",
             "finished_at",
             "exit_code",
             "stop_reason",
             "last_progress",
             "log_path",
+            "watchdog_after_seconds",
         ):
             if key in result_payload:
                 metadata[key] = result_payload.get(key)
@@ -310,7 +445,7 @@ def _tool_event(
                 "raw_event_type": event_type,
                 "created_at": created_at,
             }
-        return {
+        return _compact_tool_event_payload({
             "event_id": generate_id("evt"),
             "type": "runner.tool_result",
             "quest_id": quest_id,
@@ -324,7 +459,7 @@ def _tool_event(
             "output": _tool_output(event, item),
             "raw_event_type": event_type,
             "created_at": created_at,
-        }
+        })
     if item_type == "web_search":
         tool_call_id = _tool_call_id(event, item)
@@ -348,7 +483,7 @@ def _tool_event(
                 "raw_event_type": event_type,
                 "created_at": created_at,
             }
-        return {
+        return _compact_tool_event_payload({
             "event_id": generate_id("evt"),
             "type": "runner.tool_result",
             "quest_id": quest_id,
@@ -363,13 +498,13 @@ def _tool_event(
             "metadata": metadata,
             "raw_event_type": event_type,
             "created_at": created_at,
-        }
+        })
     if item_type == "file_change":
         tool_call_id = _tool_call_id(event, item)
         tool_name = "file_change"
         known_tool_names[tool_call_id] = tool_name
-        return {
+        return _compact_tool_event_payload({
             "event_id": generate_id("evt"),
             "type": "runner.tool_result",
             "quest_id": quest_id,
@@ -382,7 +517,7 @@ def _tool_event(
             "output": _tool_output(event, item),
             "raw_event_type": event_type,
             "created_at": created_at,
-        }
+        })
     if item_type == "mcp_tool_call":
         tool_call_id = _tool_call_id(event, item)
@@ -415,7 +550,7 @@ def _tool_event(
                 "raw_event_type": event_type,
                 "created_at": created_at,
             }
-        return {
+        return _compact_tool_event_payload({
             "event_id": generate_id("evt"),
             "type": "runner.tool_result",
             "quest_id": quest_id,
@@ -432,7 +567,7 @@ def _tool_event(
             "metadata": metadata,
             "raw_event_type": event_type,
             "created_at": created_at,
-        }
+        })
     if item_type in {"function_call", "custom_tool_call", "tool_call"} or "function_call" in event_type or "tool_call" in event_type:
         tool_call_id = _tool_call_id(event, item)
@@ -456,7 +591,7 @@ def _tool_event(
     if item_type in {"function_call_output", "custom_tool_call_output", "tool_result", "tool_call_output"} or "function_call_output" in event_type or "tool_result" in event_type:
         tool_call_id = _tool_call_id(event, item)
         tool_name = known_tool_names.get(tool_call_id) or _tool_name(event, item)
-        return {
+        return _compact_tool_event_payload({
             "event_id": generate_id("evt"),
             "type": "runner.tool_result",
             "quest_id": quest_id,
@@ -470,7 +605,7 @@ def _tool_event(
             "output": _tool_output(event, item),
             "raw_event_type": event_type,
             "created_at": created_at,
-        }
+        })
     return None
@@ -531,6 +666,12 @@ class CodexRunner:
         )
         env = dict(**os.environ)
+        runner_env = runner_config.get("env") if isinstance(runner_config.get("env"), dict) else {}
+        for key, value in runner_env.items():
+            env_key = str(key or "").strip()
+            if not env_key or value is None:
+                continue
+            env[env_key] = str(value)
         env["CODEX_HOME"] = str(codex_home)
         env["DEEPSCIENTIST_HOME"] = str(self.home)
         env["DS_HOME"] = str(self.home)
@@ -758,17 +899,25 @@ class CodexRunner:
         workspace_root = request.worktree_root or request.quest_root
         resolved_binary = resolve_runner_binary(self.binary, runner_name="codex")
         resolved_runner_config = runner_config if isinstance(runner_config, dict) else self._load_runner_config()
+        profile = str(resolved_runner_config.get("profile") or "").strip()
+        normalized_model = str(request.model or "").strip()
         command = [
             resolved_binary or self.binary,
             "--search",
-            "exec",
-            "--json",
-            "--cd",
-            str(workspace_root),
-            "--skip-git-repo-check",
-            "--model",
-            request.model,
         ]
+        if profile:
+            command.extend(["--profile", profile])
+        command.extend(
+            [
+                "exec",
+                "--json",
+                "--cd",
+                str(workspace_root),
+                "--skip-git-repo-check",
+            ]
+        )
+        if normalized_model.lower() not in {"", "inherit", "default", "codex-default"}:
+            command.extend(["--model", normalized_model])
         if request.approval_policy:
             command.extend(["-c", f'approval_policy="{request.approval_policy}"'])
         reasoning_effort = request.reasoning_effort
@@ -794,7 +943,9 @@ class CodexRunner:
         runner_config: dict[str, Any] | None = None,
     ) -> Path:
         target = ensure_dir(workspace_root / ".codex")
-        source = Path(os.environ.get("CODEX_HOME", str(Path.home() / ".codex"))).expanduser()
+        resolved_runner_config = runner_config if isinstance(runner_config, dict) else self._load_runner_config()
+        configured_home = str(resolved_runner_config.get("config_dir") or os.environ.get("CODEX_HOME") or str(Path.home() / ".codex"))
+        source = Path(configured_home).expanduser()
         for filename in ("config.toml", "auth.json"):
             source_path = source / filename
             target_path = target / filename

package/src/deepscientist/runners/runtime_overrides.py CHANGED Viewed

@@ -20,6 +20,8 @@ def _as_bool_env(name: str) -> bool:
 def codex_runtime_overrides() -> dict[str, str]:
     approval_policy = _as_text(os.environ.get("DEEPSCIENTIST_CODEX_APPROVAL_POLICY"))
     sandbox_mode = _as_text(os.environ.get("DEEPSCIENTIST_CODEX_SANDBOX_MODE"))
+    profile = _as_text(os.environ.get("DEEPSCIENTIST_CODEX_PROFILE"))
+    model = _as_text(os.environ.get("DEEPSCIENTIST_CODEX_MODEL"))
     if _as_bool_env("DEEPSCIENTIST_CODEX_YOLO"):
         approval_policy = approval_policy or "never"
@@ -30,6 +32,10 @@ def codex_runtime_overrides() -> dict[str, str]:
         overrides["approval_policy"] = approval_policy
     if sandbox_mode:
         overrides["sandbox_mode"] = sandbox_mode
+    if profile:
+        overrides["profile"] = profile
+    if model:
+        overrides["model"] = model
     return overrides

package/src/deepscientist/shared.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+from collections import deque
 import hashlib
 import json
 import os
@@ -9,7 +10,7 @@ import subprocess
 import sys
 from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any
+from typing import Any, Iterator
 from uuid import uuid4
 try:
@@ -90,21 +91,39 @@ def append_jsonl(path: Path, payload: dict[str, Any]) -> None:
         handle.write(json.dumps(payload, ensure_ascii=False) + "\n")
-def read_jsonl(path: Path) -> list[dict[str, Any]]:
+def iter_jsonl(path: Path | str) -> Iterator[dict[str, Any]]:
+    """Lazily yield the JSON objects stored one per line in *path*.
+
+    Yields nothing when the file does not exist. Blank lines, lines that
+    are not valid JSON, and JSON values that are not objects are skipped.
+    The file is read line by line as UTF-8.
+    """
+    path = Path(path)
     if not path.exists():
+        return
+    with path.open("r", encoding="utf-8") as handle:
+        for raw_line in handle:
+            line = raw_line.strip()
+            if not line:
+                continue
+            try:
+                payload = json.loads(line)
+            except json.JSONDecodeError:
+                # Skip the malformed line and keep reading the rest.
+                continue
+            if isinstance(payload, dict):
+                yield payload
+def read_jsonl(path: Path) -> list[dict[str, Any]]:
+    """Return every JSON object yielded by ``iter_jsonl(path)`` as a list."""
+    return list(iter_jsonl(path))
+def count_jsonl(path: Path | str) -> int:
+    """Count the JSON objects yielded by ``iter_jsonl(path)``."""
+    return sum(1 for _ in iter_jsonl(path))
+def read_jsonl_tail(path: Path | str, limit: int) -> list[dict[str, Any]]:
+    """Return the last *limit* JSON objects yielded by ``iter_jsonl(path)``.
+
+    A falsy or non-positive *limit* gives an empty list. The whole file is
+    still iterated, but only the most recent *limit* objects are kept.
+    """
+    normalized_limit = max(int(limit or 0), 0)
+    if normalized_limit <= 0:
         return []
-    items: list[dict[str, Any]] = []
-    for line in path.read_text(encoding="utf-8").splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            payload = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if isinstance(payload, dict):
-            items.append(payload)
-    return items
+    # A bounded deque discards the oldest entry once maxlen is reached.
+    items: deque[dict[str, Any]] = deque(maxlen=normalized_limit)
+    for payload in iter_jsonl(path):
+        items.append(payload)
+    return list(items)
 def read_yaml(path: Path, default: Any = None) -> Any:

package/src/deepscientist/weixin_support.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .connector.weixin_support import * # noqa: F401,F403

package/src/prompts/connectors/lingzhu.md CHANGED Viewed

@@ -11,5 +11,7 @@
 - lingzhu_safety_rule: request only actions that are clearly justified by the current quest and understandable to the human user
 - lingzhu_text_rule: even when requesting `surface_actions`, always include a clear text explanation of what is happening and why
 - lingzhu_reply_style_rule: for Lingzhu-facing user-visible text sent through `artifact.interact(...)`, keep the message clear, concise, respectful, and high-information-density
-- lingzhu_reply_length_rule: for each Lingzhu-facing `artifact.interact(...)` message, normally answer in at most 2 to 3 sentences unless the user explicitly asks for more detail
+- lingzhu_reply_length_rule: for each Lingzhu-facing `artifact.interact(...)` message, normally keep the text within about 20 Chinese characters or one very short sentence unless the user explicitly asks for more detail
 - lingzhu_summary_first_rule: in Lingzhu-facing `artifact.interact(...)` messages, usually give only the synopsis and key facts needed for the user's next decision or understanding; avoid long preambles, repetition, and low-signal detail
+- lingzhu_task_gate_rule: only treat a Lingzhu user utterance as a new quest instruction when the text explicitly starts with `我现在的任务是`; otherwise assume the device is polling for queued progress or buffered replies
+- lingzhu_poll_rule: when Lingzhu is polling rather than giving a new task, return only the buffered progress checkpoints or the latest short status; do not reinterpret the poll text as a fresh instruction

package/src/prompts/connectors/qq.md CHANGED Viewed

@@ -10,7 +10,8 @@
 - qq_summary_first_rule: start with the conclusion the user cares about, then what it means, then the next action
 - qq_progress_shape_rule: make the current task, the main difficulty or latest real progress, and the next concrete measure explicit whenever possible
 - qq_eta_rule: for baseline reproduction, main experiments, analysis experiments, and other important long-running research phases, include a rough ETA for the next meaningful result or the next update; if uncertain, say that and still give the next check-in window
-- qq_tool_call_keepalive_rule: for ordinary active work, prefer one concise QQ progress update after roughly 10 tool calls when there is already a human-meaningful delta, and do not let work drift beyond roughly 20 tool calls or about 15 minutes without a user-visible checkpoint
+- qq_tool_call_keepalive_rule: for ordinary active work, prefer one concise QQ progress update after roughly 6 tool calls when there is already a human-meaningful delta, and do not let work drift beyond roughly 12 tool calls or about 8 minutes without a user-visible checkpoint
+- qq_read_plan_keepalive_rule: if the active work is still mostly reading, comparison, or planning, do not wait too long for a "big result"; send a short QQ-facing checkpoint after about 5 consecutive tool calls if the user would otherwise see silence
 - qq_internal_detail_rule: omit worker names, heartbeat timestamps, retry counters, pending/running/completed counts, file names, and monitor-window narration unless the user asked for them or the detail changes the recommended action
 - qq_translation_rule: convert internal execution and file-management work into user value, such as saying the baseline record is now organized for easier later comparison instead of listing touched files
 - qq_preflight_rule: before sending a QQ progress update, rewrite it if it still sounds like a monitoring log, execution diary, or file inventory