npm - @researai/deepscientist - Versions diffs - 1.5.12 → 1.5.14 - Mend

@researai/deepscientist 1.5.12 → 1.5.14

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (99) hide show

package/src/deepscientist/prompts/builder.py CHANGED Viewed

@@ -744,10 +744,8 @@ class PromptBuilder:
             f"- baseline_execution_policy: {baseline_execution_policy if launch_mode == 'custom' else 'n/a'}",
             f"- manuscript_edit_mode: {manuscript_edit_mode if custom_profile in {'review_audit', 'revision_rebuttal'} else 'n/a'}",
             f"- delivery_mode: {'paper_required' if need_research_paper else 'algorithm_first'}",
+            "- requested_skill_rule: stage-specific execution detail lives in the requested skill; this block only adds runtime launch policy.",
             "- idea_stage_rule: every accepted idea submission should normally create a new branch/worktree and a new user-visible research node.",
-            "- idea_draft_rule: before `artifact.submit_idea(...)`, first finish a concise durable Markdown draft for the chosen route; keep `idea.md` compact and `draft.md` richer.",
-            "- idea_literature_floor_rule: before writing or submitting a final selected idea, durably survey at least 5 and usually 5 to 10 related and usable papers; prioritize direct task-modeling or mechanism-neighbor work and only backfill with the closest adjacent translatable papers when necessary.",
-            "- idea_reference_rule: the final selected-idea draft should use one consistent standard citation format and include a `References` or `Bibliography` section for the survey-stage papers that actually shaped the motivation, mechanism, or claim boundary.",
             "- lineage_rule: normal idea routing uses exactly two lineage intents: `continue_line` creates a child of the current active branch; `branch_alternative` creates a sibling-like branch from the current branch's parent foundation.",
             "- revise_rule: `artifact.submit_idea(mode='revise', ...)` is maintenance-only compatibility for the same branch and should not be the default research-route mechanism.",
             "- post_main_result_rule: after every `artifact.record_main_experiment(...)`, first interpret the measured result and only then choose the next route.",
@@ -839,13 +837,8 @@ class PromptBuilder:
             lines.extend(
                 [
                     "- delivery_goal: the quest should normally continue until at least one paper-like deliverable exists.",
-                    "- main_result_rule: a strong main experiment is evidence, not the endpoint; usually continue into the necessary analysis, writing, or further strengthening work.",
-                    "- main_run_branch_rule: every durable main experiment should live on its own dedicated `run/*` branch/worktree so the result becomes a stable Canvas node instead of mutating the idea branch in place.",
-                    "- main_run_branch_rule_2: if the current workspace is still an idea branch when `artifact.record_main_experiment(...)` runs, the runtime will materialize a child `run/*` branch before durable recording; still prefer planning and implementation with that dedicated run branch in mind from the start.",
-                    "- paper_branch_rule: after the required analysis for a strong main result is complete, writing should continue on a dedicated `paper/*` branch/worktree derived from that run branch rather than on the quest root or on the evidence branch itself.",
-                    "- paper_branch_rule_2: treat the paper branch as the writing surface and the parent run branch as the evidence source; do not record new main experiments from the paper branch.",
-                    "- paper_template_rule: once paper writing starts, choose a real venue template from the `write` skill's `templates/` folder, copy it into `paper/latex/`, and default to `templates/iclr2026/` for general ML unless the user or venue contract clearly points elsewhere.",
-                    "- writing_rule: when the evidence becomes strong enough, analysis and paper writing remain in scope by default.",
+                    "- main_result_rule: a strong main experiment is evidence, not the endpoint; usually continue into analysis, writing, or strengthening work.",
+                    "- paper_branch_rule: writing should normally continue on a dedicated `paper/*` branch/worktree derived from the evidence line rather than mutating the evidence branch itself.",
                     "- review_gate_rule: before declaring a substantial paper/draft task done, open `review` for an independent skeptical audit; if that audit finds serious gaps, route to `analysis-campaign`, `baseline`, `scout`, or `write` instead of stopping.",
                     "- stop_rule: do not stop with only an improved algorithm or isolated run logs unless the user explicitly narrows scope.",
                 ]
@@ -880,24 +873,15 @@ class PromptBuilder:
             "- collaboration_mode: long-horizon, continuity-first, artifact-aware",
             "- response_pattern: say what changed -> say what it means -> say what happens next",
             "- interaction_protocol: first message may be plain conversation; after that, treat artifact.interact threads and mailbox polls as the main continuity spine across TUI, web, and connectors",
+            "- shared_interaction_contract_precedence: use the shared interaction contract as the default user-facing cadence; the rules below add runtime-specific execution behavior instead of restating the same chat cadence",
             "- mailbox_protocol: artifact.interact(include_recent_inbound_messages=True) is the queued human-message mailbox; when it returns user text, treat that input as higher priority than background subtasks until it has been acknowledged",
             "- acknowledgment_protocol: after artifact.interact returns any human message, immediately send one substantive artifact.interact(...) follow-up; if the active connector runtime already emitted a transport-level receipt acknowledgement, do not send a redundant receipt-only message; if answerable, answer directly, otherwise state the short plan, nearest checkpoint, and that the current background subtask is paused",
-            "- progress_protocol: emit artifact.interact(kind='progress', reply_mode='threaded', ...) at real human-meaningful checkpoints; if no natural checkpoint appears during active user-relevant work, prefer a concise keepalive once work has crossed roughly 6 tool calls with a human-meaningful delta, and do not drift beyond roughly 12 tool calls or about 8 minutes without a user-visible update",
-            "- stage_kickoff_protocol: after entering any stage or companion skill, send one user-visible artifact.interact progress update within the first 3 tool calls of substantial work",
-            "- read_plan_keepalive_protocol: if work is still mostly reading, searching, comparison, or planning, do not wait too long for a 'big result'; send one concise user-visible checkpoint after about 5 consecutive tool calls if the user would otherwise see silence",
             "- subtask_boundary_protocol: send a user-visible update whenever the active subtask changes materially, especially across intake -> audit, audit -> experiment planning, experiment planning -> run launch, run result -> drafting, or drafting -> review/rebuttal",
             "- smoke_then_detach_protocol: for baseline reproduction, main experiments, and analysis experiments, first validate the command path with a bounded smoke test; once the smoke test passes, launch the real long run with bash_exec(mode='detach', ...) and usually leave timeout_seconds unset rather than guessing a fake deadline",
             "- progress_first_monitoring_protocol: when supervising a long-running bash_exec session, judge health by forward progress rather than by whether the final artifact has already appeared within a short window",
-            "- delta_monitoring_protocol: compare deltas such as new sample counters, new task counters, new saved files, new last_output_seq values, or changed last_progress payloads; if any of these move forward, treat the run as alive and keep observing",
-            "- long_run_reporting_protocol: for long-running bash_exec monitoring loops, inspect real logs or status after each completed sleep/await cycle and at least once every 30 minutes at worst, but only send a user-visible update when there is a human-meaningful delta or when the 30-minute visibility bound would otherwise be exceeded; those updates should report the current status, the latest concrete evidence of progress or failure, and the next checkpoint",
-            "- long_run_watchdog_protocol: for baseline reproduction, baseline-running stages, main experiments, and other important detached runs, do not let more than 30 minutes pass without a real progress inspection and, if the run is still active, a user-visible artifact.interact progress update",
+            "- long_run_reporting_protocol: inspect real logs/status after each meaningful await cycle and at least once every 30 minutes at worst, but only send a user-visible update when there is a human-meaningful delta, blocker, recovery, route change, or the visibility bound would otherwise be exceeded",
             "- intervention_threshold_protocol: do not kill or restart a run merely because a short watch window passed without final completion; intervene only on explicit failure, clear invalidity, process exit, or no meaningful delta across a sufficiently long observation window",
-            "- slow_model_patience_protocol: if the user says the model, endpoint, or workload is expected to be slow, widen the observation window before intervention and avoid repeated no-change updates",
-            "- saved_log_read_protocol: bash_exec(mode='read', id=...) returns the full saved rendered log when it is 2000 lines or fewer; for longer logs it returns a preview with the first 500 lines plus the last 1500 lines and tells you to use start/tail for omitted middle windows",
-            "- log_window_protocol: when you need a specific omitted middle region from a long saved log, use bash_exec(mode='read', id=..., start=..., tail=...) to read a forward rendered-line window",
-            "- tail_monitoring_protocol: when monitoring a detached run, prefer bash_exec(mode='read', id=..., tail_limit=..., order='desc') so you inspect the newest seq-based evidence first instead of re-reading full logs every time",
-            "- managed_recovery_protocol: if a detached baseline, main-experiment, or analysis run is clearly invalid, wedged, or superseded, stop it with bash_exec(mode='kill', id=...), document the reason, fix the issue, and relaunch cleanly instead of letting a bad run linger",
-            "- timeout_protocol: before using bash_exec(mode='await', ...), estimate whether the command can finish within the selected wait window; if runtime is uncertain or likely longer, use bash_exec(mode='detach', ...) and monitor, or set timeout_seconds intentionally",
+            "- timeout_protocol: before using bash_exec(mode='await', ...), estimate whether the command can finish within the selected wait window; if runtime is uncertain or likely longer, use bash_exec(mode='detach', ...) and monitor instead of guessing a fake deadline",
             "- blocking_protocol: use reply_mode='blocking' only for true unresolved user decisions; ordinary progress updates should stay threaded and non-blocking",
             "- credential_blocking_protocol: if continuation requires user-supplied external credentials or secrets such as an API key, GitHub key/token, or Hugging Face key/token, emit one structured blocking decision request that asks the user to provide the credential or choose an alternative route; do not invent placeholders or silently skip the blocked step",
             "- credential_wait_protocol: if that credential request remains unanswered, keep the quest waiting rather than self-resolving; if you are resumed without new credentials and no other work is possible, a long low-frequency park such as `bash_exec(command='sleep 3600', mode='await', timeout_seconds=3700)` is acceptable to avoid busy-looping",
@@ -906,42 +890,13 @@ class PromptBuilder:
             "- respect_protocol: write user-facing updates as natural, respectful, easy-to-follow chat; do not sound like a formal status report or internal tool log",
             "- omission_protocol: for ordinary user-facing updates, omit file paths, artifact ids, branch/worktree ids, session ids, raw commands, raw logs, and internal tool names unless the user asked for them or needs them to act",
             "- compaction_protocol: ordinary artifact.interact progress updates should usually fit in 2 to 4 short sentences and should not read like a monitoring transcript or execution diary",
-            "- tool_call_keepalive_protocol: for active multi-step work outside long detached experiment waits, prefer sending one concise artifact.interact progress update after roughly 6 tool calls when there is already a human-meaningful delta, and do not exceed roughly 12 tool calls or about 8 minutes without a user-visible checkpoint",
+            "- watchdog_payload_protocol: if a tool result includes `watchdog_notes`, `progress_watchdog_note`, `visibility_watchdog_note`, or `state_change_watchdog_note`, treat that as an action item and send the required artifact.interact(...) update before doing more background work",
             "- human_progress_shape_protocol: ordinary progress updates should usually make three things explicit in human language: the current task, the main difficulty or latest real progress, and the concrete next measure you will take",
-            "- milestone_graduation_protocol: keep ordinary subtask completions concise; upgrade to a richer milestone report only when a stage-significant deliverable or route-changing checkpoint becomes durably true",
-            "- eta_visibility_protocol: for baseline reproduction, main experiments, analysis experiments, and other important long-running phases, progress updates should also make the expected time to the next meaningful result, next milestone, or next user-visible update explicit; use roughly 10 to 30 minutes as the normal update window, and if the ETA is unreliable, say that and give a realistic next check-in window instead",
-            "- stage_plan_protocol: for `baseline`, `experiment`, and `analysis-campaign`, do not jump straight into substantial setup, code changes, or real runs; first create or update quest-visible `PLAN.md` and `CHECKLIST.md`, then keep them aligned with the actual route",
-            "- baseline_plan_protocol: in `baseline`, read the source paper and source repo first when they exist, then make `PLAN.md` cover the route, source package, code touchpoints, smoke path, real-run path, fallback options, monitoring rules, and verification targets before substantial work continues",
-            "- experiment_plan_protocol: in `experiment`, make `PLAN.md` start with the selected idea summarized in 1 to 2 sentences and then map the idea into code touchpoints, comparability rules, smoke / pilot path, full-run path, fallback options, monitoring rules, and revision notes",
-            "- analysis_plan_protocol: in `analysis-campaign`, treat `PLAN.md` as the campaign charter and make it cover the slice list, comparability boundary, asset and comparator plan, smoke / full-run policy, reporting plan, and revision log before real slices launch",
-            "- checklist_maintenance_protocol: for those same stages, treat `CHECKLIST.md` as the living execution surface and update it during reading, setup, coding, smoke tests, real runs, validation, aggregation, and route changes instead of letting progress live only in chat",
-            "- plan_revision_protocol: if the route, comparability contract, source package, execution strategy, slice ordering, or campaign interpretation changes materially, revise `PLAN.md` before continuing",
-            "- plan_execution_stability_protocol: once `baseline` or `experiment` has a concrete `PLAN.md` route, implement that plan cleanly instead of repeatedly reshaping code and commands mid-flight; the normal default is one bounded smoke or pilot validation and then one real run, and extra retries should happen only after concrete failure, invalidity, or genuinely new evidence justifies them",
-            "- stage_milestone_summary_protocol: for accepted baseline, selected idea, completed main experiment, and completed analysis-campaign milestones, usually open with 1 to 2 sentences that say what happened, what it means, and the exact next step before expanding into more detail",
-            "- idea_milestone_protocol: immediately after a successful accepted artifact.submit_idea(...), send a threaded milestone that explains the idea in plain language and explicitly states whether it currently looks valid, research-worthy, and insight-bearing, plus the main risk and exact next experiment",
-            "- idea_divergence_protocol: in the idea stage, separate divergence from convergence; unless strong durable evidence already narrows the route to one obvious serious option, do not collapse onto the first plausible route before generating a small but meaningfully diverse candidate slate",
-            "- idea_lens_protocol: when idea candidates cluster around one mechanism family, deliberately switch ideation lenses such as problem-first vs solution-first, tension hunting, analogy transfer, inversion, or adjacent-possible reasoning before final selection",
-            "- idea_frontier_protocol: a temporary raw ideation slate may be larger, but after convergence the serious frontier should usually shrink back to 2 to 3 candidates and at most 5",
-            "- idea_why_now_protocol: every serious idea candidate should answer why now or what changed, not just what the mechanism is",
-            "- idea_balance_protocol: when the search space is not tiny, carry at least one conservative route and one higher-upside route into the final comparison",
-            "- idea_pitch_protocol: before artifact.submit_idea(...), make the winner pass a two-sentence pitch, a strongest-objection check, and a concrete why-now statement",
-            "- idea_literature_floor_protocol: do not write or submit the final selected idea until the durable survey covers at least 5 and usually 5 to 10 related and usable papers; if fewer than 5 direct papers exist, document the shortage and use the closest adjacent translatable work instead of skipping the gate",
-            "- idea_reference_protocol: the final selected-idea draft should cite the survey-stage papers it actually uses and end with a standard-format `References` or `Bibliography` section",
-            "- experiment_milestone_protocol: immediately after artifact.record_main_experiment(...) writes the durable result, send a threaded milestone that explains what was run, the main result, whether primary performance improved / worsened / stayed mixed versus the active baseline or best prior anchor, whether the route still looks promising, and the exact next step",
-            "- analysis_milestone_protocol: immediately after a meaningful completed analysis-campaign synthesis or route-significant campaign checkpoint, send a threaded milestone that explains which campaign question or slice set just closed, whether the claim boundary became stronger / weaker / mixed, the main caveat, and the exact next route",
-            "- paper_milestone_protocol: immediately after a meaningful paper or draft milestone such as selected outline, evidence-complete draft, major revision package, or bundle-ready paper, send a threaded milestone that explains what document milestone is now complete, which claims are now supportable, what still needs strengthening, and the exact next revision or execution route",
-            "- asset_grounded_analysis_protocol: before artifact.create_analysis_campaign(...), reuse current quest and user-provided assets first and only plan slices that are executable with the current assets, runtime/tooling, and available credentials",
-            "- infeasible_slice_protocol: if an analysis slice cannot actually be executed after bounded recovery, do not fake completion; record the slice with a non-success status, report the blocker explicitly, and do not pretend the system can do it",
-            "- explicit_improvement_protocol: never make the user infer performance improvement only from raw metrics; say plainly whether performance improved, worsened, or stayed mixed",
-            "- verified_reference_breadth_protocol: for paper-like writing, run broad literature search and reading, aim for roughly 30 to 50 verified references unless scope clearly justifies fewer, use one consistent citation workflow SEARCH -> VERIFY -> RETRIEVE -> VALIDATE -> ADD, use Semantic Scholar by default or Google Scholar manual search/export for discovery, use DOI/Crossref or other real metadata backfills for BibTeX and verification, Every final citation must correspond to a real paper from an actual source, store actual bibliography entries in paper/references.bib as valid BibTeX, do one explicit reference audit before bundling, and never invent citations from memory or hand-write BibTeX from scratch",
-            "- narrative_focus_protocol: for paper-like writing, organize the paper around one cohesive contribution, make What / Why / So What clear early, assume many readers judge in the order title -> abstract -> introduction -> figures, front-load value in those surfaces, use a five-part abstract formula, keep the introduction concise with 2 to 4 specific contribution bullets, and if the first sentence could be pasted into many unrelated ML papers then rewrite it until it becomes specific",
-            "- writing_reasoning_externalization_protocol: for paper-like writing, externalize major reasoning into durable notes such as paper/outline_selection.md, paper/claim_evidence_map.json, paper/related_work_map.md, paper/figure_storyboard.md, and paper/reviewer_first_pass.md; those notes should summarize current judgment, alternatives considered, evidence used, risks, and next revision action rather than hidden chain-of-thought",
-            "- outline_intro_value_protocol: for outlines and introductions, make research value explicit early and use a standard introduction arc: problem and stakes -> concrete gap/bottleneck -> remedy/core idea -> evidence preview -> contributions",
+            "- stage_contract_protocol: stage-specific plan/checklist rules, milestone rules, literature rules, and writing rules belong in the requested skill; do not expect this runtime block to restate them",
             "- teammate_voice_protocol: write like a calm capable teammate using natural first-person phrasing when helpful, for example 'I'm working on ...', 'The main issue right now is ...', 'Next I'll ...'; do not sound like a dashboard or incident log",
-            "- tqdm_progress_protocol: when you control the experiment code for baseline reproduction, main experiments, or analysis experiments, instrument long loops with a throttled tqdm-style progress reporter when feasible and also prefer periodic __DS_PROGRESS__ JSON markers so monitoring stays both human-readable and machine-usable",
             "- translation_protocol: convert internal actions into user-facing meaning; describe what was finished and why it matters instead of naming every touched file, counter, timestamp, or subprocess",
             "- detail_gate_protocol: include exact counters, worker labels, timestamps, retry counts, or file names only when the user explicitly asked for them, when they change the recommended action, or when they are the only honest way to explain a real blocker",
-            "- monitoring_summary_protocol: for long-running monitoring loops, summarize the frontier state in plain language such as still progressing, temporarily stalled, recovered, or needs intervention; do not narrate each watch window and do not send a no-change update merely because a sleep finished unless the user-visible timing bound requires it",
+            "- monitoring_summary_protocol: for long-running monitoring loops, summarize the frontier state in plain language such as still progressing, temporarily stalled, recovered, or needs intervention; do not narrate each watch window",
             "- preflight_rewrite_protocol: before sending artifact.interact, quickly self-check whether the draft reads like a monitoring log, file inventory, or internal diary; if it mentions watch windows, heartbeats, retry counters, raw counts, timestamps, or multiple file names without being necessary for user action, rewrite it into conclusion -> meaning -> next step first",
             "- non_research_mode_protocol: if the user message looks like a non-research request, ask for a second confirmation before engaging stage skills or research workflow; after completion, leave one blocking standby interaction instead of repeatedly pinging",
             "- workspace_discipline: read and modify code inside current_workspace_root; treat quest_root as the canonical repo identity and durable runtime root",
@@ -1057,6 +1012,10 @@ class PromptBuilder:
             f"- latest_thread_interaction_id: {snapshot.get('latest_thread_interaction_id') or 'none'}",
             f"- default_reply_interaction_id: {snapshot.get('default_reply_interaction_id') or 'none'}",
             f"- last_artifact_interact_at: {snapshot.get('last_artifact_interact_at') or 'none'}",
+            f"- seconds_since_last_artifact_interact: {snapshot.get('seconds_since_last_artifact_interact') if snapshot.get('seconds_since_last_artifact_interact') is not None else 'none'}",
+            f"- tool_calls_since_last_artifact_interact: {snapshot.get('tool_calls_since_last_artifact_interact') or 0}",
+            f"- last_tool_activity_at: {snapshot.get('last_tool_activity_at') or 'none'}",
+            f"- last_tool_activity_name: {snapshot.get('last_tool_activity_name') or 'none'}",
             f"- last_delivered_batch_id: {snapshot.get('last_delivered_batch_id') or 'none'}",
             f"- bound_conversations: {', '.join(snapshot.get('bound_conversations') or []) or 'none'}",
             f"- cloud_linked: {snapshot.get('cloud', {}).get('linked', False)}",

package/src/deepscientist/quest/service.py CHANGED Viewed

@@ -864,6 +864,7 @@ class QuestService:
         from ..bash_exec import BashExecService
         bash_summary = BashExecService(self.home).summary(quest_root)
+        interaction_watchdog = self.artifact_interaction_watchdog_status(quest_root)
         payload = {
             "quest_id": quest_yaml.get("quest_id", quest_id),
             "title": quest_yaml.get("title", quest_id),
@@ -887,6 +888,10 @@ class QuestService:
             "stop_reason": runtime_state.get("stop_reason"),
             "active_interaction_id": runtime_state.get("active_interaction_id"),
             "last_artifact_interact_at": runtime_state.get("last_artifact_interact_at"),
+            "last_tool_activity_at": runtime_state.get("last_tool_activity_at"),
+            "last_tool_activity_name": runtime_state.get("last_tool_activity_name"),
+            "tool_calls_since_last_artifact_interact": int(runtime_state.get("tool_calls_since_last_artifact_interact") or 0),
+            "seconds_since_last_artifact_interact": interaction_watchdog.get("seconds_since_last_artifact_interact"),
             "last_delivered_batch_id": runtime_state.get("last_delivered_batch_id"),
             "last_delivered_at": runtime_state.get("last_delivered_at"),
             "bound_conversations": self._binding_sources_payload(quest_root).get("sources") or ["local:default"],
@@ -907,6 +912,7 @@ class QuestService:
                 "bash_session_count": int(bash_summary.get("session_count") or 0),
                 "bash_running_count": int(bash_summary.get("running_count") or 0),
             },
+            "interaction_watchdog": interaction_watchdog,
             "recent_artifacts": [],
             "recent_runs": [],
         }
@@ -1228,6 +1234,7 @@ class QuestService:
             "bash_session_count": int(bash_summary.get("session_count") or 0),
             "bash_running_count": int(bash_summary.get("running_count") or 0),
         }
+        interaction_watchdog = self.artifact_interaction_watchdog_status(quest_root)
         guidance = None
         try:
             from ..artifact.guidance import build_guidance_for_snapshot
@@ -1287,6 +1294,10 @@ class QuestService:
             "retry_state": runtime_state.get("retry_state"),
             "last_transition_at": runtime_state.get("last_transition_at"),
             "last_artifact_interact_at": runtime_state.get("last_artifact_interact_at"),
+            "last_tool_activity_at": runtime_state.get("last_tool_activity_at"),
+            "last_tool_activity_name": runtime_state.get("last_tool_activity_name"),
+            "tool_calls_since_last_artifact_interact": int(runtime_state.get("tool_calls_since_last_artifact_interact") or 0),
+            "seconds_since_last_artifact_interact": interaction_watchdog.get("seconds_since_last_artifact_interact"),
             "last_delivered_batch_id": runtime_state.get("last_delivered_batch_id"),
             "last_delivered_at": runtime_state.get("last_delivered_at"),
             "bound_conversations": self._binding_sources_payload(quest_root).get("sources") or ["local:default"],
@@ -1302,6 +1313,7 @@ class QuestService:
             },
             "paths": paths,
             "counts": counts,
+            "interaction_watchdog": interaction_watchdog,
             "team": {"mode": "single", "active_workers": []},
             "cloud": {"linked": False, "base_url": "https://deepscientist.cc"},
             "history_count": len(history),
@@ -2674,6 +2686,9 @@ class QuestService:
             "stop_reason": None,
             "last_transition_at": timestamp,
             "last_artifact_interact_at": None,
+            "last_tool_activity_at": None,
+            "last_tool_activity_name": None,
+            "tool_calls_since_last_artifact_interact": 0,
             "pending_user_message_count": pending_count,
             "last_delivered_batch_id": None,
             "last_delivered_at": None,
@@ -2738,6 +2753,7 @@ class QuestService:
             payload = defaults
         merged = {**defaults, **payload}
         merged["pending_user_message_count"] = int(merged.get("pending_user_message_count") or 0)
+        merged["tool_calls_since_last_artifact_interact"] = int(merged.get("tool_calls_since_last_artifact_interact") or 0)
         merged["retry_state"] = dict(merged.get("retry_state") or {}) if isinstance(merged.get("retry_state"), dict) else None
         return merged
@@ -2754,6 +2770,9 @@ class QuestService:
         active_interaction_id: str | None | object = _UNSET,
         last_transition_at: str | None | object = _UNSET,
         last_artifact_interact_at: str | None | object = _UNSET,
+        last_tool_activity_at: str | None | object = _UNSET,
+        last_tool_activity_name: str | None | object = _UNSET,
+        tool_calls_since_last_artifact_interact: int | object = _UNSET,
         pending_user_message_count: int | object = _UNSET,
         last_delivered_batch_id: str | None | object = _UNSET,
         last_delivered_at: str | None | object = _UNSET,
@@ -2785,6 +2804,12 @@ class QuestService:
                 state["active_interaction_id"] = str(active_interaction_id).strip() if active_interaction_id else None
             if last_artifact_interact_at is not _UNSET:
                 state["last_artifact_interact_at"] = last_artifact_interact_at
+            if last_tool_activity_at is not _UNSET:
+                state["last_tool_activity_at"] = last_tool_activity_at
+            if last_tool_activity_name is not _UNSET:
+                state["last_tool_activity_name"] = str(last_tool_activity_name).strip() if last_tool_activity_name else None
+            if tool_calls_since_last_artifact_interact is not _UNSET:
+                state["tool_calls_since_last_artifact_interact"] = max(0, int(tool_calls_since_last_artifact_interact))
             if pending_user_message_count is not _UNSET:
                 state["pending_user_message_count"] = max(0, int(pending_user_message_count))
             if last_delivered_batch_id is not _UNSET:
@@ -3056,10 +3081,67 @@ class QuestService:
             quest_root=quest_root,
             active_interaction_id=interaction_id or artifact_id,
             last_artifact_interact_at=timestamp,
+            last_tool_activity_at=timestamp,
+            last_tool_activity_name="artifact.interact",
+            tool_calls_since_last_artifact_interact=0,
             pending_user_message_count=len((self._read_message_queue(quest_root).get("pending") or [])),
         )
         return payload
+    def record_tool_activity(
+        self,
+        quest_root: Path,
+        *,
+        tool_name: str,
+        created_at: str | None = None,
+    ) -> dict[str, Any]:
+        timestamp = created_at or utc_now()
+        current_state = self._read_runtime_state(quest_root)
+        next_count = int(current_state.get("tool_calls_since_last_artifact_interact") or 0) + 1
+        payload = {
+            "event_id": generate_id("evt"),
+            "type": "tool_activity",
+            "quest_id": quest_root.name,
+            "tool_name": str(tool_name or "").strip() or "tool",
+            "tool_calls_since_last_artifact_interact": next_count,
+            "created_at": timestamp,
+        }
+        append_jsonl(self._interaction_journal_path(quest_root), payload)
+        self.update_runtime_state(
+            quest_root=quest_root,
+            last_tool_activity_at=timestamp,
+            last_tool_activity_name=payload["tool_name"],
+            tool_calls_since_last_artifact_interact=next_count,
+        )
+        return payload
+    @staticmethod
+    def _seconds_since_iso_timestamp(value: str | None) -> int | None:
+        normalized = str(value or "").strip()
+        if not normalized:
+            return None
+        candidate = normalized.replace("Z", "+00:00")
+        try:
+            parsed = datetime.fromisoformat(candidate)
+        except ValueError:
+            return None
+        if parsed.tzinfo is None:
+            parsed = parsed.replace(tzinfo=UTC)
+        return max(int((datetime.now(UTC) - parsed.astimezone(UTC)).total_seconds()), 0)
+    def artifact_interaction_watchdog_status(self, quest_root: Path) -> dict[str, Any]:
+        runtime_state = self._read_runtime_state(quest_root)
+        last_artifact_interact_at = str(runtime_state.get("last_artifact_interact_at") or "").strip() or None
+        last_tool_activity_at = str(runtime_state.get("last_tool_activity_at") or "").strip() or None
+        return {
+            "last_artifact_interact_at": last_artifact_interact_at,
+            "seconds_since_last_artifact_interact": self._seconds_since_iso_timestamp(last_artifact_interact_at),
+            "tool_calls_since_last_artifact_interact": int(runtime_state.get("tool_calls_since_last_artifact_interact") or 0),
+            "last_tool_activity_at": last_tool_activity_at,
+            "seconds_since_last_tool_activity": self._seconds_since_iso_timestamp(last_tool_activity_at),
+            "last_tool_activity_name": str(runtime_state.get("last_tool_activity_name") or "").strip() or None,
+        }
     def latest_artifact_interaction_records(self, quest_root: Path, limit: int = 10) -> list[dict[str, Any]]:
         items = [
             item
@@ -3320,6 +3402,7 @@ class QuestService:
                     "path": relative,
                     "kind": "directory",
                     "scope": self._classify_relative_scope(relative)[0],
+                    "folder_kind": self._snapshot_folder_kind(child, relative),
                     "children": self._snapshot_tree_nodes(child, revision=revision, prefix=relative),
                     "git_status": None,
                     "recently_changed": False,
@@ -3361,6 +3444,22 @@ class QuestService:
             cursor[parts[-1]] = None
         return tree
+    @staticmethod
+    def _snapshot_folder_kind(tree: dict[str, dict | None], relative: str) -> str | None:
+        normalized = str(relative or "").strip().replace("\\", "/")
+        if not normalized or normalized.startswith(".ds/"):
+            return None
+        if not isinstance(tree, dict):
+            return None
+        if tree.get("main.tex") is None and "main.tex" in tree:
+            return "latex"
+        for name, child in tree.items():
+            if child is not None:
+                continue
+            if Path(name).suffix.lower() == ".tex":
+                return "latex"
+        return None
     def _git_snapshot_paths(self, quest_root: Path, revision: str) -> list[str]:
         result = run_command(
             ["git", "ls-tree", "-r", "--full-tree", "--name-only", revision],

package/src/deepscientist/quest/stage_views.py CHANGED Viewed

@@ -344,37 +344,83 @@ class QuestStageViewBuilder:
             return True
         return False
-    def _path_in_quest(self, raw_path: object) -> tuple[Path, str] | None:
+    def _path_in_quest(self, raw_path: object) -> tuple[Path, str, str] | None:
         text = str(raw_path or "").strip()
         if not text:
             return None
         path = Path(text)
+        candidates: list[Path] = []
         if not path.is_absolute():
-            path = (self.quest_root / text).resolve()
+            for base in (self.workspace_root, self.quest_root):
+                try:
+                    candidates.append((base / text).resolve())
+                except OSError:
+                    continue
         else:
             try:
-                path = path.resolve()
+                candidates.append(path.resolve())
             except OSError:
                 return None
-        try:
-            relative = path.relative_to(self.quest_root.resolve()).as_posix()
-        except ValueError:
+        if not candidates:
             return None
-        return path, relative
+        seen: set[str] = set()
+        unique_candidates: list[Path] = []
+        for candidate in candidates:
+            key = str(candidate)
+            if key in seen:
+                continue
+            seen.add(key)
+            unique_candidates.append(candidate)
+        existing_candidates: list[Path] = []
+        missing_candidates: list[Path] = []
+        for candidate in unique_candidates:
+            try:
+                if candidate.exists():
+                    existing_candidates.append(candidate)
+                else:
+                    missing_candidates.append(candidate)
+            except OSError:
+                missing_candidates.append(candidate)
+        ordered = [*existing_candidates, *missing_candidates]
+        workspace_root = self.workspace_root.resolve()
+        quest_root = self.quest_root.resolve()
+        for candidate in ordered:
+            if candidate.exists():
+                try:
+                    relative = candidate.relative_to(workspace_root).as_posix()
+                    return candidate, relative, "path"
+                except ValueError:
+                    pass
+            try:
+                relative = candidate.relative_to(quest_root).as_posix()
+                return candidate, relative, "questpath"
+            except ValueError:
+                pass
+            try:
+                relative = candidate.relative_to(workspace_root).as_posix()
+                return candidate, relative, "path"
+            except ValueError:
+                continue
+        return None
     def _document_id_for_path(self, raw_path: object) -> str | None:
         resolved = self._path_in_quest(raw_path)
         if resolved is None:
             return None
-        path, relative = resolved
+        path, relative, document_scope = resolved
         if path.exists() and path.is_file():
-            return f"questpath::{relative}"
+            return f"{document_scope}::{relative}"
         return None
     def _relative_path_or_raw(self, raw_path: object) -> str | None:
         resolved = self._path_in_quest(raw_path)
         if resolved is not None:
-            _path, relative = resolved
+            _path, relative, _document_scope = resolved
             return relative
         text = str(raw_path or "").strip()
         return text or None
@@ -383,7 +429,7 @@ class QuestStageViewBuilder:
         resolved = self._path_in_quest(raw_path)
         if resolved is None:
             return None
-        path, _relative = resolved
+        path, _relative, _document_scope = resolved
         if not path.exists() or not path.is_file():
             return None
         try:
@@ -464,7 +510,7 @@ class QuestStageViewBuilder:
                 "exists": path.exists(),
                 "scope": "external",
             }
-        path, relative = resolved
+        path, relative, document_scope = resolved
         exists = path.exists()
         kind = "directory" if (exists and path.is_dir()) or expected_kind == "directory" else "file"
         scope = self.quest_service._classify_relative_scope(relative)[0]
@@ -474,7 +520,7 @@ class QuestStageViewBuilder:
             "description": description,
             "path": relative,
             "absolute_path": str(path),
-            "document_id": f"questpath::{relative}" if exists and path.is_file() else None,
+            "document_id": f"{document_scope}::{relative}" if exists and path.is_file() else None,
             "kind": kind,
             "exists": exists,
             "scope": scope,
@@ -508,30 +554,88 @@ class QuestStageViewBuilder:
         resolved = self._path_in_quest(raw_path)
         if resolved is None:
             return None
-        _path, relative = resolved
+        _path, relative, _document_scope = resolved
         return relative
-    def _paper_latex_root(self, bundle_manifest: dict[str, Any]) -> str | None:
-        preferred = self._paper_relative_path(bundle_manifest.get("latex_root_path"))
-        if preferred:
-            return preferred
+    def _paper_latex_root(
+        self,
+        bundle_manifest: dict[str, Any],
+        *,
+        compile_report: dict[str, Any] | None = None,
+    ) -> str | None:
+        for candidate in (
+            bundle_manifest.get("latex_root_path"),
+            (compile_report or {}).get("latex_root_path"),
+            (compile_report or {}).get("main_file_path"),
+        ):
+            resolved = self._path_in_quest(candidate)
+            if resolved is None:
+                continue
+            path, relative, _document_scope = resolved
+            if path.is_dir():
+                return relative
+            if path.suffix.lower() == ".tex":
+                return PurePosixPath(relative).parent.as_posix()
         paper_root = self._paper_root()
         for candidate in (paper_root / "latex", paper_root / "tex"):
             if candidate.exists():
-                return candidate.relative_to(self.quest_root).as_posix()
+                try:
+                    return candidate.relative_to(self.workspace_root.resolve()).as_posix()
+                except ValueError:
+                    return candidate.relative_to(self.quest_root).as_posix()
         return None
-    def _paper_main_tex(self, latex_root_rel: str | None) -> str | None:
+    def _paper_main_tex(
+        self,
+        latex_root_rel: str | None,
+        *,
+        bundle_manifest: dict[str, Any] | None = None,
+        compile_report: dict[str, Any] | None = None,
+    ) -> str | None:
+        for candidate in (
+            (compile_report or {}).get("main_file_path"),
+            bundle_manifest.get("main_tex_path") if isinstance(bundle_manifest, dict) else None,
+            bundle_manifest.get("latex_root_path") if isinstance(bundle_manifest, dict) else None,
+            (compile_report or {}).get("latex_root_path"),
+        ):
+            resolved = self._path_in_quest(candidate)
+            if resolved is None:
+                continue
+            path, relative, _document_scope = resolved
+            if path.suffix.lower() == ".tex":
+                return relative
+            if path.is_dir():
+                preferred = path / "main.tex"
+                if preferred.exists():
+                    nested = self._path_in_quest(preferred)
+                    if nested is not None:
+                        _resolved_path, nested_relative, _nested_scope = nested
+                        return nested_relative
         if not latex_root_rel:
             return None
-        latex_root = self.quest_root / latex_root_rel
+        latex_root = (self.workspace_root / latex_root_rel).resolve()
+        if not latex_root.exists():
+            latex_root = (self.quest_root / latex_root_rel).resolve()
+        if latex_root.is_file() and latex_root.suffix.lower() == ".tex":
+            nested = self._path_in_quest(latex_root)
+            if nested is not None:
+                _resolved_path, nested_relative, _nested_scope = nested
+                return nested_relative
+            return None
         preferred = latex_root / "main.tex"
         if preferred.exists():
-            return preferred.relative_to(self.quest_root).as_posix()
+            nested = self._path_in_quest(preferred)
+            if nested is not None:
+                _resolved_path, nested_relative, _nested_scope = nested
+                return nested_relative
         candidates = sorted(latex_root.glob("*.tex"))
         if not candidates:
             return None
-        return candidates[0].relative_to(self.quest_root).as_posix()
+        nested = self._path_in_quest(candidates[0])
+        if nested is None:
+            return None
+        _resolved_path, nested_relative, _nested_scope = nested
+        return nested_relative
     def _paper_pdf_candidates(
         self,
@@ -1460,10 +1564,11 @@ class QuestStageViewBuilder:
             },
         )
-    def _paper_files(self) -> list[dict[str, Any]]:
+    def _paper_files(self, *, compile_report: dict[str, Any] | None = None) -> list[dict[str, Any]]:
         bundle_manifest = self._paper_bundle_manifest()
-        latex_root_rel = self._paper_latex_root(bundle_manifest)
-        main_tex_rel = self._paper_main_tex(latex_root_rel)
+        compile_report = compile_report if isinstance(compile_report, dict) else {}
+        latex_root_rel = self._paper_latex_root(bundle_manifest, compile_report=compile_report)
+        main_tex_rel = self._paper_main_tex(latex_root_rel, bundle_manifest=bundle_manifest, compile_report=compile_report)
         pdf_candidates = self._paper_pdf_candidates(bundle_manifest, main_tex_rel=main_tex_rel)
         paper_root = self._paper_root()
         open_source_root = self._open_source_root()
@@ -1537,8 +1642,8 @@ class QuestStageViewBuilder:
         if not isinstance(compile_report, dict):
             compile_report = {}
         bundle_manifest = self._paper_bundle_manifest()
-        latex_root_rel = self._paper_latex_root(bundle_manifest)
-        main_tex_rel = self._paper_main_tex(latex_root_rel)
+        latex_root_rel = self._paper_latex_root(bundle_manifest, compile_report=compile_report)
+        main_tex_rel = self._paper_main_tex(latex_root_rel, bundle_manifest=bundle_manifest, compile_report=compile_report)
         references_bib = read_text(paper_root / "references.bib", "")
         references_count = sum(1 for line in references_bib.splitlines() if line.lstrip().startswith("@"))
         pdf_paths = self._paper_pdf_candidates(bundle_manifest, main_tex_rel=main_tex_rel)
@@ -1577,7 +1682,7 @@ class QuestStageViewBuilder:
                 _field("LaTeX Root", latex_root_rel or "Not recorded"),
                 _field("Main TeX", main_tex_rel or "Not recorded"),
             ],
-            key_files=self._paper_files(),
+            key_files=self._paper_files(compile_report=compile_report),
             history=self._artifact_history(paper_items),
             details={
                 "paper": {

package/src/deepscientist/runners/codex.py CHANGED Viewed

@@ -11,11 +11,12 @@ from pathlib import Path
 from typing import Any
 from ..artifact import ArtifactService
+from ..codex_cli_compat import adapt_profile_only_provider_config, normalize_codex_reasoning_effort
 from ..config import ConfigManager
 from ..gitops import export_git_graph
 from ..prompts import PromptBuilder
 from ..runtime_logs import JsonlLogger
-from ..shared import append_jsonl, ensure_dir, generate_id, read_yaml, resolve_runner_binary, utc_now, write_json, write_text
+from ..shared import append_jsonl, ensure_dir, generate_id, read_text, read_yaml, resolve_runner_binary, utc_now, write_json, write_text
 from ..web_search import extract_web_search_payload
 from .base import RunRequest, RunResult
@@ -920,7 +921,10 @@ class CodexRunner:
             command.extend(["--model", normalized_model])
         if request.approval_policy:
             command.extend(["-c", f'approval_policy="{request.approval_policy}"'])
-        reasoning_effort = request.reasoning_effort
+        reasoning_effort, _ = normalize_codex_reasoning_effort(
+            request.reasoning_effort,
+            resolved_binary=resolved_binary or self.binary,
+        )
         if reasoning_effort:
             command.extend(["-c", f'model_reasoning_effort="{reasoning_effort}"'])
         tool_timeout_sec = self._positive_timeout_seconds(resolved_runner_config.get("mcp_tool_timeout_sec"))
@@ -945,6 +949,7 @@ class CodexRunner:
         target = ensure_dir(workspace_root / ".codex")
         resolved_runner_config = runner_config if isinstance(runner_config, dict) else self._load_runner_config()
         configured_home = str(resolved_runner_config.get("config_dir") or os.environ.get("CODEX_HOME") or str(Path.home() / ".codex"))
+        profile = str(resolved_runner_config.get("profile") or "").strip()
         source = Path(configured_home).expanduser()
         for filename in ("config.toml", "auth.json"):
             source_path = source / filename
@@ -953,6 +958,10 @@ class CodexRunner:
                 if source_path.resolve() == target_path.resolve():
                     continue
                 shutil.copy2(source_path, target_path)
+        config_path = target / "config.toml"
+        if profile and config_path.exists():
+            adapted_text, _ = adapt_profile_only_provider_config(read_text(config_path), profile=profile)
+            write_text(config_path, adapted_text)
         ensure_dir(target / "skills")
         quest_skills_root = quest_root / ".codex" / "skills"
         if quest_skills_root.exists():