omnius 1.0.196 → 1.0.197

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3871,6 +3871,18 @@ var init_system_deps = __esm({
3871
3871
  description: "Screenshot capture tool",
3872
3872
  group: "screenshot"
3873
3873
  },
3874
+ {
3875
+ command: "gnome-screenshot",
3876
+ packages: { apt: "gnome-screenshot", dnf: "gnome-screenshot", pacman: "gnome-screenshot" },
3877
+ description: "GNOME screenshot capture tool, useful on Wayland sessions",
3878
+ group: "screenshot"
3879
+ },
3880
+ {
3881
+ command: "grim",
3882
+ packages: { apt: "grim", dnf: "grim", pacman: "grim" },
3883
+ description: "Wayland screenshot capture tool for wlroots compositors",
3884
+ group: "screenshot"
3885
+ },
3874
3886
  {
3875
3887
  command: "xdotool",
3876
3888
  packages: { apt: "xdotool", dnf: "xdotool", pacman: "xdotool" },
@@ -3919,8 +3931,8 @@ import { existsSync as existsSync6, statSync as statSync3 } from "node:fs";
3919
3931
  function captureDesktopScreenshot(outputPath3) {
3920
3932
  const attempts = [];
3921
3933
  const out = quoteShell(outputPath3);
3922
- const tryCapture = (label, command) => {
3923
- const result = run(command, 1e4);
3934
+ const tryCapture = (label, command, timeout2 = 1e4) => {
3935
+ const result = run(command, timeout2);
3924
3936
  if (result.ok && existsSync6(outputPath3)) {
3925
3937
  const inspection = inspectScreenshot(outputPath3);
3926
3938
  if (inspection.ok)
@@ -3950,6 +3962,17 @@ $bitmap.Dispose()
3950
3962
  if (backend)
3951
3963
  return backend;
3952
3964
  } else if (process.platform === "linux") {
3965
+ const desktop = `${process.env["XDG_CURRENT_DESKTOP"] || ""} ${process.env["DESKTOP_SESSION"] || ""}`;
3966
+ if ((/wayland/i.test(process.env["XDG_SESSION_TYPE"] || "") || process.env["WAYLAND_DISPLAY"]) && hasCommand2("gdbus") && hasCommand2("dbus-monitor")) {
3967
+ const backend = tryCapture("xdg-desktop-portal-screenshot", portalScreenshotCommand(outputPath3), 13e4);
3968
+ if (backend)
3969
+ return backend;
3970
+ }
3971
+ if (/gnome/i.test(desktop) && hasCommand2("gdbus")) {
3972
+ const backend = tryCapture("gnome-shell-screenshot-dbus", `gdbus call --session --dest org.gnome.Shell.Screenshot --object-path /org/gnome/Shell/Screenshot --method org.gnome.Shell.Screenshot.Screenshot false false ${out}`);
3973
+ if (backend)
3974
+ return backend;
3975
+ }
3953
3976
  const candidates = [
3954
3977
  ["gnome-screenshot", `gnome-screenshot -f ${out}`],
3955
3978
  ["grim", `grim ${out}`],
@@ -3965,6 +3988,20 @@ $bitmap.Dispose()
3965
3988
  if (backend)
3966
3989
  return backend;
3967
3990
  }
3991
+ if (/wayland/i.test(process.env["XDG_SESSION_TYPE"] || "") || process.env["WAYLAND_DISPLAY"]) {
3992
+ for (const binary of [/gnome/i.test(desktop) ? "gnome-screenshot" : "", "grim"].filter(Boolean)) {
3993
+ const dep2 = ensureCommand(binary);
3994
+ if (!dep2.available) {
3995
+ if (dep2.error)
3996
+ attempts.push({ label: `${binary} auto-install`, message: dep2.error });
3997
+ continue;
3998
+ }
3999
+ const command = binary === "gnome-screenshot" ? `gnome-screenshot -f ${out}` : `grim ${out}`;
4000
+ const backend = tryCapture(dep2.installed ? `${binary} (auto-installed)` : binary, command);
4001
+ if (backend)
4002
+ return backend;
4003
+ }
4004
+ }
3968
4005
  const dep = ensureCommand("scrot");
3969
4006
  if (dep.available) {
3970
4007
  const backend = tryCapture(dep.installed ? "scrot (auto-installed)" : "scrot", `scrot ${out}`);
@@ -3983,6 +4020,73 @@ $bitmap.Dispose()
3983
4020
  }
3984
4021
  throw new Error("No desktop screenshot backend succeeded.\n" + formatDesktopAttempts(attempts) + "\n" + desktopAutomationRecoveryMessage());
3985
4022
  }
4023
+ function portalScreenshotCommand(outputPath3) {
4024
+ const script = `
4025
+ set -eu
4026
+ out=${quoteShell(outputPath3)}
4027
+ monitor_file="$(mktemp)"
4028
+ cleanup() {
4029
+ if [ -n "\${monpid:-}" ]; then kill "$monpid" >/dev/null 2>&1 || true; fi
4030
+ rm -f "$monitor_file"
4031
+ }
4032
+ trap cleanup EXIT
4033
+
4034
+ call_out="$(gdbus call --session --dest org.freedesktop.portal.Desktop --object-path /org/freedesktop/portal/desktop --method org.freedesktop.portal.Screenshot.Screenshot "" "{'interactive': <true>, 'modal': <true>}" 2>&1)" || {
4035
+ printf '%s\\n' "$call_out" >&2
4036
+ exit 1
4037
+ }
4038
+ handle="$(printf '%s\\n' "$call_out" | sed -n "s/.*objectpath '\\([^']*\\)'.*/\\1/p")"
4039
+ if [ -z "$handle" ]; then
4040
+ printf 'Portal screenshot did not return a request handle: %s\\n' "$call_out" >&2
4041
+ exit 1
4042
+ fi
4043
+
4044
+ dbus-monitor --session "type='signal',interface='org.freedesktop.portal.Request',member='Response',path='$handle'" > "$monitor_file" 2>&1 &
4045
+ monpid=$!
4046
+ deadline=$(( $(date +%s) + 120 ))
4047
+ while :; do
4048
+ if grep -q "member=Response" "$monitor_file"; then break; fi
4049
+ if ! kill -0 "$monpid" >/dev/null 2>&1; then break; fi
4050
+ if [ "$(date +%s)" -ge "$deadline" ]; then
4051
+ printf 'Timed out waiting for the desktop screenshot permission prompt.\\n' >&2
4052
+ exit 124
4053
+ fi
4054
+ sleep 0.2
4055
+ done
4056
+ kill "$monpid" >/dev/null 2>&1 || true
4057
+
4058
+ response_code="$(awk '/member=Response/{seen=1; next} seen && /uint32/{print $2; exit}' "$monitor_file")"
4059
+ if [ "$response_code" != "0" ]; then
4060
+ printf 'Screenshot permission was denied or cancelled by the desktop portal (response %s).\\n' "\${response_code:-unknown}" >&2
4061
+ cat "$monitor_file" >&2
4062
+ exit 1
4063
+ fi
4064
+
4065
+ uri="$(sed -n 's/.*string "\\(file:[^"]*\\)".*/\\1/p' "$monitor_file" | tail -n 1)"
4066
+ if [ -z "$uri" ]; then
4067
+ printf 'Desktop portal granted screenshot permission but did not return a file URI.\\n' >&2
4068
+ cat "$monitor_file" >&2
4069
+ exit 1
4070
+ fi
4071
+
4072
+ if command -v gio >/dev/null 2>&1; then
4073
+ gio copy -f "$uri" "$out"
4074
+ elif command -v python3 >/dev/null 2>&1; then
4075
+ python3 - "$uri" "$out" <<'PY'
4076
+ import shutil, sys, urllib.parse
4077
+ uri, out = sys.argv[1], sys.argv[2]
4078
+ parsed = urllib.parse.urlparse(uri)
4079
+ if parsed.scheme != "file":
4080
+ raise SystemExit(f"Unsupported screenshot URI scheme: {parsed.scheme}")
4081
+ shutil.copyfile(urllib.parse.unquote(parsed.path), out)
4082
+ PY
4083
+ else
4084
+ printf 'Need gio or python3 to copy portal screenshot URI %s to %s.\\n' "$uri" "$out" >&2
4085
+ exit 1
4086
+ fi
4087
+ `.trim();
4088
+ return `bash -lc ${quoteShell(script)}`;
4089
+ }
3986
4090
  function moveDesktopPointer(x, y) {
3987
4091
  const result = performDesktopPointerAction({ x, y, moveOnly: true });
3988
4092
  if (!result.ok)
@@ -4111,10 +4215,13 @@ function desktopAutomationRecoveryMessage(command) {
4111
4215
  `DISPLAY=${display} WAYLAND_DISPLAY=${wayland}`,
4112
4216
  "For web pages and local dev servers, prefer browser_action or playwright_browser screenshots/clicks; they do not need OS desktop access.",
4113
4217
  "For OS-level desktop control, Omnius tries platform backends in order:",
4114
- " Linux: xdotool/X11, ydotool, dotool, python-xlib",
4218
+ " Linux screenshots: xdg-desktop-portal permission prompt, GNOME Shell D-Bus/gnome-screenshot, grim, scrot, ImageMagick import, Python ImageGrab",
4219
+ " Linux pointer control: xdotool/X11, ydotool, dotool, python-xlib",
4115
4220
  " macOS: cliclick, then System Events",
4116
4221
  " Windows: PowerShell user32 input",
4117
- "On Wayland, install and enable ydotool or dotool when xdotool cannot open an X display."
4222
+ "On Wayland, Omnius requests screenshot permission through xdg-desktop-portal when available. Approve the system screenshot prompt to continue.",
4223
+ "On GNOME Wayland, unattended screenshots may still be denied by compositor policy. Install gnome-screenshot or grant screenshot permission for the session if capture is blocked.",
4224
+ "On Wayland pointer control, install and enable ydotool or dotool when xdotool cannot open an X display."
4118
4225
  ].join("\n");
4119
4226
  }
4120
4227
  function performDesktopPointerAction(options2) {
@@ -270549,6 +270656,9 @@ State: ${statePath}${clearArtifacts ? "\nArtifacts cleared: true" : ""}`,
270549
270656
  }
270550
270657
  const stamp = timestampSlug();
270551
270658
  const screenshotPath = join47(sessionDir2, `${stamp}-step-${step}-before.png`);
270659
+ if (process.platform === "linux" && (process.env["WAYLAND_DISPLAY"] || /wayland/i.test(process.env["XDG_SESSION_TYPE"] || ""))) {
270660
+ yield "Vision action loop: requesting desktop screenshot permission if the system prompts";
270661
+ }
270552
270662
  yield `Vision action loop: capturing screenshot ${step}/${maxSteps}`;
270553
270663
  let screenshotBackend = "";
270554
270664
  try {
@@ -270641,6 +270751,9 @@ State: ${statePath}${clearArtifacts ? "\nArtifacts cleared: true" : ""}`,
270641
270751
  actionTaken = operation === "move" ? `Moved pointer to (${pixelX}, ${pixelY}) via ${backend}` : `Clicked at (${pixelX}, ${pixelY}) via ${backend} [${button} ${clickType}]`;
270642
270752
  outputLines.push(actionTaken);
270643
270753
  afterScreenshotPath = join47(sessionDir2, `${timestampSlug()}-step-${step}-after.png`);
270754
+ if (process.platform === "linux" && (process.env["WAYLAND_DISPLAY"] || /wayland/i.test(process.env["XDG_SESSION_TYPE"] || ""))) {
270755
+ yield "Vision action loop: requesting desktop screenshot permission for post-action verification if the system prompts";
270756
+ }
270644
270757
  yield `Vision action loop: capturing post-action screenshot ${step}/${maxSteps}`;
270645
270758
  const afterBackend = captureDesktopScreenshot(afterScreenshotPath);
270646
270759
  mutatedFiles.push(afterScreenshotPath);
@@ -528648,7 +528761,7 @@ var init_full_sub_agent = __esm({
528648
528761
  _activeSubProcesses = /* @__PURE__ */ new Map();
528649
528762
  FullSubAgentTool = class {
528650
528763
  name = "full_sub_agent";
528651
- description = "Spawn a COMPLETE Omnius sub-process with ALL tools and capabilities. Unlike sub_agent (shared process, limited tools), this spawns a separate omnius instance with its own clean context window, full tool set, memory, skills, and COHERE access. Use for complex multi-file tasks that benefit from a fresh context. The sub-process runs omnius --non-interactive and its output appears in the tab bar. Returns immediately with a process ID for monitoring.";
528764
+ description = "Spawn a COMPLETE Omnius sub-process with ALL tools and capabilities. Unlike sub_agent (shared process, limited tools), this spawns a separate omnius instance with its own clean context window, full tool set, memory, skills, and COHERE access. Use for complex multi-file tasks that benefit from a fresh context. The sub-process runs omnius --non-interactive until task_complete, abort, or timeout. By default this tool returns immediately with a process ID for monitoring; set wait=true to block until the child exits.";
528652
528765
  parameters = {
528653
528766
  type: "object",
528654
528767
  properties: {
@@ -528668,6 +528781,14 @@ var init_full_sub_agent = __esm({
528668
528781
  id: {
528669
528782
  type: "string",
528670
528783
  description: "Sub-agent process ID (for status/output/stop)"
528784
+ },
528785
+ wait: {
528786
+ type: "boolean",
528787
+ description: "For action='spawn', wait for the subprocess to exit instead of returning immediately. The child omnius run uses unlimited turns by default and stops on task_complete, abort, or timeout."
528788
+ },
528789
+ timeout_ms: {
528790
+ type: "number",
528791
+ description: "Optional timeout passed through to the child omnius process."
528671
528792
  }
528672
528793
  },
528673
528794
  required: []
@@ -528706,6 +528827,8 @@ var init_full_sub_agent = __esm({
528706
528827
  if (!task)
528707
528828
  return { success: false, output: "", error: "task is required", durationMs: performance.now() - start2 };
528708
528829
  const model = String(args["model"] ?? this.model);
528830
+ const wait = args["wait"] === true;
528831
+ const timeoutMs = typeof args["timeout_ms"] === "number" && Number.isFinite(args["timeout_ms"]) ? Math.max(1, Math.floor(args["timeout_ms"])) : void 0;
528709
528832
  const broker = getModelBroker();
528710
528833
  const decision2 = await broker.ensureModelLoadable({
528711
528834
  name: model || "default",
@@ -528729,7 +528852,7 @@ var init_full_sub_agent = __esm({
528729
528852
  durationMs: performance.now() - start2
528730
528853
  };
528731
528854
  }
528732
- const entry = spawnFullSubAgent(task, { model, backendUrl: this.backendUrl, workingDir: this.workingDir }, (text) => this.onViewWrite?.(entry.id, text), (id, exitCode, output) => {
528855
+ const entry = spawnFullSubAgent(task, { model, backendUrl: this.backendUrl, workingDir: this.workingDir, timeoutMs }, (text) => this.onViewWrite?.(entry.id, text), (id, exitCode, output) => {
528733
528856
  this.onViewStatus?.(id, exitCode === 0 ? "completed" : "failed");
528734
528857
  broker.unregisterLoaded("subprocess", id, "sub-agent-exited");
528735
528858
  this.onComplete?.(id, task, exitCode, output);
@@ -528746,6 +528869,33 @@ var init_full_sub_agent = __esm({
528746
528869
  priority: 1
528747
528870
  });
528748
528871
  this.onViewRegister?.(entry.id, entry.id, "full");
528872
+ if (wait) {
528873
+ const exitCode = await this.waitForExit(entry);
528874
+ const output = entry.outputBuffer.join("\n");
528875
+ const tail = entry.outputBuffer.slice(-120).join("\n");
528876
+ const durationMs = performance.now() - start2;
528877
+ if (exitCode === 0) {
528878
+ return {
528879
+ success: true,
528880
+ output: `Full Omnius sub-agent completed: ${entry.id}
528881
+ PID: ${entry.pid}
528882
+ Model: ${model}
528883
+ Duration: ${(durationMs / 1e3).toFixed(1)}s
528884
+ Output lines: ${entry.outputBuffer.length}` + (tail ? `
528885
+
528886
+ ${tail}` : ""),
528887
+ durationMs
528888
+ };
528889
+ }
528890
+ return {
528891
+ success: false,
528892
+ output: output ? `Full Omnius sub-agent failed: ${entry.id}
528893
+
528894
+ ${tail || output}` : "",
528895
+ error: `Full Omnius sub-agent exited before successful completion (exit code ${exitCode ?? "unknown"}).`,
528896
+ durationMs
528897
+ };
528898
+ }
528749
528899
  return {
528750
528900
  success: true,
528751
528901
  output: `Full Omnius sub-agent spawned: ${entry.id}
@@ -528805,6 +528955,14 @@ ${tail}`, durationMs: performance.now() - start2 };
528805
528955
  return { success: false, output: "", error: `Unknown action: ${action}`, durationMs: performance.now() - start2 };
528806
528956
  }
528807
528957
  }
528958
+ waitForExit(entry) {
528959
+ if (entry.status !== "running")
528960
+ return Promise.resolve(entry.exitCode);
528961
+ return new Promise((resolvePromise) => {
528962
+ entry.process.once("exit", (code8) => resolvePromise(code8));
528963
+ entry.process.once("error", () => resolvePromise(-1));
528964
+ });
528965
+ }
528808
528966
  };
528809
528967
  }
528810
528968
  });
@@ -672837,7 +672995,11 @@ function createSubAgentTool(config, repoRoot, ctxWindowSize) {
672837
672995
  },
672838
672996
  max_turns: {
672839
672997
  type: "number",
672840
- description: "Maximum turns for the sub-agent (default: 15)"
672998
+ description: "Maximum turns for the sub-agent (default: 15). Use 0 to run until task_complete or timeout."
672999
+ },
673000
+ until_task_complete: {
673001
+ type: "boolean",
673002
+ description: "If true, run with max_turns=0 so the sub-agent stops only at task_complete, abort, or timeout."
672841
673003
  }
672842
673004
  },
672843
673005
  required: ["task"]
@@ -672845,7 +673007,7 @@ function createSubAgentTool(config, repoRoot, ctxWindowSize) {
672845
673007
  async execute(args) {
672846
673008
  const task = String(args["task"] ?? "");
672847
673009
  const background = Boolean(args["background"]);
672848
- const maxTurns = typeof args["max_turns"] === "number" ? args["max_turns"] : 15;
673010
+ const maxTurns = args["until_task_complete"] === true ? 0 : typeof args["max_turns"] === "number" ? args["max_turns"] : 15;
672849
673011
  if (!task) {
672850
673012
  return { success: false, output: "", error: "task is required" };
672851
673013
  }
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.196",
3
+ "version": "1.0.197",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.196",
9
+ "version": "1.0.197",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.196",
3
+ "version": "1.0.197",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",