omnius 1.0.208 → 1.0.210

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -284618,15 +284618,52 @@ function findScrapeScript() {
284618
284618
  ];
284619
284619
  return candidates.find((p2) => existsSync44(p2)) || candidates[0];
284620
284620
  }
284621
- async function probeService() {
284621
+ async function probeServiceInfo() {
284622
284622
  try {
284623
284623
  const controller = new AbortController();
284624
284624
  const timeout2 = setTimeout(() => controller.abort(), 3e3);
284625
284625
  const res = await fetch(`${BASE_URL}/health`, { signal: controller.signal });
284626
284626
  clearTimeout(timeout2);
284627
- return res.ok;
284627
+ if (!res.ok)
284628
+ return null;
284629
+ const data = await res.json().catch(() => null);
284630
+ return data && typeof data === "object" ? data : {};
284628
284631
  } catch {
284632
+ return null;
284633
+ }
284634
+ }
284635
+ async function probeService() {
284636
+ return Boolean(await probeServiceInfo());
284637
+ }
284638
+ function serviceHasCapabilities(info) {
284639
+ if (!info)
284629
284640
  return false;
284641
+ const raw = info["capabilities"];
284642
+ const capabilities = Array.isArray(raw) ? raw.map(String) : [];
284643
+ return REQUIRED_SERVICE_CAPABILITIES.every((capability) => capabilities.includes(capability));
284644
+ }
284645
+ function killBrowserActionServicePort() {
284646
+ if (serviceProcess && serviceProcess.pid && !serviceProcess.killed) {
284647
+ try {
284648
+ process.kill(-serviceProcess.pid, "SIGTERM");
284649
+ } catch {
284650
+ }
284651
+ try {
284652
+ serviceProcess.kill("SIGTERM");
284653
+ } catch {
284654
+ }
284655
+ serviceProcess = null;
284656
+ }
284657
+ const commands = [
284658
+ `lsof -ti tcp:${DEFAULT_PORT} | xargs -r kill -TERM`,
284659
+ `fuser -k ${DEFAULT_PORT}/tcp`
284660
+ ];
284661
+ for (const cmd of commands) {
284662
+ try {
284663
+ execSync22(cmd, { stdio: "ignore", timeout: 5e3 });
284664
+ break;
284665
+ } catch {
284666
+ }
284630
284667
  }
284631
284668
  }
284632
284669
  function findPython3() {
@@ -284641,8 +284678,17 @@ function findPython3() {
284641
284678
  return null;
284642
284679
  }
284643
284680
  async function launchService() {
284644
- if (await probeService())
284645
- return null;
284681
+ const existing = await probeServiceInfo();
284682
+ if (existing) {
284683
+ if (serviceHasCapabilities(existing))
284684
+ return null;
284685
+ killBrowserActionServicePort();
284686
+ for (let i2 = 0; i2 < 20; i2++) {
284687
+ await new Promise((r2) => setTimeout(r2, 250));
284688
+ if (!await probeService())
284689
+ break;
284690
+ }
284691
+ }
284646
284692
  const python = findPython3();
284647
284693
  if (!python)
284648
284694
  return "Python 3 not found. Install Python 3.9+ to use browser automation.";
@@ -284654,10 +284700,12 @@ async function launchService() {
284654
284700
  env: {
284655
284701
  ...process.env,
284656
284702
  SCRAPE_PORT: String(DEFAULT_PORT),
284703
+ OMNIUS_BROWSER_ACTION_VENV: join55(omniusHomeDir(), "runtimes", "browser", ".venv-selenium"),
284657
284704
  SCRAPE_HEADLESS_DEFAULT: process.env["SCRAPE_HEADLESS_DEFAULT"] ?? (defaultBrowserHeadless() ? "1" : "0"),
284658
284705
  SCRAPE_REQUIRE_AUTH: "0"
284659
284706
  }
284660
284707
  });
284708
+ serviceProcess.unref();
284661
284709
  const cleanupService = () => {
284662
284710
  if (serviceProcess && serviceProcess.pid && !serviceProcess.killed) {
284663
284711
  try {
@@ -284796,13 +284844,33 @@ async function apiCall(endpoint, method = "POST", body) {
284796
284844
  url += `?${params.toString()}`;
284797
284845
  }
284798
284846
  const res = await fetch(url, options2);
284799
- return await res.json();
284847
+ const raw = await res.text();
284848
+ try {
284849
+ return JSON.parse(raw);
284850
+ } catch {
284851
+ return {
284852
+ ok: false,
284853
+ error: `HTTP ${res.status} from browser_action service: ${raw.slice(0, 500)}`
284854
+ };
284855
+ }
284856
+ }
284857
+ function evaluateFailureMessage2(err, code8) {
284858
+ const raw = err instanceof Error ? err.message : String(err);
284859
+ const hints = [];
284860
+ if (/map is not a function/i.test(raw) && /querySelectorAll/i.test(code8)) {
284861
+ hints.push("document.querySelectorAll() returns a NodeList; use Array.from(document.querySelectorAll(selector)).map(...) or [...document.querySelectorAll(selector)].map(...).");
284862
+ }
284863
+ if (/(?:\.value\s*=|setAttribute\(['"]value['"])/.test(code8) && /\b(input|textarea|querySelector)/i.test(code8)) {
284864
+ hints.push("Direct .value assignment can bypass framework input/change handlers. Prefer browser_action type, browser_action click_xy plus input/sync paths, or playwright_browser fill/visual_click.");
284865
+ }
284866
+ return [raw.slice(0, 500), ...hints.map((hint) => `Hint: ${hint}`)].join("\n");
284800
284867
  }
284801
- var __dirname3, DEFAULT_PORT, SCRAPE_SCRIPT, BASE_URL, serviceProcess, activeSessionId, activeSessionHeadless, activeSessionUrl, BrowserActionTool;
284868
+ var __dirname3, DEFAULT_PORT, SCRAPE_SCRIPT, BASE_URL, serviceProcess, activeSessionId, activeSessionHeadless, activeSessionUrl, REQUIRED_SERVICE_CAPABILITIES, BrowserActionTool;
284802
284869
  var init_browser_action = __esm({
284803
284870
  "packages/execution/dist/tools/browser-action.js"() {
284804
284871
  "use strict";
284805
284872
  init_dom_summary();
284873
+ init_model_store();
284806
284874
  init_network_egress_policy();
284807
284875
  __dirname3 = dirname14(fileURLToPath6(import.meta.url));
284808
284876
  DEFAULT_PORT = 8130;
@@ -284812,16 +284880,17 @@ var init_browser_action = __esm({
284812
284880
  activeSessionId = null;
284813
284881
  activeSessionHeadless = null;
284814
284882
  activeSessionUrl = null;
284883
+ REQUIRED_SERVICE_CAPABILITIES = ["evaluate"];
284815
284884
  BrowserActionTool = class {
284816
284885
  name = "browser_action";
284817
- description = "Control a persistent headless Chrome browser session for interactive web tasks. The browser stays open between calls, maintaining cookies, login state, and history. This is a separate Selenium/Chrome runtime from playwright_browser; do not switch between the two mid-workflow unless you intentionally navigate the second tool to the same URL. Use this (not web_fetch/web_crawl) when you need to: (1) log into a website, (2) fill and submit forms, (3) click buttons or links interactively, (4) take screenshots of rendered pages, (5) navigate multi-step workflows (checkout, signup, dashboards), (6) interact with elements that require JavaScript (dropdowns, modals, infinite scroll). Actions: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close. For browser visuals, use browser_action({action:'screenshot', width, height, output_path}) — this captures the headless browser viewport, not the desktop. Use the desktop screenshot tool only when the actual OS screen is the target. For verification of browser runtime failures, prefer playwright_browser because it exposes page_errors, console_logs, network_log, DOM/accessibility, and screenshots from the same session. IMPORTANT: Start by calling navigate with the URL — do NOT ask the user for credentials or info first. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Navigate to the page, then use dom/screenshot to see what's there, then type/click to interact. Call 'close' when done to free resources. This tool does not save or download arbitrary rendered files (PDFs, archives, media) to disk — clicking a 'Download' link inside the browser does not produce a local file path for the agent. For file acquisition, use the dedicated download/file tool and validate the resulting content-type and size before treating the result as success.";
284886
+ description = "Control a persistent headless Chrome browser session for interactive web tasks. The browser stays open between calls, maintaining cookies, login state, and history. This is a separate Selenium/Chrome runtime from playwright_browser; do not switch between the two mid-workflow unless you intentionally navigate the second tool to the same URL. Use this (not web_fetch/web_crawl) when you need to: (1) log into a website, (2) fill and submit forms, (3) click buttons or links interactively, (4) take screenshots of rendered pages, (5) navigate multi-step workflows (checkout, signup, dashboards), (6) interact with elements that require JavaScript (dropdowns, modals, infinite scroll). Actions: navigate, click, click_xy, type, evaluate, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close. For browser visuals, use browser_action({action:'screenshot', width, height, output_path}) — this captures the headless browser viewport, not the desktop. Use the desktop screenshot tool only when the actual OS screen is the target. For verification of browser runtime failures, prefer playwright_browser because it exposes page_errors, console_logs, network_log, DOM/accessibility, and screenshots from the same session. IMPORTANT: Start by calling navigate with the URL — do NOT ask the user for credentials or info first. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Navigate to the page, then use dom/screenshot to see what's there, then type/click to interact. Call 'close' when done to free resources. This tool does not save or download arbitrary rendered files (PDFs, archives, media) to disk — clicking a 'Download' link inside the browser does not produce a local file path for the agent. For file acquisition, use the dedicated download/file tool and validate the resulting content-type and size before treating the result as success.";
284818
284887
  parameters = {
284819
284888
  type: "object",
284820
284889
  properties: {
284821
284890
  action: {
284822
284891
  type: "string",
284823
- enum: ["navigate", "click", "click_xy", "type", "screenshot", "dom", "dom_summary", "vision_click", "scroll", "scroll_up", "scroll_down", "back", "forward", "close"],
284824
- description: "Browser action to perform. Key actions:\n- 'screenshot': capture the headless browser render at width/height; returns an image part and a local file path if output_path is provided\n- 'dom_summary': compact view of interactive elements (~1KB vs 200KB raw DOM)\n- 'vision_click': screenshot the page, use Moondream vision to find an element by description, then click it. Pass the element description in 'text' parameter (e.g. text='the login button'). This is the visual grounding loop from SeeAct.\n- 'click': click by CSS selector (fastest when you know the selector)\n- 'click_xy': click at pixel coordinates (when you have exact coords)"
284892
+ enum: ["navigate", "click", "click_xy", "type", "evaluate", "screenshot", "dom", "dom_summary", "vision_click", "scroll", "scroll_up", "scroll_down", "back", "forward", "close"],
284893
+ description: "Browser action to perform. Key actions:\n- 'screenshot': capture the headless browser render at width/height; returns an image part and a local file path if output_path is provided\n- 'dom_summary': compact view of interactive elements (~1KB vs 200KB raw DOM)\n- 'vision_click': screenshot the page, use Moondream vision to find an element by description, then click it. Pass the element description in 'text' parameter (e.g. text='the login button'). This is the visual grounding loop from SeeAct.\n- 'click': click by CSS selector (fastest when you know the selector)\n- 'click_xy': click at pixel coordinates (when you have exact coords)\n- 'evaluate': run JavaScript in the active Selenium page; pass code in text"
284825
284894
  },
284826
284895
  url: {
284827
284896
  type: "string",
@@ -284833,7 +284902,7 @@ var init_browser_action = __esm({
284833
284902
  },
284834
284903
  text: {
284835
284904
  type: "string",
284836
- description: "Text to type (for 'type' action) OR element description to find and click (for 'vision_click' action, e.g. 'the submit button', 'the search field', 'the country dropdown')"
284905
+ description: "Text to type (for 'type' action), JS code (for 'evaluate'), OR element description to find and click (for 'vision_click' action, e.g. 'the submit button', 'the search field', 'the country dropdown')"
284837
284906
  },
284838
284907
  x: {
284839
284908
  type: "number",
@@ -284890,6 +284959,7 @@ var init_browser_action = __esm({
284890
284959
  await apiCall("/session/close");
284891
284960
  } catch {
284892
284961
  }
284962
+ killBrowserActionServicePort();
284893
284963
  activeSessionId = null;
284894
284964
  activeSessionHeadless = null;
284895
284965
  activeSessionUrl = null;
@@ -285007,6 +285077,32 @@ Runtime: browser_action Selenium/Chrome session. Continue with browser_action fo
285007
285077
  durationMs: Date.now() - start2
285008
285078
  };
285009
285079
  }
285080
+ case "evaluate": {
285081
+ const code8 = typeof args.text === "string" ? args.text : typeof args.value === "string" ? args.value : "";
285082
+ if (!code8.trim())
285083
+ return { success: false, output: "", error: "text is required for evaluate action", durationMs: Date.now() - start2 };
285084
+ result = await apiCall("/evaluate", "POST", { script: code8 });
285085
+ if (result.ok) {
285086
+ const resultType = String(result["result_type"] ?? "unknown");
285087
+ const payload = result["result"];
285088
+ const rendered = payload === void 0 ? "undefined" : typeof payload === "string" ? payload : JSON.stringify(payload, null, 2);
285089
+ const truncated = rendered.length > 2e4 ? `${rendered.slice(0, 2e4)}
285090
+ ... (truncated)` : rendered;
285091
+ return {
285092
+ success: true,
285093
+ output: `Evaluation result (${resultType}):
285094
+ ${truncated}`,
285095
+ durationMs: Date.now() - start2
285096
+ };
285097
+ }
285098
+ const evalMsg = String(result.error ?? result.message ?? "Evaluate failed");
285099
+ return {
285100
+ success: false,
285101
+ output: "",
285102
+ error: `browser_action evaluate failed: ${evaluateFailureMessage2(evalMsg, code8)} ${browserActionRuntimeHint()}`,
285103
+ durationMs: Date.now() - start2
285104
+ };
285105
+ }
285010
285106
  case "screenshot": {
285011
285107
  if (requestedWidth || requestedHeight || requestedScale) {
285012
285108
  const currentW = requestedWidth ?? 1280;
@@ -285190,7 +285286,7 @@ Runtime: browser_action Selenium/Chrome session. Continue with browser_action fo
285190
285286
  result = await apiCall("/history/forward", "POST");
285191
285287
  return { success: !!result.ok, output: "Navigated forward", durationMs: Date.now() - start2 };
285192
285288
  default:
285193
- return { success: false, output: "", error: `Unknown action: ${action}. Available: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close`, durationMs: Date.now() - start2 };
285289
+ return { success: false, output: "", error: `Unknown action: ${action}. Available: navigate, click, click_xy, type, evaluate, screenshot, dom, dom_summary, vision_click, scroll, scroll_up, scroll_down, back, forward, close`, durationMs: Date.now() - start2 };
285194
285290
  }
285195
285291
  } catch (err) {
285196
285292
  return {
@@ -641871,7 +641967,7 @@ function renderTelegramSubAgentError(username, error) {
641871
641967
  process.stdout.write(` ${c3.dim("│")} ${c3.magenta("✘")} @${username}: ${c3.dim(preview)}
641872
641968
  `);
641873
641969
  }
641874
- var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS, TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
641970
+ var TELEGRAM_TOOL_ACTION_GROUPS, TELEGRAM_TOOL_ACTION_GROUP, TELEGRAM_TOOL_MUTATING_GROUPS, DEFAULT_TELEGRAM_TOOL_GROUP_POLICY, TELEGRAM_TOOL_BUTTON_LABELS, TELEGRAM_SAFETY_PROMPT, ADMIN_DM_PROMPT, ADMIN_GROUP_PROMPT, TELEGRAM_PUBLIC_SOUL_PROFILE, TELEGRAM_PUBLIC_ORCHESTRATOR_CONTRACT, TELEGRAM_PUBLIC_MEMORY_SCOPE_CONTRACT, TELEGRAM_PUBLIC_VISION_STACK_CONTRACT, GROUP_REPLY_DISCRETION_PROMPT, TELEGRAM_CHAT_MODE_PROMPT, ADMIN_CHAT_PROFILE_PROMPT, TELEGRAM_ACTION_RESPONSE_CONTRACT, TELEGRAM_EXTERNAL_ACQUISITION_CONTRACT, TELEGRAM_LINK_INTEGRITY_CONTRACT, TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT, TELEGRAM_INTERACTION_DECISION_MINIMAL_SCHEMA, TELEGRAM_INTERACTION_DECISION_REPAIR_SCHEMA, TELEGRAM_CHAT_REPLY_RESPONSE_FORMAT, TELEGRAM_SPACED_URL_RE, TELEGRAM_HTTP_URL_RE, TELEGRAM_STUCK_SELF_TALK_PREFIXES, TELEGRAM_CHAT_HISTORY_LIMIT, TELEGRAM_CONTEXT_RECENT_DEFAULT, TELEGRAM_CONTEXT_LINE_LIMIT, TELEGRAM_CONTEXT_SAMPLE_LIMIT, TELEGRAM_MEMORY_CARD_LIMIT, TELEGRAM_MEMORY_NOTE_LIMIT, TELEGRAM_ASSOCIATIVE_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_USER_FACT_LIMIT, TELEGRAM_ASSOCIATIVE_ACTION_LIMIT, TELEGRAM_ASSOCIATIVE_RELATION_LIMIT, TELEGRAM_MEMORY_STOPWORDS, TELEGRAM_MEMORY_GENERIC_QUERY_TOKENS, TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS, TELEGRAM_SUB_AGENT_DEFAULT_LIMIT, TELEGRAM_SUB_AGENT_MAX_LIMIT, TELEGRAM_SUB_AGENT_BURST_CONTEXT_LIMIT, TELEGRAM_ADMIN_LIVE_PANEL_PAGES, TELEGRAM_ADMIN_LIVE_MUTATION_TOOLS, TELEGRAM_PUBLIC_HELP_COMMANDS2, TELEGRAM_REMINDER_SLASH_COMMANDS, TELEGRAM_REFLECTION_SLASH_COMMANDS, TELEGRAM_PUBLIC_BOT_COMMAND_NAMES, TELEGRAM_IMAGE_EXTENSIONS, MEDIA_CACHE_TTL_MS, TELEGRAM_CHANNEL_DMN_SWEEP_MS, TELEGRAM_CHANNEL_DMN_IDLE_AFTER_MS, TELEGRAM_CHANNEL_DMN_MIN_INTERVAL_MS, TELEGRAM_CHANNEL_DMN_MIN_MESSAGES, TELEGRAM_ALLOWED_UPDATES, TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS, TELEGRAM_ROUTER_AUTO_MIN_PARAMETERS_B, TELEGRAM_PUBLIC_TOOL_QUOTAS, TelegramBridge;
641875
641971
  var init_telegram_bridge = __esm({
641876
641972
  "packages/cli/src/tui/telegram-bridge.ts"() {
641877
641973
  "use strict";
@@ -642334,20 +642430,7 @@ Telegram link integrity contract:
642334
642430
  TELEGRAM_CHANNEL_DMN_MIN_MESSAGES = 4;
642335
642431
  TELEGRAM_ALLOWED_UPDATES = ["message", "guest_message", "callback_query", "poll", "message_reaction", "message_reaction_count"];
642336
642432
  TELEGRAM_DEFAULT_LONG_POLL_TIMEOUT_SECONDS = 50;
642337
- TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES = [
642338
- "qwen2.5:3b",
642339
- "qwen2.5:7b",
642340
- "llama3.2:1b",
642341
- "llama3.2:3b",
642342
- "gemma3:1b",
642343
- "gemma3:4b",
642344
- "phi3:mini",
642345
- "phi4-mini:latest",
642346
- "qwen3:0.6b",
642347
- "qwen3:1.7b",
642348
- "qwen3:4b",
642349
- "qwen3:8b"
642350
- ];
642433
+ TELEGRAM_ROUTER_AUTO_MIN_PARAMETERS_B = 8;
642351
642434
  TELEGRAM_PUBLIC_TOOL_QUOTAS = {
642352
642435
  web: { limit: 20, windowMs: 60 * 6e4 },
642353
642436
  media: { limit: 30, windowMs: 60 * 6e4 },
@@ -647934,11 +648017,11 @@ ${retryText}`,
647934
648017
  }
647935
648018
  telegramRouterAutoModelEnabled() {
647936
648019
  const raw = (process.env["OMNIUS_TG_ROUTER_AUTO_MODEL"] ?? "").trim().toLowerCase();
647937
- return raw !== "0" && raw !== "false" && raw !== "off";
648020
+ return raw === "1" || raw === "true" || raw === "on";
647938
648021
  }
647939
648022
  telegramRouterCandidateModels() {
647940
648023
  const raw = (process.env["OMNIUS_TG_ROUTER_MODEL_CANDIDATES"] ?? "").trim();
647941
- const candidates = raw ? raw.split(/[,\s]+/).map((part) => part.trim()).filter(Boolean) : TELEGRAM_DEFAULT_ROUTER_MODEL_CANDIDATES;
648024
+ const candidates = raw ? raw.split(/[,\s]+/).map((part) => part.trim()).filter(Boolean) : [];
647942
648025
  return Array.from(new Set(candidates));
647943
648026
  }
647944
648027
  telegramRouterAllowThinkHeavyAutoModels() {
@@ -647948,16 +648031,10 @@ ${retryText}`,
647948
648031
  telegramRouterModelLooksThinkHeavy(name10) {
647949
648032
  return /\b(?:qwen3|qwq|deepseek-r1|r1-|reasoning)\b/i.test(name10);
647950
648033
  }
647951
- orderTelegramRouterCandidates(candidates) {
647952
- if (this.telegramRouterAllowThinkHeavyAutoModels()) return candidates;
647953
- const stable = candidates.filter((candidate) => !this.telegramRouterModelLooksThinkHeavy(candidate));
647954
- const thinkHeavy = candidates.filter((candidate) => this.telegramRouterModelLooksThinkHeavy(candidate));
647955
- return [...stable, ...thinkHeavy];
647956
- }
647957
648034
  normalizeOllamaModelNameForMatch(name10) {
647958
648035
  return name10.trim().toLowerCase().replace(/:latest$/, "");
647959
648036
  }
647960
- async fetchOllamaInstalledModelNames(baseUrl) {
648037
+ async fetchOllamaInstalledModels(baseUrl) {
647961
648038
  const url = `${baseUrl.replace(/\/+$/, "")}/api/tags`;
647962
648039
  const timeoutFn = AbortSignal.timeout;
647963
648040
  const res = await fetch(url, {
@@ -647965,7 +648042,43 @@ ${retryText}`,
647965
648042
  });
647966
648043
  if (!res.ok) throw new Error(`ollama /api/tags returned HTTP ${res.status}`);
647967
648044
  const data = await res.json();
647968
- return Array.isArray(data.models) ? data.models.map((model) => typeof model.name === "string" ? model.name : "").filter(Boolean) : [];
648045
+ return Array.isArray(data.models) ? data.models.map((model) => ({
648046
+ name: typeof model.name === "string" ? model.name : "",
648047
+ sizeBytes: typeof model.size === "number" ? model.size : void 0,
648048
+ parameterSize: typeof model.details?.parameter_size === "string" ? model.details.parameter_size : void 0
648049
+ })).filter((model) => Boolean(model.name)) : [];
648050
+ }
648051
+ telegramModelParameterBillions(model) {
648052
+ const haystack = `${model.name} ${model.parameterSize ?? ""}`.toLowerCase();
648053
+ const billion = haystack.match(/(\d+(?:\.\d+)?)\s*(?:b|bn)\b/);
648054
+ if (billion) return Number(billion[1]);
648055
+ const million = haystack.match(/(\d+(?:\.\d+)?)\s*m\b/);
648056
+ if (million) return Number(million[1]) / 1e3;
648057
+ return null;
648058
+ }
648059
+ scoreTelegramInstalledRouterModel(model) {
648060
+ const name10 = model.name.toLowerCase();
648061
+ if (/(?:embed|embedding|nomic|bge|e5-|clip|rerank|moondream|llava|vision|vl\b|minicpm|whisper|tts|sdxl|diffusion)/i.test(name10)) {
648062
+ return Number.NEGATIVE_INFINITY;
648063
+ }
648064
+ const paramsB = this.telegramModelParameterBillions(model);
648065
+ if (paramsB !== null && paramsB < TELEGRAM_ROUTER_AUTO_MIN_PARAMETERS_B) {
648066
+ return Number.NEGATIVE_INFINITY;
648067
+ }
648068
+ if (paramsB === null && (model.sizeBytes ?? 0) < 5e9) {
648069
+ return Number.NEGATIVE_INFINITY;
648070
+ }
648071
+ let score = 0;
648072
+ if (paramsB !== null) score += paramsB * 10;
648073
+ else score += Math.min(80, (model.sizeBytes ?? 0) / 1e9);
648074
+ if (/qwen|huihui|qwq/i.test(name10)) score += 80;
648075
+ else if (/deepseek|nemotron|llama|mistral|mixtral|command-r|devstral/i.test(name10)) score += 50;
648076
+ else if (/gemma/i.test(name10)) score += 20;
648077
+ if (/:latest$/i.test(model.name)) score += 1;
648078
+ if (this.telegramRouterModelLooksThinkHeavy(model.name) && !this.telegramRouterAllowThinkHeavyAutoModels()) {
648079
+ score -= 15;
648080
+ }
648081
+ return score;
647969
648082
  }
647970
648083
  async resolveTelegramRouterBackend(config) {
647971
648084
  const explicit = (process.env["OMNIUS_TG_ROUTER_MODEL"] ?? "").trim();
@@ -647977,17 +648090,20 @@ ${retryText}`,
647977
648090
  detail: "OMNIUS_TG_ROUTER_MODEL"
647978
648091
  };
647979
648092
  }
647980
- if (config.backendType !== "ollama" || !this.telegramRouterAutoModelEnabled()) {
648093
+ if (config.backendType !== "ollama") {
647981
648094
  return {
647982
648095
  backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
647983
648096
  model: config.model,
647984
648097
  source: "main"
647985
648098
  };
647986
648099
  }
647987
- const candidates = this.orderTelegramRouterCandidates(this.telegramRouterCandidateModels());
648100
+ const autoModelEnabled = this.telegramRouterAutoModelEnabled();
648101
+ const candidateFilter = this.telegramRouterCandidateModels();
648102
+ const candidates = new Set(candidateFilter.map((candidate) => this.normalizeOllamaModelNameForMatch(candidate)));
647988
648103
  const cacheKey = `${config.backendUrl}
647989
648104
  ${config.model}
647990
- ${candidates.join(",")}`;
648105
+ auto=${autoModelEnabled ? "1" : "0"}
648106
+ ${candidateFilter.join(",")}`;
647991
648107
  const now = Date.now();
647992
648108
  if (this.telegramRouterModelCache && this.telegramRouterModelCache.cacheKey === cacheKey && now - this.telegramRouterModelCache.atMs < 6e4) {
647993
648109
  const cached = this.telegramRouterModelCache;
@@ -647998,30 +648114,65 @@ ${candidates.join(",")}`;
647998
648114
  detail: cached.detail
647999
648115
  };
648000
648116
  }
648117
+ if (!autoModelEnabled) {
648118
+ const detail2 = "Telegram router auto-model selection is disabled by default; using main model";
648119
+ this.telegramRouterModelCache = {
648120
+ cacheKey,
648121
+ atMs: now,
648122
+ model: config.model,
648123
+ source: "main",
648124
+ detail: detail2
648125
+ };
648126
+ return {
648127
+ backend: new OllamaAgenticBackend(config.backendUrl, config.model, config.apiKey),
648128
+ model: config.model,
648129
+ source: "main",
648130
+ detail: detail2
648131
+ };
648132
+ }
648001
648133
  try {
648002
- const installed = await this.fetchOllamaInstalledModelNames(config.backendUrl);
648134
+ const installed = await this.fetchOllamaInstalledModels(config.backendUrl);
648003
648135
  const installedByNormalized = /* @__PURE__ */ new Map();
648004
- for (const name10 of installed) {
648005
- installedByNormalized.set(this.normalizeOllamaModelNameForMatch(name10), name10);
648136
+ for (const model of installed) {
648137
+ installedByNormalized.set(this.normalizeOllamaModelNameForMatch(model.name), model);
648006
648138
  }
648007
- for (const candidate of candidates) {
648008
- const selected = installedByNormalized.get(this.normalizeOllamaModelNameForMatch(candidate));
648009
- if (!selected) continue;
648139
+ const installedMain = installedByNormalized.get(this.normalizeOllamaModelNameForMatch(config.model));
648140
+ if (installedMain) {
648010
648141
  const resolved = {
648011
648142
  cacheKey,
648012
648143
  atMs: now,
648013
- model: selected,
648014
- source: "auto-small",
648015
- detail: "selected first installed Telegram router candidate from Ollama /api/tags; think-heavy models are tried last unless OMNIUS_TG_ROUTER_ALLOW_THINK_MODELS=1"
648144
+ model: installedMain.name,
648145
+ source: "main",
648146
+ detail: "main Telegram router model is installed in Ollama /api/tags; using main model by policy"
648016
648147
  };
648017
648148
  this.telegramRouterModelCache = resolved;
648018
648149
  return {
648019
- backend: new OllamaAgenticBackend(config.backendUrl, selected, config.apiKey),
648020
- model: selected,
648021
- source: "auto-small",
648150
+ backend: new OllamaAgenticBackend(config.backendUrl, installedMain.name, config.apiKey),
648151
+ model: installedMain.name,
648152
+ source: "main",
648022
648153
  detail: resolved.detail
648023
648154
  };
648024
648155
  }
648156
+ if (autoModelEnabled) {
648157
+ const pool3 = candidateFilter.length > 0 ? installed.filter((model) => candidates.has(this.normalizeOllamaModelNameForMatch(model.name))) : installed;
648158
+ const selected = pool3.map((model) => ({ model, score: this.scoreTelegramInstalledRouterModel(model) })).filter((entry) => Number.isFinite(entry.score)).sort((a2, b) => b.score - a2.score)[0]?.model;
648159
+ if (selected) {
648160
+ const resolved = {
648161
+ cacheKey,
648162
+ atMs: now,
648163
+ model: selected.name,
648164
+ source: "auto-installed",
648165
+ detail: `main Telegram router model ${JSON.stringify(config.model)} was not found in Ollama /api/tags; selected best installed capable router model dynamically (minimum ${TELEGRAM_ROUTER_AUTO_MIN_PARAMETERS_B}B, excludes embeddings/vision/tiny models)`
648166
+ };
648167
+ this.telegramRouterModelCache = resolved;
648168
+ return {
648169
+ backend: new OllamaAgenticBackend(config.backendUrl, selected.name, config.apiKey),
648170
+ model: selected.name,
648171
+ source: "auto-installed",
648172
+ detail: resolved.detail
648173
+ };
648174
+ }
648175
+ }
648025
648176
  } catch (err) {
648026
648177
  const detail2 = `router model auto-detect failed: ${err instanceof Error ? err.message : String(err)}`;
648027
648178
  this.telegramRouterModelCache = {
@@ -648038,7 +648189,7 @@ ${candidates.join(",")}`;
648038
648189
  detail: detail2
648039
648190
  };
648040
648191
  }
648041
- const detail = "no configured small router model was installed; using main model";
648192
+ const detail = `no installed capable Telegram router model met the dynamic minimum (${TELEGRAM_ROUTER_AUTO_MIN_PARAMETERS_B}B); using main model`;
648042
648193
  this.telegramRouterModelCache = {
648043
648194
  cacheKey,
648044
648195
  atMs: now,
@@ -32,7 +32,13 @@ from typing import Dict, Optional
32
32
  # ──────────────────────────────────────────────────────────────
33
33
  # 0) Embedded venv bootstrap (same pattern as other services)
34
34
  # ──────────────────────────────────────────────────────────────
35
- VENV_DIR = Path.cwd() / ".venv"
35
+ SCRIPT_PATH = Path(__file__).resolve()
36
+ SCRIPT_DIR = SCRIPT_PATH.parent
37
+ OMNIUS_HOME = Path(os.environ.get("OMNIUS_HOME") or (Path.home() / ".omnius"))
38
+ VENV_DIR = Path(
39
+ os.environ.get("OMNIUS_BROWSER_ACTION_VENV")
40
+ or (OMNIUS_HOME / "runtimes" / "browser" / ".venv-selenium")
41
+ )
36
42
 
37
43
 
38
44
  def _in_venv() -> bool:
@@ -48,6 +54,7 @@ def _ensure_venv_and_reexec() -> None:
48
54
  return
49
55
  python = sys.executable
50
56
  if not VENV_DIR.exists():
57
+ VENV_DIR.parent.mkdir(parents=True, exist_ok=True)
51
58
  print(f"[bootstrap] creating virtualenv at {VENV_DIR}", file=sys.stderr)
52
59
  subprocess.check_call([python, "-m", "venv", str(VENV_DIR)])
53
60
  pip_bin = VENV_DIR / ("Scripts/pip.exe" if os.name == "nt" else "bin/pip")
@@ -69,10 +76,21 @@ _ensure_venv_and_reexec()
69
76
  # ──────────────────────────────────────────────────────────────
70
77
  import subprocess # noqa: E402 (re-import after re-exec)
71
78
 
72
- SCRIPT_PATH = Path(__file__).resolve()
73
- SCRIPT_DIR = SCRIPT_PATH.parent
74
- SETUP_MARKER = SCRIPT_DIR / ".scrape_setup_complete"
79
+ SETUP_MARKER = VENV_DIR / ".scrape_setup_complete"
75
80
  OUT_DIR = SCRIPT_DIR / "frames"
81
+ SERVICE_VERSION = "2026-06-01-evaluate-v1"
82
+ SERVICE_CAPABILITIES = [
83
+ "navigate",
84
+ "click",
85
+ "click_xy",
86
+ "type",
87
+ "evaluate",
88
+ "screenshot",
89
+ "dom",
90
+ "scroll",
91
+ "history",
92
+ "events",
93
+ ]
76
94
 
77
95
 
78
96
  def _pip_install(*pkgs: str) -> None:
@@ -129,6 +147,7 @@ from selenium.webdriver.common.by import By # noqa: E402
129
147
  from selenium.webdriver.common.keys import Keys # noqa: E402
130
148
  from selenium.webdriver.chrome.options import Options # noqa: E402
131
149
  from selenium.webdriver.chrome.service import Service # noqa: E402
150
+ from selenium.webdriver.remote.webelement import WebElement # noqa: E402
132
151
  from selenium.webdriver.support import expected_conditions as EC # noqa: E402
133
152
  from selenium.webdriver.support.ui import WebDriverWait # noqa: E402
134
153
  from webdriver_manager.chrome import ChromeDriverManager # noqa: E402
@@ -160,6 +179,64 @@ def _truthy(value) -> bool:
160
179
  return str(value).lower() in ("1", "true", "yes", "on")
161
180
 
162
181
 
182
+ def _serialize_script_result(value, depth: int = 0, seen: Optional[set[int]] = None):
183
+ if seen is None:
184
+ seen = set()
185
+ if value is None or isinstance(value, (str, int, float, bool)):
186
+ return value
187
+ if depth > 5:
188
+ return str(value)
189
+ if isinstance(value, WebElement):
190
+ try:
191
+ rect = value.rect or {}
192
+ except Exception:
193
+ rect = {}
194
+ try:
195
+ text = value.text or ""
196
+ except Exception:
197
+ text = ""
198
+ try:
199
+ tag = value.tag_name or ""
200
+ except Exception:
201
+ tag = ""
202
+ def attr(name: str) -> str:
203
+ try:
204
+ return value.get_attribute(name) or ""
205
+ except Exception:
206
+ return ""
207
+ return {
208
+ "__omnius_type": "element",
209
+ "tag": tag,
210
+ "id": attr("id"),
211
+ "name": attr("name"),
212
+ "type": attr("type"),
213
+ "role": attr("role"),
214
+ "ariaLabel": attr("aria-label"),
215
+ "text": text[:240],
216
+ "rect": {
217
+ "x": rect.get("x", 0),
218
+ "y": rect.get("y", 0),
219
+ "width": rect.get("width", 0),
220
+ "height": rect.get("height", 0),
221
+ },
222
+ }
223
+ if isinstance(value, (list, tuple, set)):
224
+ return [_serialize_script_result(item, depth + 1, seen) for item in list(value)[:200]]
225
+ if isinstance(value, dict):
226
+ ident = id(value)
227
+ if ident in seen:
228
+ return "[Circular]"
229
+ seen.add(ident)
230
+ out = {}
231
+ for idx, (key, item) in enumerate(value.items()):
232
+ if idx >= 200:
233
+ out["__omnius_truncated"] = True
234
+ break
235
+ out[str(key)] = _serialize_script_result(item, depth + 1, seen)
236
+ return out
237
+ return str(value)
238
+
239
+
163
240
  class Tools:
164
241
  _driver: Optional[webdriver.Chrome] = None
165
242
 
@@ -421,6 +498,21 @@ class Tools:
421
498
  log_message(f"[dom] snapshot failed: {exc}", "WARNING")
422
499
  return ""
423
500
 
501
@staticmethod
def evaluate(script: str):
    """Run ``script`` in the open browser and report the outcome as a dict.

    Returns ``{"ok": True, "result": ..., "result_type": ...}`` on success,
    where ``result`` is the serialized script value and ``result_type`` is
    ``"undefined"`` for ``None`` or the Python type name otherwise. Returns
    ``{"ok": False, "error": ...}`` when no driver is set or the script raises.
    """
    if Tools._driver is None or not Tools._driver:
        return {"ok": False, "error": "browser not open"}
    try:
        raw = Tools._driver.execute_script(script)
        outcome = {"ok": True}
        outcome["result"] = _serialize_script_result(raw)
        if raw is None:
            outcome["result_type"] = "undefined"
        else:
            outcome["result_type"] = type(raw).__name__
        return outcome
    except Exception as exc:
        # Failures are logged and reported to the caller rather than raised.
        log_message(f"[evaluate] script failed: {exc}", "ERROR")
        return {"ok": False, "error": str(exc)}
515
+
424
516
  @staticmethod
425
517
  def scroll(amount: int = 600) -> str:
426
518
  if not Tools._driver:
@@ -921,7 +1013,15 @@ def _error(message: str, status: int = 400):
921
1013
  # ──────────────────────────────────────────────────────────────
922
1014
@app.get("/health")
def health():
    """Report service status, version, capabilities and runtime state as JSON."""
    payload = {
        "status": "ok",
        "service": "browser_action",
        "version": SERVICE_VERSION,
        "capabilities": SERVICE_CAPABILITIES,
    }
    # Live state is read at request time.
    payload["browser_open"] = Tools.is_browser_open()
    payload["sessions"] = len(_SESSIONS)
    payload["venv"] = str(VENV_DIR)
    return jsonify(payload)
925
1025
 
926
1026
 
927
1027
  @app.post("/session/start")
@@ -1041,6 +1141,23 @@ def type_text():
1041
1141
  return _ok(message=msg)
1042
1142
 
1043
1143
 
1144
@app.post("/evaluate")
def evaluate_script():
    """Run a script from the JSON request body in the browser.

    The script is taken from the first truthy field among ``script``, ``text``
    and ``code``. Responds 401 when the request is not authorized, 400 when no
    script is supplied, 500 when evaluation fails, and otherwise returns the
    serialized result together with its type label.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)

    body = request.get_json(silent=True) or {}

    # First truthy candidate wins; fall back to an empty string.
    script = ""
    for field in ("script", "text", "code"):
        candidate = body.get(field)
        if candidate:
            script = candidate
            break

    if not str(script).strip():
        return _error("missing script", 400)

    # NOTE(review): assumes the slot guards only the browser call - confirm
    # against the file's real indentation.
    with _slot():
        outcome = Tools.evaluate(str(script))

    if not isinstance(outcome, dict):
        return _error("evaluate failed", 500)
    if not outcome.get("ok"):
        return _error(outcome.get("error"), 500)

    # Use the caller's session id, else the first known session, else "".
    sid = body.get("sid") or next(iter(_SESSIONS), "")
    event = {"type": "status", "msg": "evaluate", "ts": int(time.time() * 1000)}
    _queue_event(sid, event)
    return _ok(result=outcome.get("result"), result_type=outcome.get("result_type"))
1159
+
1160
+
1044
1161
  @app.post("/scroll")
1045
1162
  def scroll():
1046
1163
  if not _auth_ok(request):
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.208",
3
+ "version": "1.0.210",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.208",
9
+ "version": "1.0.210",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.208",
3
+ "version": "1.0.210",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",