@jarvis-agent/core 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@ export default class ActivateSkillTool implements DialogueTool, Tool {
14
14
  get description(): string;
15
15
  /** Dynamic parameters with enum constraint */
16
16
  get parameters(): JSONSchema7;
17
- execute(args: Record<string, unknown>): Promise<ToolResult>;
17
+ execute(args: Record<string, unknown>, ..._rest: unknown[]): Promise<ToolResult>;
18
18
  }
19
19
  export { ActivateSkillTool };
20
20
  //# sourceMappingURL=activate-skill-tool.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"activate-skill-tool.d.ts","sourceRoot":"","sources":["../../../src/chat/tools/activate-skill-tool.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAE7D,eAAO,MAAM,SAAS,mBAAmB,CAAC;AAE1C,yDAAyD;AACzD,MAAM,CAAC,OAAO,OAAO,iBAAkB,YAAW,YAAY,EAAE,IAAI;IAClE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAa;IAClC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAQ;IAEhC,oDAAoD;IACpD,IAAI,WAAW,IAAI,MAAM,CAQxB;IAED,8CAA8C;IAC9C,IAAI,UAAU,IAAI,WAAW,CAgB5B;IAEK,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC;CAkClE;AAED,OAAO,EAAE,iBAAiB,EAAE,CAAC"}
1
+ {"version":3,"file":"activate-skill-tool.d.ts","sourceRoot":"","sources":["../../../src/chat/tools/activate-skill-tool.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAE7D,eAAO,MAAM,SAAS,mBAAmB,CAAC;AAE1C,yDAAyD;AACzD,MAAM,CAAC,OAAO,OAAO,iBAAkB,YAAW,YAAY,EAAE,IAAI;IAClE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAa;IAClC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAQ;IAEhC,oDAAoD;IACpD,IAAI,WAAW,IAAI,MAAM,CAQxB;IAED,8CAA8C;IAC9C,IAAI,UAAU,IAAI,WAAW,CAgB5B;IAEK,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,GAAG,KAAK,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,UAAU,CAAC;CAkCvF;AAED,OAAO,EAAE,iBAAiB,EAAE,CAAC"}
package/dist/index.cjs CHANGED
@@ -39704,6 +39704,24 @@ Output:
39704
39704
  "changed": true,
39705
39705
  "changeInfo": "New message received in the group chat. The message content is: 'Hello, how are you?'"
39706
39706
  }`;
39707
+ const watch_text_system_prompt = `You are a page content analyzer. Given a page content and a condition description, determine if the condition is currently met on the page.
39708
+ Return ONLY a JSON object, no other text.
39709
+ - "changed": true means the condition IS met (e.g. the target button exists, the status has changed to the expected value)
39710
+ - "changed": false means the condition is NOT yet met
39711
+
39712
+ ## Example
39713
+ Condition: Monitor for a "Retry" button appearing on the page
39714
+ ### Condition not met
39715
+ Output:
39716
+ {
39717
+ "changed": false
39718
+ }
39719
+ ### Condition met
39720
+ Output:
39721
+ {
39722
+ "changed": true,
39723
+ "changeInfo": "The 'Retry' button is present on the page at index 127-128"
39724
+ }`;
39707
39725
  class WatchTriggerTool {
39708
39726
  constructor() {
39709
39727
  this.name = TOOL_NAME$a;
@@ -39768,12 +39786,34 @@ class WatchTriggerTool {
39768
39786
  ],
39769
39787
  };
39770
39788
  }
39789
+ const rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms, agentContext.context.config.globalConfig?.streamFirstTimeout, agentContext.context.config.globalConfig?.streamTokenTimeout, agentContext);
39790
+ const useVision = this.isVisionModel(rlm);
39791
+ // Initial condition check (text-based, works with all models)
39792
+ const pageContent = await this.get_page_content(agentContext);
39793
+ const initialCheck = await this.is_condition_met(rlm, pageContent, task_description, agentContext);
39794
+ if (initialCheck.changed) {
39795
+ return {
39796
+ content: [
39797
+ {
39798
+ type: "text",
39799
+ text: initialCheck.changeInfo || "Condition already met on page.",
39800
+ },
39801
+ ],
39802
+ };
39803
+ }
39804
+ // Enter monitoring loop
39771
39805
  await this.init_eko_observer(agentContext);
39772
- const image1 = await this.get_screenshot(agentContext);
39773
39806
  const start = new Date().getTime();
39774
39807
  const timeout = (args.timeout || 5) * 60000;
39775
39808
  const frequency = Math.max(500, (args.frequency || 1) * 1000);
39776
- const rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms, agentContext.context.config.globalConfig?.streamFirstTimeout, agentContext.context.config.globalConfig?.streamTokenTimeout, agentContext);
39809
+ let image1;
39810
+ let content1;
39811
+ if (useVision) {
39812
+ image1 = await this.get_screenshot(agentContext);
39813
+ }
39814
+ else {
39815
+ content1 = pageContent;
39816
+ }
39777
39817
  while (new Date().getTime() - start < timeout) {
39778
39818
  await agentContext.context.checkAborted();
39779
39819
  await new Promise((resolve) => setTimeout(resolve, frequency));
@@ -39782,17 +39822,32 @@ class WatchTriggerTool {
39782
39822
  continue;
39783
39823
  }
39784
39824
  await this.init_eko_observer(agentContext);
39785
- const image2 = await this.get_screenshot(agentContext);
39786
- const changeResult = await this.is_dom_change(agentContext, rlm, image1, image2, task_description);
39787
- if (changeResult.changed) {
39788
- return {
39789
- content: [
39790
- {
39791
- type: "text",
39792
- text: changeResult.changeInfo || "DOM change detected.",
39793
- },
39794
- ],
39795
- };
39825
+ if (useVision) {
39826
+ // Vision model: compare screenshots
39827
+ const image2 = await this.get_screenshot(agentContext);
39828
+ const changeResult = await this.is_dom_change(agentContext, rlm, image1, image2, task_description);
39829
+ if (changeResult.changed) {
39830
+ return {
39831
+ content: [
39832
+ { type: "text", text: changeResult.changeInfo || "DOM change detected." },
39833
+ ],
39834
+ };
39835
+ }
39836
+ }
39837
+ else {
39838
+ // Text model: compare page content
39839
+ const content2 = await this.get_page_content(agentContext);
39840
+ if (content2 === content1)
39841
+ continue;
39842
+ const changeResult = await this.is_condition_met(rlm, content2, task_description, agentContext);
39843
+ content1 = content2;
39844
+ if (changeResult.changed) {
39845
+ return {
39846
+ content: [
39847
+ { type: "text", text: changeResult.changeInfo || "Condition met." },
39848
+ ],
39849
+ };
39850
+ }
39796
39851
  }
39797
39852
  }
39798
39853
  return {
@@ -39804,6 +39859,65 @@ class WatchTriggerTool {
39804
39859
  ],
39805
39860
  };
39806
39861
  }
39862
+ /** Check if the primary LLM supports vision */
39863
+ isVisionModel(rlm) {
39864
+ const names = rlm.Names;
39865
+ const llms = rlm.Llms;
39866
+ if (!names || names.length === 0)
39867
+ return false;
39868
+ const config = llms[names[0]];
39869
+ if (!config)
39870
+ return false;
39871
+ const provider = String(config.provider || "").toLowerCase();
39872
+ const model = String(config.model || "").toLowerCase();
39873
+ if (provider === "deepseek" || model.includes("deepseek"))
39874
+ return false;
39875
+ if (provider === "anthropic")
39876
+ return true;
39877
+ if (provider === "google")
39878
+ return true;
39879
+ if (model.includes("gpt-4o") || model.includes("gpt-4-vision") || model.includes("gpt-4-turbo"))
39880
+ return true;
39881
+ if (model.includes("claude") || model.includes("gemini"))
39882
+ return true;
39883
+ return false;
39884
+ }
39885
+ /** Get page text content via extract_page_content */
39886
+ async get_page_content(agentContext) {
39887
+ const extract = agentContext.agent["extract_page_content"];
39888
+ if (!extract)
39889
+ return "";
39890
+ const result = await extract.call(agentContext.agent, agentContext);
39891
+ return result?.page_content || "";
39892
+ }
39893
+ /** Check if condition is met using text-based LLM analysis */
39894
+ async is_condition_met(rlm, pageContent, task_description, agentContext) {
39895
+ try {
39896
+ const request = {
39897
+ messages: [
39898
+ { role: "system", content: watch_text_system_prompt },
39899
+ {
39900
+ role: "user",
39901
+ content: [
39902
+ {
39903
+ type: "text",
39904
+ text: `Condition: ${task_description}\n\nPage content:\n${pageContent.slice(0, 30000)}`,
39905
+ },
39906
+ ],
39907
+ },
39908
+ ],
39909
+ abortSignal: agentContext.context.controller.signal,
39910
+ };
39911
+ const result = await rlm.call(request);
39912
+ let resultText = result.text || "{}";
39913
+ resultText = resultText.substring(resultText.indexOf("{"), resultText.lastIndexOf("}") + 1);
39914
+ return JSON.parse(resultText);
39915
+ }
39916
+ catch (error) {
39917
+ Log.error("Error in is_condition_met:", error);
39918
+ }
39919
+ return { changed: false };
39920
+ }
39807
39921
  async get_screenshot(agentContext) {
39808
39922
  const screenshot = agentContext.agent["screenshot"];
39809
39923
  const imageResult = (await screenshot.call(agentContext.agent, agentContext));
@@ -40213,7 +40327,7 @@ class ActivateSkillTool {
40213
40327
  required: ["name"],
40214
40328
  };
40215
40329
  }
40216
- async execute(args) {
40330
+ async execute(args, ..._rest) {
40217
40331
  const name = args.name;
40218
40332
  if (!global.skillService) {
40219
40333
  return {
@@ -40510,11 +40624,11 @@ monitor changes in webpage DOM elements, when executing to the watch node, requi
40510
40624
  <if ${TOOL_NAME$6}Tool>
40511
40625
  * SKILLS
40512
40626
  You can use the \`${TOOL_NAME$6}\` tool to load domain-specific skill instructions when they would help complete the current task.
40627
+ </if>
40513
40628
  <if skills>
40514
40629
  Available skills:
40515
40630
  {{skills}}
40516
40631
  </if>
40517
- </if>
40518
40632
 
40519
40633
  <if mainTask>
40520
40634
  Main task: {{mainTask}}