@jay-framework/gemini-agent-plugin 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,43 +1,115 @@
1
1
  import { createJayService, makeJayInit, makeJayStackComponent } from "@jay-framework/fullstack-component";
2
- import { createSignal, createMemo } from "@jay-framework/component";
2
+ import { createSignal, createMemo, createDerivedArray } from "@jay-framework/component";
3
3
  import { createActionCaller } from "@jay-framework/stack-client-runtime";
4
4
  const GEMINI_SERVICE = createJayService("GeminiService");
5
5
  const init = makeJayInit();
6
- const callSendMessage = createActionCaller("geminiAgent.sendMessage", "POST");
7
- const callSubmitToolResults = createActionCaller("geminiAgent.submitToolResults", "POST");
8
- function buildSerializedTools(automation) {
6
+ var Role = /* @__PURE__ */ ((Role2) => {
7
+ Role2[Role2["user"] = 0] = "user";
8
+ Role2[Role2["assistant"] = 1] = "assistant";
9
+ return Role2;
10
+ })(Role || {});
11
+ const MAX_ARRAY_ITEMS = 3;
12
+ const MAX_STRING_LENGTH = 200;
13
+ function compactPageState(value) {
14
+ if (value === null || value === void 0)
15
+ return value;
16
+ if (typeof value === "string") {
17
+ if (value.length > MAX_STRING_LENGTH) {
18
+ return value.slice(0, MAX_STRING_LENGTH) + "...";
19
+ }
20
+ return value;
21
+ }
22
+ if (Array.isArray(value)) {
23
+ if (value.length > MAX_ARRAY_ITEMS) {
24
+ const truncated = value.slice(0, MAX_ARRAY_ITEMS).map(compactPageState);
25
+ truncated.push(`... (${value.length} total)`);
26
+ return truncated;
27
+ }
28
+ return value.map(compactPageState);
29
+ }
30
+ if (typeof value === "object") {
31
+ const result = {};
32
+ for (const [k, v] of Object.entries(value)) {
33
+ result[k] = compactPageState(v);
34
+ }
35
+ return result;
36
+ }
37
+ return value;
38
+ }
39
+ const sendMessage = createActionCaller("geminiAgent.sendMessage", "POST");
40
+ const submitToolResults = createActionCaller("geminiAgent.submitToolResults", "POST");
41
+ const getToolDescriptions = createActionCaller("geminiAgent.getToolDescriptions", "GET");
42
+ const FILLABLE_TYPES = /* @__PURE__ */ new Set(["HTMLInputElement", "HTMLTextAreaElement", "HTMLSelectElement"]);
43
+ function isCheckable(element) {
44
+ return element instanceof HTMLInputElement && (element.type === "checkbox" || element.type === "radio");
45
+ }
46
+ function getSelectOptions(element) {
47
+ if (!(element instanceof HTMLSelectElement))
48
+ return void 0;
49
+ return Array.from(element.options).map((opt) => opt.value);
50
+ }
51
+ function setElementValue(element, value) {
52
+ if (isCheckable(element)) {
53
+ element.checked = value === "true";
54
+ } else {
55
+ element.value = value;
56
+ }
57
+ }
58
+ function getValueEventTypes(registeredEvents) {
59
+ const result = [];
60
+ if (registeredEvents.includes("input"))
61
+ result.push("input");
62
+ if (registeredEvents.includes("change"))
63
+ result.push("change");
64
+ if (result.length > 0)
65
+ return result;
66
+ return registeredEvents.length > 0 ? [registeredEvents[0]] : ["input"];
67
+ }
68
+ const CHAT_WIDGET_REFS = /* @__PURE__ */ new Set(["messageInput", "sendMessage", "toggleExpand"]);
69
+ function buildSerializedTools(automation, descriptionMap) {
9
70
  const { interactions } = automation.getPageState();
10
71
  const tools = [];
11
72
  for (const group of interactions) {
73
+ if (CHAT_WIDGET_REFS.has(group.refName))
74
+ continue;
12
75
  const sample = group.items[0];
13
76
  if (!sample)
14
77
  continue;
15
78
  const elementType = sample.element.constructor.name;
16
- const isFillable = [
17
- "HTMLInputElement",
18
- "HTMLTextAreaElement",
19
- "HTMLSelectElement"
20
- ].includes(elementType);
21
- const isCheckbox = elementType === "HTMLInputElement" && ["checkbox", "radio"].includes(sample.element.type);
79
+ const isFillable = FILLABLE_TYPES.has(elementType);
80
+ const isSelect = elementType === "HTMLSelectElement";
81
+ const checkable = isCheckable(sample.element);
22
82
  const isForEach = group.items.length > 1 || sample.coordinate.length > 1;
23
- const prefix = isCheckbox ? "toggle" : isFillable ? "fill" : "click";
83
+ const prefix = checkable ? "toggle" : isFillable ? "fill" : "click";
24
84
  const toolName = `${prefix}-${toKebab(group.refName)}`;
25
85
  const humanName = toHumanReadable(group.refName);
26
- const description = group.description || `${isCheckbox ? "Toggle" : isFillable ? "Fill" : "Click"} ${humanName}${isForEach ? " for a specific item" : ""}`;
86
+ const description = descriptionMap.get(group.refName) || group.description || `${checkable ? "Toggle" : isFillable ? "Fill" : "Click"} ${humanName}${isForEach ? " for a specific item" : ""}`;
27
87
  const properties = {};
28
88
  const required = [];
29
89
  if (isForEach) {
90
+ const coordStrings = group.items.map((i) => i.coordinate.join("/"));
30
91
  properties.coordinate = {
31
92
  type: "string",
32
- description: `Item coordinate (e.g. "${sample.coordinate.join("/")}")`
93
+ description: "Item coordinate",
94
+ enum: coordStrings
33
95
  };
34
96
  required.push("coordinate");
35
97
  }
36
- if (isFillable && !isCheckbox) {
37
- properties.value = {
38
- type: "string",
39
- description: `Value to set in ${humanName}`
40
- };
98
+ if (isFillable) {
99
+ if (checkable) {
100
+ properties.value = {
101
+ type: "string",
102
+ description: "Checked state",
103
+ enum: ["true", "false"]
104
+ };
105
+ } else {
106
+ const selectOptions = isSelect ? getSelectOptions(sample.element) : void 0;
107
+ properties.value = {
108
+ type: "string",
109
+ description: isSelect ? "Value to select" : `Value to set in ${humanName}`,
110
+ ...selectOptions ? { enum: selectOptions } : {}
111
+ };
112
+ }
41
113
  required.push("value");
42
114
  }
43
115
  tools.push({
@@ -77,7 +149,6 @@ function executePageAutomationTool(automation, call) {
77
149
  }
78
150
  let item = group.items[0];
79
151
  if (args.coordinate && typeof args.coordinate === "string") {
80
- const coord = args.coordinate.split("/");
81
152
  item = group.items.find((i) => i.coordinate.join("/") === args.coordinate) || group.items[0];
82
153
  }
83
154
  if (!item) {
@@ -87,23 +158,21 @@ function executePageAutomationTool(automation, call) {
87
158
  isError: true
88
159
  };
89
160
  }
90
- if (prefix === "fill" && args.value != null) {
91
- const el = item.element;
92
- const nativeInputValueSetter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, "value")?.set;
93
- if (nativeInputValueSetter) {
94
- nativeInputValueSetter.call(el, String(args.value));
95
- } else {
96
- el.value = String(args.value);
161
+ if (prefix === "fill" || prefix === "toggle") {
162
+ setElementValue(item.element, String(args.value));
163
+ for (const evt of getValueEventTypes(item.events)) {
164
+ item.element.dispatchEvent(new Event(evt, { bubbles: true }));
97
165
  }
98
- el.dispatchEvent(new Event("input", { bubbles: true }));
99
- el.dispatchEvent(new Event("change", { bubbles: true }));
100
166
  } else {
101
167
  item.element.click();
102
168
  }
103
169
  const newState = automation.getPageState();
104
170
  return {
105
171
  callId: call.id,
106
- result: JSON.stringify({ success: true, pageState: newState.viewState })
172
+ result: JSON.stringify({
173
+ success: true,
174
+ pageState: compactPageState(newState.viewState)
175
+ })
107
176
  };
108
177
  } catch (error) {
109
178
  return {
@@ -121,11 +190,12 @@ function GeminiChatInteractive(_props, refs, fastViewState, _carryForward) {
121
190
  const [getIsExpanded, setIsExpanded] = createSignal(false);
122
191
  const [getError, setError] = createSignal(null);
123
192
  const hasMessages = createMemo(() => getMessages().length > 0);
193
+ const displayMessages = createDerivedArray(getMessages, (item) => item());
124
194
  const hasError = createMemo(() => getError() !== null);
125
195
  const lastUserMessage = createMemo(() => {
126
196
  const msgs = getMessages();
127
197
  for (let i = msgs.length - 1; i >= 0; i--) {
128
- if (msgs[i].role === "user")
198
+ if (msgs[i].role === Role.user)
129
199
  return msgs[i].content;
130
200
  }
131
201
  return "";
@@ -133,39 +203,49 @@ function GeminiChatInteractive(_props, refs, fastViewState, _carryForward) {
133
203
  const lastAssistantMessage = createMemo(() => {
134
204
  const msgs = getMessages();
135
205
  for (let i = msgs.length - 1; i >= 0; i--) {
136
- if (msgs[i].role === "assistant")
206
+ if (msgs[i].role === Role.assistant)
137
207
  return msgs[i].content;
138
208
  }
139
209
  return "";
140
210
  });
141
- const getAutomation = () => window.__jay?.automation || null;
211
+ let automation = window.__jay?.automation || null;
212
+ if (!automation) {
213
+ window.addEventListener("jay:automation-ready", () => {
214
+ automation = window.__jay?.automation || null;
215
+ }, { once: true });
216
+ }
217
+ const getAutomation = () => automation;
218
+ let toolDescriptionMap = /* @__PURE__ */ new Map();
219
+ getToolDescriptions(void 0).then((descriptions) => {
220
+ toolDescriptionMap = new Map(descriptions.map((d) => [d.refName, d.description]));
221
+ }).catch(() => {
222
+ });
142
223
  function getToolsAndState() {
143
- const automation = getAutomation();
144
- if (!automation) {
224
+ const automation2 = getAutomation();
225
+ if (!automation2) {
145
226
  return { toolDefinitions: [], pageState: {} };
146
227
  }
147
228
  return {
148
- toolDefinitions: buildSerializedTools(automation),
149
- pageState: automation.getPageState().viewState
229
+ toolDefinitions: buildSerializedTools(automation2, toolDescriptionMap),
230
+ pageState: automation2.getPageState().viewState
150
231
  };
151
232
  }
152
233
  async function handleToolCalls(output) {
153
234
  while (output.type === "tool-calls") {
154
235
  setHistory(output.history);
155
- const automation = getAutomation();
156
- if (!automation) {
236
+ const automation2 = getAutomation();
237
+ if (!automation2) {
157
238
  setError("AutomationAPI not available");
158
239
  return;
159
240
  }
160
- const results = output.calls.filter((c) => c.category === "page-automation").map((call) => executePageAutomationTool(automation, call));
241
+ const results = output.calls.filter((c) => c.category === "page-automation").map((call) => executePageAutomationTool(automation2, call));
161
242
  const { toolDefinitions, pageState } = getToolsAndState();
162
- const nextOutput = await callSubmitToolResults({
243
+ output = await submitToolResults({
163
244
  results,
164
245
  history: output.history,
165
246
  toolDefinitions,
166
247
  pageState
167
248
  });
168
- output = nextOutput;
169
249
  }
170
250
  const finalOutput = output;
171
251
  if (finalOutput.type === "response") {
@@ -174,23 +254,23 @@ function GeminiChatInteractive(_props, refs, fastViewState, _carryForward) {
174
254
  ...msgs,
175
255
  {
176
256
  index: msgs.length,
177
- role: "assistant",
257
+ role: Role.assistant,
178
258
  content: finalOutput.message
179
259
  }
180
260
  ]);
181
261
  }
182
262
  }
183
- async function sendMessage() {
263
+ async function sendMessage$1() {
184
264
  const message = getInputValue().trim();
185
265
  if (!message || getIsLoading())
186
266
  return;
187
267
  setError(null);
188
268
  setIsLoading(true);
189
269
  setInputValue("");
190
- setMessages((msgs) => [...msgs, { index: msgs.length, role: "user", content: message }]);
270
+ setMessages((msgs) => [...msgs, { index: msgs.length, role: Role.user, content: message }]);
191
271
  try {
192
272
  const { toolDefinitions, pageState } = getToolsAndState();
193
- const output = await callSendMessage({
273
+ const output = await sendMessage({
194
274
  message,
195
275
  history: getHistory(),
196
276
  toolDefinitions,
@@ -203,7 +283,7 @@ function GeminiChatInteractive(_props, refs, fastViewState, _carryForward) {
203
283
  setIsLoading(false);
204
284
  }
205
285
  }
206
- refs.sendMessage.onclick(sendMessage);
286
+ refs.sendMessage.onclick(sendMessage$1);
207
287
  refs.toggleExpand.onclick(() => {
208
288
  setIsExpanded((v) => !v);
209
289
  });
@@ -213,16 +293,12 @@ function GeminiChatInteractive(_props, refs, fastViewState, _carryForward) {
213
293
  refs.messageInput.onkeydown((jayEvent) => {
214
294
  if (jayEvent.event.key === "Enter" && !jayEvent.event.shiftKey) {
215
295
  jayEvent.event.preventDefault();
216
- sendMessage();
296
+ sendMessage$1();
217
297
  }
218
298
  });
219
299
  return {
220
300
  render: () => ({
221
- messages: () => getMessages().map((m) => ({
222
- index: m.index,
223
- role: m.role,
224
- content: m.content
225
- })),
301
+ messages: displayMessages,
226
302
  lastUserMessage,
227
303
  lastAssistantMessage,
228
304
  messageInput: getInputValue,
package/dist/index.d.ts CHANGED
@@ -115,6 +115,19 @@ declare const init: _jay_framework_fullstack_component.JayInitBuilderWithServer<
115
115
 
116
116
  declare function setupGeminiAgent(ctx: PluginSetupContext): Promise<PluginSetupResult>;
117
117
 
118
+ /**
119
+ * Tool Description Loader — reads .jay-contract files and extracts
120
+ * descriptions for interactive tags.
121
+ *
122
+ * Descriptions are loaded from contract YAML at runtime (not embedded
123
+ * in the HTML/JS bundle) so the cost is only paid when the AI agent
124
+ * is present.
125
+ */
126
+ interface ToolDescription {
127
+ refName: string;
128
+ description: string;
129
+ }
130
+
118
131
  /**
119
132
  * Main entry point for chat messages.
120
133
  *
@@ -129,6 +142,14 @@ declare const sendMessage: _jay_framework_fullstack_component.JayAction<SendMess
129
142
  * Returns either a text response or more pending tool calls.
130
143
  */
131
144
  declare const submitToolResults: _jay_framework_fullstack_component.JayAction<SubmitToolResultsInput, SendMessageOutput> & _jay_framework_fullstack_component.JayActionDefinition<SubmitToolResultsInput, SendMessageOutput, [GeminiService]>;
145
+ /**
146
+ * Returns tool descriptions extracted from .jay-contract files.
147
+ *
148
+ * Internal use only — called once by the client component at init.
149
+ * Not exposed to the LLM (filtered by geminiAgent.* prefix).
150
+ * Results are cached server-side after the first call.
151
+ */
152
+ declare const getToolDescriptions: _jay_framework_fullstack_component.JayAction<unknown, ToolDescription[]> & _jay_framework_fullstack_component.JayActionDefinition<unknown, ToolDescription[], []>;
132
153
 
133
154
  /**
134
155
  * Configuration for the Gemini agent plugin.
@@ -192,21 +213,52 @@ declare const geminiChat: _jay_framework_fullstack_component.JayStackComponentDe
192
213
  * Tool Bridge — converts jay-stack tool descriptors and action metadata
193
214
  * to Gemini FunctionDeclarations.
194
215
  *
195
- * Only actions with .jay-action metadata are exposed to the AI agent.
196
- * Page automation tools (from AutomationAPI) are always included.
216
+ * Slim declarations (name + description, empty params) are sent so
217
+ * Gemini knows the exact tool names. When the LLM calls a tool that
218
+ * hasn't been discovered via get_tool_details, it gets an error
219
+ * response telling it to discover first. After discovery, the full
220
+ * declaration replaces the slim one for subsequent calls.
197
221
  */
198
222
 
199
223
  /**
200
- * Converts page automation tools and server actions into Gemini function declarations.
201
- *
202
- * - Client tools are included directly (already have schema).
203
- * - Server actions are only included if they have .jay-action metadata.
204
- * Actions without metadata are silently skipped (opt-in mechanism).
224
+ * The `get_page_state` meta-tool declaration.
225
+ * Returns the full untruncated page state on demand. The system prompt
226
+ * includes a compact (truncated) version; this tool provides the full data
227
+ * when the LLM needs it (e.g., listing all products).
228
+ */
229
+ declare const PAGE_STATE_TOOL: GeminiFunctionDeclaration;
230
+ /**
231
+ * The `get_tool_details` meta-tool declaration.
232
+ * Added to every Gemini call so the LLM can discover full parameter
233
+ * schemas on demand instead of receiving them all upfront.
234
+ */
235
+ declare const DISCOVERY_TOOL: GeminiFunctionDeclaration;
236
+ /**
237
+ * Converts page automation tools and server actions into full Gemini
238
+ * function declarations (with complete parameter schemas).
205
239
  */
206
240
  declare function toGeminiTools(clientTools: SerializedToolDef[], serverActions: Array<{
207
241
  actionName: string;
208
242
  metadata: ActionMetadata;
209
243
  }>): GeminiFunctionDeclaration[];
244
+ /**
245
+ * Converts page automation tools and server actions into slim Gemini
246
+ * function declarations — name + description only, empty parameters.
247
+ * Ensures Gemini uses correct tool names. The LLM must call
248
+ * get_tool_details before using any tool.
249
+ */
250
+ declare function toSlimGeminiTools(clientTools: SerializedToolDef[], serverActions: Array<{
251
+ actionName: string;
252
+ metadata: ActionMetadata;
253
+ }>): GeminiFunctionDeclaration[];
254
+ /**
255
+ * Builds a compact text summary of all available tools for the system prompt.
256
+ * Lists tool names and descriptions, with param names where applicable.
257
+ */
258
+ declare function buildToolSummary(clientTools: SerializedToolDef[], serverActions: Array<{
259
+ actionName: string;
260
+ metadata: ActionMetadata;
261
+ }>): string;
210
262
  /**
211
263
  * Maps a Gemini function call name back to its original action name.
212
264
  * Reverses the `action_` prefix and `_` → `.` replacement.
@@ -222,23 +274,24 @@ declare function resolveToolCallTarget(toolName: string, clientToolNames: Set<st
222
274
  /**
223
275
  * System Prompt Builder — constructs the system prompt for Gemini.
224
276
  *
225
- * Page state and available server actions are included as context
226
- * (not tools), so the LLM always knows the current state without
227
- * wasting tool calls.
277
+ * Page state is compacted (no pretty-printing, truncated arrays/strings)
278
+ * to reduce token usage. Tool summaries replace per-tool schema details.
228
279
  */
229
- interface ServerActionSummary {
230
- name: string;
231
- description?: string;
232
- }
280
+ /**
281
+ * Recursively compacts a page state object to reduce token usage:
282
+ * - Arrays longer than 3 items are truncated with a count suffix
283
+ * - Strings longer than 200 chars are truncated with ellipsis
284
+ */
285
+ declare function compactPageState(value: unknown): unknown;
233
286
  /**
234
287
  * Builds the system prompt for a Gemini conversation turn.
235
288
  *
236
289
  * The prompt includes:
237
290
  * 1. Custom prefix (from config) or default greeting
238
- * 2. Current page state as JSON context
239
- * 3. List of available server actions with descriptions
240
- * 4. Instructions for tool use
291
+ * 2. Current page state as compact JSON context
292
+ * 3. Tool summary list
293
+ * 4. Instructions for tool use and discovery
241
294
  */
242
- declare function buildSystemPrompt(pageState: object, serverActions: ServerActionSummary[], customPrefix?: string): string;
295
+ declare function buildSystemPrompt(pageState: object, toolSummary: string, customPrefix?: string): string;
243
296
 
244
- export { GEMINI_SERVICE, type GeminiAgentConfig, type GeminiFunctionCallPart, type GeminiFunctionDeclaration, type GeminiFunctionResponsePart, type GeminiMessage, type GeminiPart, GeminiService, type GeminiServiceConfig, type GeminiTextPart, type PendingToolCall, type SendMessageInput, type SendMessageOutput, type SerializedToolDef, type ServerActionSummary, type SubmitToolResultsInput, type SubmitToolResultsOutput, type ToolCallResult, buildSystemPrompt, geminiChat, init, resolveToolCallTarget, sendMessage, setupGeminiAgent, submitToolResults, toGeminiTools };
297
+ export { DISCOVERY_TOOL, GEMINI_SERVICE, type GeminiAgentConfig, type GeminiFunctionCallPart, type GeminiFunctionDeclaration, type GeminiFunctionResponsePart, type GeminiMessage, type GeminiPart, GeminiService, type GeminiServiceConfig, type GeminiTextPart, PAGE_STATE_TOOL, type PendingToolCall, type SendMessageInput, type SendMessageOutput, type SerializedToolDef, type SubmitToolResultsInput, type SubmitToolResultsOutput, type ToolCallResult, buildSystemPrompt, buildToolSummary, compactPageState, geminiChat, getToolDescriptions, init, resolveToolCallTarget, sendMessage, setupGeminiAgent, submitToolResults, toGeminiTools, toSlimGeminiTools };
package/dist/index.js CHANGED
@@ -10,6 +10,8 @@ import * as fs from "fs";
10
10
  import * as path from "path";
11
11
  import * as yaml from "js-yaml";
12
12
  import { Type, GoogleGenAI } from "@google/genai";
13
+ import * as fs$1 from "node:fs";
14
+ import * as path$1 from "node:path";
13
15
  const CONFIG_FILE_NAME$1 = ".gemini.yaml";
14
16
  const DEFAULT_MODEL = "gemini-2.0-flash";
15
17
  function loadConfig() {
@@ -184,6 +186,39 @@ async function setupGeminiAgent(ctx) {
184
186
  };
185
187
  }
186
188
  }
189
+ const defaultLogger = {
190
+ important: (msg, ...args) => console.log(msg, ...args),
191
+ info: (msg, ...args) => console.log(msg, ...args),
192
+ warn: (msg, ...args) => console.warn(msg, ...args),
193
+ error: (msg, ...args) => console.error(msg, ...args)
194
+ };
195
+ let currentLogger = defaultLogger;
196
+ function getLogger() {
197
+ return currentLogger;
198
+ }
199
+ const PAGE_STATE_TOOL = {
200
+ name: "get_page_state",
201
+ description: "Get the full current page state. Use when the compact state in context is insufficient (e.g., to see all items in a truncated list).",
202
+ parameters: {
203
+ type: "object",
204
+ properties: {}
205
+ }
206
+ };
207
+ const DISCOVERY_TOOL = {
208
+ name: "get_tool_details",
209
+ description: "Get full parameter schemas for tools. Call before using tools that need coordinates or specific values.",
210
+ parameters: {
211
+ type: "object",
212
+ properties: {
213
+ tool_names: {
214
+ type: "array",
215
+ items: { type: "string" },
216
+ description: "Names of tools to get details for"
217
+ }
218
+ },
219
+ required: ["tool_names"]
220
+ }
221
+ };
187
222
  function toGeminiTools(clientTools, serverActions) {
188
223
  const tools = [];
189
224
  for (const tool of clientTools) {
@@ -202,6 +237,39 @@ function toGeminiTools(clientTools, serverActions) {
202
237
  }
203
238
  return tools;
204
239
  }
240
+ function toSlimGeminiTools(clientTools, serverActions) {
241
+ const tools = [];
242
+ for (const tool of clientTools) {
243
+ tools.push({
244
+ name: tool.name,
245
+ description: tool.description,
246
+ parameters: { type: "object", properties: {} }
247
+ });
248
+ }
249
+ for (const { actionName, metadata } of serverActions) {
250
+ tools.push({
251
+ name: `action_${actionName.replace(/\./g, "_")}`,
252
+ description: metadata.description,
253
+ parameters: { type: "object", properties: {} }
254
+ });
255
+ }
256
+ return tools;
257
+ }
258
+ function buildToolSummary(clientTools, serverActions) {
259
+ const lines = [];
260
+ for (const tool of clientTools) {
261
+ const paramNames = Object.keys(tool.inputSchema.properties || {});
262
+ const paramSuffix = paramNames.length > 0 ? ` (params: ${paramNames.join(", ")})` : "";
263
+ lines.push(`- ${tool.name}: ${tool.description}${paramSuffix}`);
264
+ }
265
+ for (const { actionName, metadata } of serverActions) {
266
+ const toolName = `action_${actionName.replace(/\./g, "_")}`;
267
+ const paramNames = Object.keys(metadata.inputSchema?.properties || {});
268
+ const paramSuffix = paramNames.length > 0 ? ` (params: ${paramNames.join(", ")})` : "";
269
+ lines.push(`- ${toolName}: ${metadata.description}${paramSuffix}`);
270
+ }
271
+ return lines.join("\n");
272
+ }
205
273
  function resolveToolCallTarget(toolName, clientToolNames) {
206
274
  if (clientToolNames.has(toolName)) {
207
275
  return { category: "page-automation", name: toolName };
@@ -212,32 +280,83 @@ function resolveToolCallTarget(toolName, clientToolNames) {
212
280
  }
213
281
  return { category: "page-automation", name: toolName };
214
282
  }
215
- function buildSystemPrompt(pageState, serverActions, customPrefix) {
283
+ const MAX_ARRAY_ITEMS = 3;
284
+ const MAX_STRING_LENGTH = 200;
285
+ function compactPageState(value) {
286
+ if (value === null || value === void 0)
287
+ return value;
288
+ if (typeof value === "string") {
289
+ if (value.length > MAX_STRING_LENGTH) {
290
+ return value.slice(0, MAX_STRING_LENGTH) + "...";
291
+ }
292
+ return value;
293
+ }
294
+ if (Array.isArray(value)) {
295
+ if (value.length > MAX_ARRAY_ITEMS) {
296
+ const truncated = value.slice(0, MAX_ARRAY_ITEMS).map(compactPageState);
297
+ truncated.push(`... (${value.length} total)`);
298
+ return truncated;
299
+ }
300
+ return value.map(compactPageState);
301
+ }
302
+ if (typeof value === "object") {
303
+ const result = {};
304
+ for (const [k, v] of Object.entries(value)) {
305
+ result[k] = compactPageState(v);
306
+ }
307
+ return result;
308
+ }
309
+ return value;
310
+ }
311
+ function buildSystemPrompt(pageState, toolSummary, customPrefix) {
312
+ const compacted = compactPageState(pageState);
216
313
  const parts = [
217
314
  customPrefix || "You are a helpful assistant for this web application.",
218
315
  "",
219
316
  "## Current Page State",
220
- JSON.stringify(pageState, null, 2),
317
+ JSON.stringify(compacted),
221
318
  ""
222
319
  ];
223
- if (serverActions.length > 0) {
224
- parts.push("## Available Server Actions");
225
- for (const action of serverActions) {
226
- parts.push(`- ${action.name}${action.description ? `: ${action.description}` : ""}`);
227
- }
320
+ if (toolSummary) {
321
+ parts.push("## Available Tools");
322
+ parts.push(toolSummary);
228
323
  parts.push("");
229
324
  }
230
325
  parts.push(
231
- "Use the provided tools to interact with the page and call server actions.",
326
+ "## Instructions",
327
+ "You can interact with the page using the tools listed above.",
328
+ "Before using any tool, call `get_tool_details` with the tool names to discover and enable them.",
329
+ "The page state above is a compact summary. Call `get_page_state` for the full untruncated state when needed.",
330
+ "",
331
+ "There are two kinds of tools:",
332
+ "- **Page tools** (click-*, fill-*, toggle-*): These change what the user sees on the page. Use these to interact with the UI.",
333
+ "- **Server actions** (action_*): These fetch or send data to the backend. Results are returned to YOU only — the user does NOT see them on the page. Use page tools to update the UI after server actions.",
334
+ "",
335
+ "Prefer page tools to drive the UI. Only use server actions when the page tools cannot achieve the goal.",
232
336
  "After using tools, describe what you did to the user.",
233
- "The page state above is refreshed each turn — use it to understand what the user sees."
337
+ "The page state is refreshed each turn — use it to understand what the user sees."
234
338
  );
235
339
  return parts.join("\n");
236
340
  }
237
- async function processGeminiTurn(service, history, tools, systemPrompt, clientToolNames) {
341
+ function approxSize(value) {
342
+ return JSON.stringify(value).length;
343
+ }
344
+ const META_TOOL_NAMES = /* @__PURE__ */ new Set(["get_tool_details", "get_page_state"]);
345
+ async function processGeminiTurn(service, history, tools, systemPrompt, clientToolNames, fullToolLookup, pageState, discoveredTools, turnNumber = 1) {
346
+ const log = getLogger();
347
+ const lastUserMsg = [...history].reverse().find((m) => m.role === "user" && m.parts.some((p) => p.text));
348
+ const userText = lastUserMsg ? lastUserMsg.parts.find((p) => p.text)?.text || "" : "";
349
+ const historySize = approxSize(history);
350
+ const toolNames = tools.map((t) => t.name);
351
+ log.info(
352
+ `[gemini-agent] Turn ${turnNumber} | user: "${userText}" | history: ${history.length} msgs (~${historySize} chars) | tools: ${toolNames.length} (${toolNames.join(", ")}) | prompt: ${systemPrompt.length} chars`
353
+ );
354
+ const startTime = Date.now();
238
355
  const response = await service.generateWithTools(history, tools, systemPrompt);
356
+ const duration = Date.now() - startTime;
239
357
  const candidate = response.candidates?.[0];
240
358
  if (!candidate?.content?.parts) {
359
+ log.info(`[gemini-agent] Turn ${turnNumber} | response: empty | ${duration}ms`);
241
360
  return {
242
361
  type: "response",
243
362
  message: "I apologize, but I was unable to generate a response.",
@@ -249,9 +368,14 @@ async function processGeminiTurn(service, history, tools, systemPrompt, clientTo
249
368
  (p) => p.functionCall != null
250
369
  );
251
370
  const responseParts = candidate.content.parts;
371
+ const responseSize = approxSize(responseParts);
252
372
  if (functionCalls.length === 0) {
253
373
  const textParts = parts.filter((p) => p.text != null);
254
374
  const message = textParts.map((p) => p.text).join("");
375
+ log.info(
376
+ `[gemini-agent] Turn ${turnNumber} | response: text (~${responseSize} chars) | ${duration}ms
377
+ ${JSON.stringify(responseParts, null, 2)}`
378
+ );
255
379
  const updatedHistory2 = [
256
380
  ...history,
257
381
  { role: "model", parts: responseParts }
@@ -262,11 +386,80 @@ async function processGeminiTurn(service, history, tools, systemPrompt, clientTo
262
386
  history: updatedHistory2
263
387
  };
264
388
  }
389
+ const callNames = functionCalls.map((fc) => fc.functionCall.name);
390
+ log.info(
391
+ `[gemini-agent] Turn ${turnNumber} | response: tool-calls (${callNames.join(", ")}) (~${responseSize} chars) | ${duration}ms
392
+ ${JSON.stringify(responseParts, null, 2)}`
393
+ );
265
394
  const updatedHistory = [...history, { role: "model", parts: responseParts }];
266
395
  const pendingClientCalls = [];
267
396
  const serverCallResults = [];
397
+ let expandedTools = tools;
268
398
  for (const fc of functionCalls) {
269
- const target = resolveToolCallTarget(fc.functionCall.name, clientToolNames);
399
+ const name = fc.functionCall.name;
400
+ if (name === "get_page_state") {
401
+ serverCallResults.push({
402
+ functionResponse: {
403
+ name: "get_page_state",
404
+ response: pageState
405
+ }
406
+ });
407
+ continue;
408
+ }
409
+ if (name === "get_tool_details") {
410
+ const requestedNames = fc.functionCall.args?.tool_names || [];
411
+ const schemas = {};
412
+ const newFullDeclarations = [];
413
+ for (const toolName of requestedNames) {
414
+ const full = fullToolLookup.get(toolName);
415
+ if (full) {
416
+ schemas[toolName] = full.parameters;
417
+ discoveredTools.add(toolName);
418
+ if (!newFullDeclarations.some((d) => d.name === toolName)) {
419
+ newFullDeclarations.push(full);
420
+ }
421
+ }
422
+ }
423
+ if (newFullDeclarations.length > 0) {
424
+ const upgradeNames = new Set(newFullDeclarations.map((d) => d.name));
425
+ expandedTools = [
426
+ ...expandedTools.filter((t) => !upgradeNames.has(t.name)),
427
+ ...newFullDeclarations
428
+ ];
429
+ }
430
+ serverCallResults.push({
431
+ functionResponse: {
432
+ name: "get_tool_details",
433
+ response: schemas
434
+ }
435
+ });
436
+ continue;
437
+ }
438
+ if (!META_TOOL_NAMES.has(name) && !discoveredTools.has(name)) {
439
+ const full = fullToolLookup.get(name);
440
+ if (full) {
441
+ discoveredTools.add(name);
442
+ const upgradeNames = /* @__PURE__ */ new Set([name]);
443
+ expandedTools = [...expandedTools.filter((t) => !upgradeNames.has(t.name)), full];
444
+ serverCallResults.push({
445
+ functionResponse: {
446
+ name,
447
+ response: {
448
+ error: `This tool requires parameters. Here is the schema: ${JSON.stringify(full.parameters)}. Call ${name} again with the correct parameters.`
449
+ }
450
+ }
451
+ });
452
+ } else {
453
+ serverCallResults.push({
454
+ functionResponse: {
455
+ name,
456
+ response: { error: `Unknown tool '${name}'.` }
457
+ }
458
+ });
459
+ }
460
+ continue;
461
+ }
462
+ const target = resolveToolCallTarget(name, clientToolNames);
270
463
  if (target.category === "server-action") {
271
464
  const result = await actionRegistry.execute(target.name, fc.functionCall.args);
272
465
  serverCallResults.push({
@@ -285,16 +478,6 @@ async function processGeminiTurn(service, history, tools, systemPrompt, clientTo
285
478
  }
286
479
  }
287
480
  if (pendingClientCalls.length > 0) {
288
- [
289
- ...pendingClientCalls,
290
- // Server actions already executed — include as completed calls
291
- ...serverCallResults.map((r) => ({
292
- id: r.functionResponse.name,
293
- name: r.functionResponse.name,
294
- args: {},
295
- category: "server-action"
296
- }))
297
- ];
298
481
  return {
299
482
  type: "tool-calls",
300
483
  calls: pendingClientCalls,
@@ -305,22 +488,120 @@ async function processGeminiTurn(service, history, tools, systemPrompt, clientTo
305
488
  ...updatedHistory,
306
489
  { role: "user", parts: serverCallResults }
307
490
  ];
308
- return processGeminiTurn(service, historyWithResults, tools, systemPrompt, clientToolNames);
491
+ return processGeminiTurn(
492
+ service,
493
+ historyWithResults,
494
+ expandedTools,
495
+ systemPrompt,
496
+ clientToolNames,
497
+ fullToolLookup,
498
+ pageState,
499
+ discoveredTools,
500
+ turnNumber + 1
501
+ );
309
502
  }
310
503
  async function handleConversation(service, history, toolDefinitions, pageState) {
311
- const serverActions = actionRegistry.getActionsWithMetadata();
312
- const tools = toGeminiTools(toolDefinitions, serverActions);
313
- const serverActionSummaries = serverActions.map((a) => ({
314
- name: a.actionName,
315
- description: a.metadata.description
316
- }));
317
- const systemPrompt = buildSystemPrompt(
504
+ const serverActions = actionRegistry.getActionsWithMetadata().filter((a) => !a.actionName.startsWith("geminiAgent."));
505
+ const fullTools = toGeminiTools(toolDefinitions, serverActions);
506
+ const fullToolLookup = /* @__PURE__ */ new Map();
507
+ for (const tool of fullTools) {
508
+ fullToolLookup.set(tool.name, tool);
509
+ }
510
+ const slimTools = toSlimGeminiTools(toolDefinitions, serverActions);
511
+ const toolsForGemini = [...slimTools, DISCOVERY_TOOL, PAGE_STATE_TOOL];
512
+ const toolSummary = buildToolSummary(toolDefinitions, serverActions);
513
+ const systemPrompt = buildSystemPrompt(pageState, toolSummary, service.systemPromptPrefix);
514
+ const clientToolNames = new Set(toolDefinitions.map((t) => t.name));
515
+ return processGeminiTurn(
516
+ service,
517
+ history,
518
+ toolsForGemini,
519
+ systemPrompt,
520
+ clientToolNames,
521
+ fullToolLookup,
318
522
  pageState,
319
- serverActionSummaries,
320
- service.systemPromptPrefix
523
+ /* @__PURE__ */ new Set()
321
524
  );
322
- const clientToolNames = new Set(toolDefinitions.map((t) => t.name));
323
- return processGeminiTurn(service, history, tools, systemPrompt, clientToolNames);
525
+ }
526
+ const INTERACTIVE_TYPE = "interactive";
527
+ function isInteractive(tag) {
528
+ if (!tag.type)
529
+ return false;
530
+ if (typeof tag.type === "string")
531
+ return tag.type === INTERACTIVE_TYPE;
532
+ if (Array.isArray(tag.type))
533
+ return tag.type.includes(INTERACTIVE_TYPE);
534
+ return false;
535
+ }
536
+ function extractDescription(desc) {
537
+ if (!desc)
538
+ return void 0;
539
+ if (typeof desc === "string")
540
+ return desc;
541
+ if (Array.isArray(desc))
542
+ return desc.join(" ");
543
+ return void 0;
544
+ }
545
+ function collectInteractiveDescriptions(tags) {
546
+ const result = [];
547
+ for (const tag of tags) {
548
+ if (isInteractive(tag)) {
549
+ const desc = extractDescription(tag.description);
550
+ if (desc) {
551
+ result.push({ refName: tag.tag, description: desc });
552
+ }
553
+ }
554
+ if (tag.tags) {
555
+ result.push(...collectInteractiveDescriptions(tag.tags));
556
+ }
557
+ }
558
+ return result;
559
+ }
560
+ let cachedDescriptions = null;
561
+ function loadToolDescriptions(projectRoot) {
562
+ if (cachedDescriptions)
563
+ return cachedDescriptions;
564
+ const log = getLogger();
565
+ const descriptions = [];
566
+ const indexPath = path$1.join(projectRoot, "agent-kit", "plugins-index.yaml");
567
+ if (!fs$1.existsSync(indexPath)) {
568
+ log.info("[gemini-agent] No plugins-index.yaml found, skipping tool descriptions");
569
+ cachedDescriptions = descriptions;
570
+ return descriptions;
571
+ }
572
+ try {
573
+ const indexContent = fs$1.readFileSync(indexPath, "utf-8");
574
+ const pluginsIndex = yaml.load(indexContent);
575
+ if (!pluginsIndex?.plugins) {
576
+ cachedDescriptions = descriptions;
577
+ return descriptions;
578
+ }
579
+ for (const plugin of pluginsIndex.plugins) {
580
+ for (const contract of plugin.contracts || []) {
581
+ const contractPath = path$1.resolve(projectRoot, contract.path);
582
+ if (!fs$1.existsSync(contractPath)) {
583
+ log.info(`[gemini-agent] Contract file not found: ${contractPath}, skipping`);
584
+ continue;
585
+ }
586
+ try {
587
+ const contractContent = fs$1.readFileSync(contractPath, "utf-8");
588
+ const parsed = yaml.load(contractContent);
589
+ if (parsed?.tags) {
590
+ descriptions.push(...collectInteractiveDescriptions(parsed.tags));
591
+ }
592
+ } catch (err) {
593
+ log.warn(
594
+ `[gemini-agent] Failed to parse contract ${contractPath}: ${err.message}`
595
+ );
596
+ }
597
+ }
598
+ }
599
+ log.info(`[gemini-agent] Loaded ${descriptions.length} tool descriptions from contracts`);
600
+ } catch (err) {
601
+ log.warn(`[gemini-agent] Failed to load plugins-index.yaml: ${err.message}`);
602
+ }
603
+ cachedDescriptions = descriptions;
604
+ return descriptions;
324
605
  }
325
606
  const sendMessage = makeJayAction("geminiAgent.sendMessage").withServices(GEMINI_SERVICE).withHandler(async (input, service) => {
326
607
  const { message, history, toolDefinitions, pageState } = input;
@@ -344,6 +625,9 @@ const submitToolResults = makeJayAction("geminiAgent.submitToolResults").withSer
344
625
  ];
345
626
  return handleConversation(service, updatedHistory, toolDefinitions, pageState);
346
627
  });
628
+ const getToolDescriptions = makeJayAction("geminiAgent.getToolDescriptions").withMethod("GET").withHandler(async () => {
629
+ return loadToolDescriptions(process.cwd());
630
+ });
347
631
  async function fastRender() {
348
632
  const Pipeline = RenderPipeline.for();
349
633
  return Pipeline.ok({}).toPhaseOutput(() => ({
@@ -363,14 +647,20 @@ async function fastRender() {
363
647
  }
364
648
  const geminiChat = makeJayStackComponent().withProps().withFastRender(fastRender);
365
649
  export {
650
+ DISCOVERY_TOOL,
366
651
  GEMINI_SERVICE,
367
652
  GeminiService,
653
+ PAGE_STATE_TOOL,
368
654
  buildSystemPrompt,
655
+ buildToolSummary,
656
+ compactPageState,
369
657
  geminiChat,
658
+ getToolDescriptions,
370
659
  init,
371
660
  resolveToolCallTarget,
372
661
  sendMessage,
373
662
  setupGeminiAgent,
374
663
  submitToolResults,
375
- toGeminiTools
664
+ toGeminiTools,
665
+ toSlimGeminiTools
376
666
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jay-framework/gemini-agent-plugin",
3
- "version": "0.12.0",
3
+ "version": "0.14.0",
4
4
  "type": "module",
5
5
  "license": "Apache-2.0",
6
6
  "description": "Gemini AI agent plugin for jay-stack — embedded chat agent with page automation and server action calling",
@@ -33,18 +33,19 @@
33
33
  },
34
34
  "dependencies": {
35
35
  "@google/genai": "^0.14.0",
36
- "@jay-framework/component": "^0.12.0",
37
- "@jay-framework/fullstack-component": "^0.12.0",
38
- "@jay-framework/runtime": "^0.12.0",
39
- "@jay-framework/runtime-automation": "^0.12.0",
40
- "@jay-framework/stack-client-runtime": "^0.12.0",
41
- "@jay-framework/stack-server-runtime": "^0.12.0",
36
+ "@jay-framework/component": "^0.14.0",
37
+ "@jay-framework/fullstack-component": "^0.14.0",
38
+ "@jay-framework/logger": "^0.14.0",
39
+ "@jay-framework/runtime": "^0.14.0",
40
+ "@jay-framework/runtime-automation": "^0.14.0",
41
+ "@jay-framework/stack-client-runtime": "^0.14.0",
42
+ "@jay-framework/stack-server-runtime": "^0.14.0",
42
43
  "js-yaml": "^4.1.0"
43
44
  },
44
45
  "devDependencies": {
45
- "@jay-framework/compiler-jay-stack": "^0.12.0",
46
- "@jay-framework/dev-environment": "^0.12.0",
47
- "@jay-framework/jay-cli": "^0.12.0",
46
+ "@jay-framework/compiler-jay-stack": "^0.14.0",
47
+ "@jay-framework/dev-environment": "^0.14.0",
48
+ "@jay-framework/jay-cli": "^0.14.0",
48
49
  "@types/js-yaml": "^4.0.9",
49
50
  "@types/node": "^22.15.21",
50
51
  "rimraf": "^5.0.5",
package/plugin.yaml CHANGED
@@ -10,6 +10,7 @@ actions:
10
10
  action: send-message.jay-action
11
11
  - name: submitToolResults
12
12
  action: submit-tool-results.jay-action
13
+ - name: getToolDescriptions
13
14
  setup:
14
15
  handler: setupGeminiAgent
15
16
  description: Configure Gemini API key