browserwire 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,295 @@
1
+ /**
2
+ * synthesize-workflows.js — LLM-driven workflow synthesis
3
+ *
4
+ * Takes a merged BrowserWireManifest and produces WorkflowActionDef[] —
5
+ * complete task blueprints covering navigation, interaction, and data reading.
6
+ *
7
+ * Three workflow kinds:
8
+ * read — navigate → read_view → return structured data
9
+ * write — navigate → fill/select/click → submit → check outcomes
10
+ * mixed — navigate → interact → read_view → return structured data
11
+ */
12
+
13
+ import { callLLM, getLLMConfig } from "./llm-client.js";
14
+ // System prompt passed as the first argument to callLLM() by synthesizeWorkflows; it describes the three workflow kinds, the step rules, and the JSON-only { "workflows": [...] } output shape.
14
+ const SYSTEM_PROMPT = `You are a workflow architect for web automation. Given a site manifest, you synthesize high-level task workflows that developers actually want to call.
15
+
16
+ You will receive:
17
+ - pages[]: { id, routePattern, name, description }
18
+ - actions[]: { id, interactionKind, semanticName, name }
19
+ - views[]: { id, name, isList }
20
+ - compositeActions[]: hints about multi-step operations
21
+
22
+ ## Three workflow kinds
23
+
24
+ **READ workflows** (kind: "read") — fetch and return structured data:
25
+ - Navigate to the listing/detail page
26
+ - End with a read_view step referencing a viewId from views[]
27
+ - No inputs required unless navigation needs a parameter (e.g., id for detail page)
28
+ - No outcomes field needed
29
+ - Examples: list_events, get_event_details, list_attendees
30
+
31
+ **WRITE workflows** (kind: "write") — perform a mutation:
32
+ - Navigate to form/action page
33
+ - Fill fields (interactionKind=type → fill), select dropdowns (interactionKind=select → select), submit (form-submit click → submit, toggle → click)
34
+ - Provide outcomes with success/failure signals
35
+ - Examples: create_event, register_for_event, update_profile
36
+
37
+ **MIXED workflows** (kind: "mixed") — interact then read:
38
+ - Navigate, interact (search/filter), then read_view for results
39
+ - Provide outcomes only if a submission is involved
40
+ - Examples: search_events, filter_by_category
41
+
42
+ ## Rules
43
+ - First step MUST always be navigate
44
+ - Only reference actionIds from actions[] and viewIds from views[]
45
+ - read_view MUST be the LAST step for read/mixed workflows
46
+ - For write: infer step type from interactionKind: type→fill, select→select, form-submit click→submit, toggle→click
47
+ - Map every workflow input to exactly one fill/select step via inputParam
48
+ - Max 8 workflows — prioritize the most useful
49
+ - URL: use routePattern from pages[]; replace :param with the input variable name
50
+
51
+ ## Output format (JSON only, no prose)
52
+ {
53
+ "workflows": [
54
+ {
55
+ "name": "list_events",
56
+ "kind": "read",
57
+ "description": "Returns all events as a structured list",
58
+ "inputs": [],
59
+ "steps": [
60
+ { "type": "navigate", "url": "/events" },
61
+ { "type": "read_view", "viewId": "view_events_list" }
62
+ ]
63
+ },
64
+ {
65
+ "name": "create_event",
66
+ "kind": "write",
67
+ "description": "Creates a new event by filling the creation form",
68
+ "inputs": [
69
+ { "name": "title", "type": "string", "required": true, "description": "Event title" },
70
+ { "name": "start_date", "type": "string", "required": true, "description": "Start date" }
71
+ ],
72
+ "steps": [
73
+ { "type": "navigate", "url": "/events/new" },
74
+ { "type": "fill", "actionId": "action_type_into_title", "inputParam": "title" },
75
+ { "type": "fill", "actionId": "action_type_into_start_date", "inputParam": "start_date" },
76
+ { "type": "submit", "actionId": "action_click_create_event" }
77
+ ],
78
+ "outcomes": {
79
+ "success": { "kind": "url_change", "value": "/events/[0-9]+" },
80
+ "failure": { "kind": "element_appears", "value": ".error-message, .alert-danger" }
81
+ }
82
+ }
83
+ ]
84
+ }`;
86
+
87
/**
 * Validate and clean up a raw workflow object from the LLM response.
 *
 * The workflow is rejected (null) when its name, kind, or steps array is
 * malformed, when the first step is not a navigate, when any step lacks a
 * string `type`, or when a navigate step lacks a string `url`. Steps that
 * reference an unknown actionId/viewId, or that use an unknown step type,
 * are dropped individually rather than failing the whole workflow.
 *
 * Inputs are kept when they are referenced either by a step's `inputParam`
 * or by a placeholder in a navigate URL (`:name` or `{name}`). The second
 * case is what lets read workflows such as "get_event_details" keep the
 * parameter used only for navigation (e.g. `/events/:id`).
 *
 * @param {object} raw - One entry of the LLM's `workflows` array (untrusted).
 * @param {Set<string>} actionIds - Known action ids; steps referencing others are dropped.
 * @param {Set<string>} viewIds - Known view ids; read_view steps referencing others are dropped.
 * @param {string} [capturedAt] - Timestamp stored in provenance; defaults to the current time.
 * @returns {object|null} A WorkflowActionDef, or null if the workflow is invalid.
 */
const validateWorkflow = (raw, actionIds, viewIds, capturedAt) => {
  if (!raw || typeof raw !== "object") return null;
  if (!raw.name || typeof raw.name !== "string") return null;
  if (!["read", "write", "mixed"].includes(raw.kind)) return null;
  if (!Array.isArray(raw.steps) || raw.steps.length === 0) return null;

  // First step must be navigate
  if (raw.steps[0]?.type !== "navigate") return null;

  const steps = [];
  for (const step of raw.steps) {
    if (!step || typeof step.type !== "string") return null;

    if (step.type === "navigate") {
      if (!step.url || typeof step.url !== "string") return null;
      steps.push({ type: "navigate", url: step.url });
      continue;
    }

    if (step.type === "read_view") {
      // Invalid read_view references are skipped silently.
      if (step.viewId && viewIds.has(step.viewId)) {
        steps.push({ type: "read_view", viewId: step.viewId });
      }
      continue;
    }

    if (["fill", "select", "click", "submit"].includes(step.type)) {
      // Unknown action references are skipped.
      if (!step.actionId || !actionIds.has(step.actionId)) continue;
      const cleaned = { type: step.type, actionId: step.actionId };
      if (step.inputParam && typeof step.inputParam === "string") {
        cleaned.inputParam = step.inputParam;
      }
      steps.push(cleaned);
    }

    // Any other step type is ignored.
  }

  if (steps.length === 0) return null;
  if (steps[0].type !== "navigate") return null;

  // For read/mixed: require at least one read_view step, and keep it last.
  if (raw.kind === "read" || raw.kind === "mixed") {
    if (!steps.some((s) => s.type === "read_view")) return null;
    if (steps[steps.length - 1].type !== "read_view") {
      const readViewSteps = steps.filter((s) => s.type === "read_view");
      const otherSteps = steps.filter((s) => s.type !== "read_view");
      steps.length = 0;
      steps.push(...otherSteps, ...readViewSteps);
    }
  }

  // For write: require at least one fill/select + submit/click
  if (raw.kind === "write") {
    const hasFillOrSelect = steps.some((s) => s.type === "fill" || s.type === "select");
    const hasSubmitOrClick = steps.some((s) => s.type === "submit" || s.type === "click");
    if (!hasFillOrSelect || !hasSubmitOrClick) return null;
  }

  // Inputs referenced by interaction steps.
  const referencedParams = new Set(
    steps.filter((s) => s.inputParam).map((s) => s.inputParam)
  );

  // Inputs referenced by navigate URLs. Plain string search is used instead
  // of a dynamically built RegExp because the name comes from the LLM.
  const navigateUrls = steps.filter((s) => s.type === "navigate").map((s) => s.url);
  const urlReferencesParam = (url, name) => {
    if (name.length === 0) return false;
    if (url.includes(`{${name}}`)) return true;
    const token = `:${name}`;
    let at = url.indexOf(token);
    while (at !== -1) {
      // ":id" must not match inside ":identifier".
      const next = url[at + token.length];
      if (next === undefined || !/[A-Za-z0-9_]/.test(next)) return true;
      at = url.indexOf(token, at + 1);
    }
    return false;
  };
  const isReferenced = (name) =>
    referencedParams.has(name) || navigateUrls.some((url) => urlReferencesParam(url, name));

  const inputs = (Array.isArray(raw.inputs) ? raw.inputs : [])
    .filter((i) => i && typeof i.name === "string" && isReferenced(i.name))
    .map((i) => ({
      name: i.name,
      type: ["string", "number", "boolean", "enum"].includes(i.type) ? i.type : "string",
      required: i.required === true,
      description: typeof i.description === "string" ? i.description : undefined
    }));

  const workflowId = `workflow_${raw.name.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_+|_+$/g, "")}`;

  const result = {
    id: workflowId,
    name: raw.name,
    description: typeof raw.description === "string" ? raw.description : raw.name,
    kind: raw.kind,
    inputs,
    steps,
    provenance: {
      source: "agent",
      sessionId: "workflow-synthesis",
      traceIds: [],
      annotationIds: [],
      capturedAt: capturedAt || new Date().toISOString()
    }
  };

  // Add outcomes for write/mixed
  if (raw.outcomes && typeof raw.outcomes === "object" && raw.kind !== "read") {
    const outcomeEntries = [];
    for (const [key, signal] of Object.entries(raw.outcomes)) {
      if (!signal || typeof signal !== "object") continue;
      if (!["url_change", "element_appears", "text_contains", "element_disappears"].includes(signal.kind)) continue;
      if (typeof signal.value !== "string") continue;
      const outcome = { kind: signal.kind, value: signal.value };
      if (signal.selector && typeof signal.selector === "string") {
        outcome.selector = signal.selector;
      }
      outcomeEntries.push([key, outcome]);
    }
    if (outcomeEntries.length > 0) {
      // Object.fromEntries defines own properties, so a "__proto__" key from
      // parsed JSON cannot replace the prototype of the outcomes object.
      result.outcomes = Object.fromEntries(outcomeEntries);
    }
  }

  return result;
};
210
+
211
/**
 * Synthesize WorkflowActionDef[] from a merged manifest using the LLM.
 *
 * Returns an empty array (after logging the reason) when the LLM is not
 * configured, when the manifest has neither actions nor views, when the LLM
 * call fails, or when the response cannot be parsed as JSON. At most 8
 * validated workflows are returned.
 *
 * @param {object} manifest - BrowserWireManifest
 * @returns {Promise<WorkflowActionDef[]>}
 */
export const synthesizeWorkflows = async (manifest) => {
  const config = getLLMConfig();
  if (!config) {
    console.log("[browserwire-cli] workflow synthesis skipped: LLM not configured");
    return [];
  }

  // Optional chaining so a missing manifest is treated like an empty one.
  const actions = manifest?.actions || [];
  const views = manifest?.views || [];
  const pages = manifest?.pages || [];
  const compositeActions = manifest?.compositeActions || [];

  if (actions.length === 0 && views.length === 0) {
    console.log("[browserwire-cli] workflow synthesis skipped: no actions or views");
    return [];
  }

  const actionIds = new Set(actions.map((a) => a.id));
  const viewIds = new Set(views.map((v) => v.id));

  // Build compact manifest summary for the LLM
  const userMessage = JSON.stringify({
    pages: pages.map((p) => ({
      id: p.id,
      routePattern: p.routePattern,
      name: p.name,
      description: p.description || ""
    })),
    actions: actions.map((a) => ({
      id: a.id,
      interactionKind: a.interactionKind || "click",
      semanticName: a.semanticName || a.name,
      name: a.name
    })),
    views: views.map((v) => ({
      id: v.id,
      name: v.semanticName || v.name,
      isList: v.isList || false
    })),
    compositeActions: compositeActions.map((ca) => ({
      name: ca.name,
      description: ca.description || "",
      stepCount: (ca.stepActionIds || []).length
    }))
  }, null, 2);

  let rawText;
  try {
    rawText = await callLLM(SYSTEM_PROMPT, userMessage, config);
  } catch (error) {
    console.warn(`[browserwire-cli] workflow synthesis LLM call failed: ${error.message}`);
    return [];
  }

  // Parse JSON from response
  let parsed;
  try {
    // Strip markdown code fences if present
    const cleaned = rawText
      .replace(/^```(?:json)?\s*/m, "")
      .replace(/\s*```\s*$/m, "")
      .trim();
    try {
      parsed = JSON.parse(cleaned);
    } catch (parseError) {
      // The model may wrap the payload in prose despite the "JSON only"
      // instruction; retry on the outermost {...} span before giving up.
      const start = cleaned.indexOf("{");
      const end = cleaned.lastIndexOf("}");
      if (start === -1 || end <= start) throw parseError;
      parsed = JSON.parse(cleaned.slice(start, end + 1));
    }
  } catch (error) {
    console.warn(`[browserwire-cli] workflow synthesis: failed to parse LLM response: ${error.message}`);
    return [];
  }

  // `parsed` can legitimately be null or a primitive (e.g. the response was
  // the JSON literal `null`), so guard the property access.
  const rawWorkflows = Array.isArray(parsed?.workflows) ? parsed.workflows : [];
  const capturedAt = new Date().toISOString();

  const workflows = rawWorkflows
    .map((raw) => validateWorkflow(raw, actionIds, viewIds, capturedAt))
    .filter(Boolean)
    .slice(0, 8);

  console.log(`[browserwire-cli] workflow synthesis: ${workflows.length} workflows synthesized (${rawWorkflows.length} raw)`);
  return workflows;
};
package/cli/index.js ADDED
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync } from "node:fs";
4
+ import { resolve } from "node:path";
5
+
6
/**
 * Load KEY=VALUE pairs from a .env file into process.env without overriding
 * variables that are already set.
 *
 * Blank lines and lines starting with "#" are ignored, as are lines without
 * "=" or with an empty key. A leading "export " before the key is accepted.
 * One pair of matching surrounding quotes is stripped from the value.
 * A missing file is not an error; any other read failure is reported as a
 * warning and otherwise ignored.
 *
 * @param {string} filePath - Path of the .env file to read.
 * @returns {void}
 */
const loadEnv = (filePath) => {
  let content;
  try {
    content = readFileSync(filePath, "utf8");
  } catch (error) {
    // .env file is optional — silently skip if missing
    if (error?.code !== "ENOENT") {
      console.warn(`[browserwire-cli] could not read ${filePath}: ${error?.message}`);
    }
    return;
  }

  for (const line of content.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const eqIndex = trimmed.indexOf("=");
    if (eqIndex === -1) continue;

    // Accept shell-style "export KEY=value" lines.
    const key = trimmed.slice(0, eqIndex).trim().replace(/^export\s+/, "");
    if (!key) continue;

    let value = trimmed.slice(eqIndex + 1).trim();
    // Strip surrounding quotes; the length check keeps a lone quote character
    // from being treated as both the opening and the closing quote.
    const isQuoted =
      value.length >= 2 &&
      ((value.startsWith('"') && value.endsWith('"')) ||
        (value.startsWith("'") && value.endsWith("'")));
    if (isQuoted) {
      value = value.slice(1, -1);
    }

    // Don't override existing env vars
    if (!(key in process.env)) {
      process.env[key] = value;
    }
  }
};
31
+
32
// Populate process.env from ./.env in the current working directory.
loadEnv(resolve(process.cwd(), ".env"));

const args = process.argv.slice(2);
const debug = args.includes("--debug");

// --extension-path: print the bundled extension directory and exit.
if (args.includes("--extension-path")) {
  const extPath = resolve(import.meta.dirname, "../extension");
  console.log(extPath);
  process.exit(0);
}

// --- Server mode: run discovery pipeline + REST API ---
const host = process.env.BROWSERWIRE_HOST || "127.0.0.1";
const port = Number(process.env.BROWSERWIRE_PORT || 8787);

if (!process.env.BROWSERWIRE_LLM_PROVIDER) {
  console.error("[browserwire-cli] BROWSERWIRE_LLM_PROVIDER is required (set in .env or environment)");
  console.error("[browserwire-cli] Supported: openai, anthropic, ollama");
  process.exit(1);
}

// Number() yields NaN for non-numeric text; reject that (and out-of-range
// values) here instead of handing an unusable port to the server.
if (!Number.isInteger(port) || port < 0 || port > 65535) {
  console.error(`[browserwire-cli] BROWSERWIRE_PORT must be an integer between 0 and 65535 (got "${process.env.BROWSERWIRE_PORT}")`);
  process.exit(1);
}

// Imported lazily so the checks above run before the server module is loaded.
const { startServer } = await import("./server.js");
const server = await startServer({ host, port, debug });

// Close the server first, then exit once close() reports completion.
const shutdown = () => {
  server.close(() => {
    process.exit(0);
  });
};

process.on("SIGINT", shutdown);
process.on("SIGTERM", shutdown);
@@ -0,0 +1,140 @@
1
+ /**
2
+ * manifest-store.js — File-based store for site-centric manifests.
3
+ *
4
+ * Directory layout:
5
+ * manifests/
6
+ * lu_ma/ # slug from hostname (dots → underscores)
7
+ * manifest.json # canonical manifest
8
+ * meta.json # { origin, createdAt, updatedAt, sessionHistory }
9
+ */
10
+
11
+ import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
12
+ import { resolve } from "node:path";
13
+
14
/**
 * File-based store keeping one directory per site under `baseDir`, each
 * holding `manifest.json` (the manifest) and `meta.json` (summary metadata).
 */
export class ManifestStore {
  /**
   * @param {string} [baseDir] - Root directory; defaults to "<cwd>/manifests".
   */
  constructor(baseDir = resolve(process.cwd(), "manifests")) {
    this.baseDir = baseDir;
  }

  /**
   * Convert a URL or origin string to a filesystem-safe slug.
   * "https://lu.ma" → "lu_ma", "localhost:3000" → "localhost_3000"
   *
   * @param {string} urlOrOrigin - Absolute URL, origin, or bare host[:port].
   * @returns {string} Host with "." and ":" replaced by "_".
   * @throws {TypeError} When urlOrOrigin is null or undefined.
   */
  static originSlug(urlOrOrigin) {
    if (urlOrOrigin == null) {
      throw new TypeError("urlOrOrigin is required");
    }
    const input = String(urlOrOrigin);

    let host = "";
    try {
      host = new URL(input).host;
    } catch {
      // Not an absolute URL — handled as a bare host below.
    }

    // A bare "host:port" such as "localhost:3000" parses as a URL whose
    // scheme is "localhost:" and whose host is empty, so an empty host is
    // treated the same as a parse failure. Only the part before any path,
    // query, or fragment is kept so the slug never contains a separator.
    if (!host) {
      host = input.split(/[/\\?#]/, 1)[0];
    }

    return host.replace(/[.:]/g, "_");
  }

  /**
   * List all known sites with summary metadata (no full manifest load).
   * Directories without a readable, valid meta.json are skipped; an
   * unreadable base directory yields an empty list.
   * @returns {Promise<Array<{ origin: string, slug: string, updatedAt: string|null, entityCount: number, actionCount: number }>>}
   */
  async listSites() {
    let entries;
    try {
      entries = await readdir(this.baseDir, { withFileTypes: true });
    } catch {
      return [];
    }

    const sites = [];
    for (const entry of entries) {
      if (!entry.isDirectory()) continue;
      const slug = entry.name;
      const metaPath = resolve(this.baseDir, slug, "meta.json");
      try {
        const raw = await readFile(metaPath, "utf8");
        const meta = JSON.parse(raw);
        sites.push({
          origin: meta.origin,
          slug,
          updatedAt: meta.updatedAt || meta.createdAt || null,
          entityCount: meta.entityCount || 0,
          actionCount: meta.actionCount || 0
        });
      } catch {
        // Skip directories without valid meta.json
      }
    }
    return sites;
  }

  /**
   * Load the canonical manifest for a site.
   * @param {string} urlOrOrigin - Absolute URL, origin, or bare host[:port].
   * @returns {Promise<object|null>} Parsed manifest, or null when it is missing or unparseable.
   */
  async load(urlOrOrigin) {
    const slug = ManifestStore.originSlug(urlOrOrigin);
    const manifestPath = resolve(this.baseDir, slug, "manifest.json");
    try {
      const raw = await readFile(manifestPath, "utf8");
      return JSON.parse(raw);
    } catch {
      return null;
    }
  }

  /**
   * Check if a manifest exists for the given site.
   * @param {string} urlOrOrigin - Absolute URL, origin, or bare host[:port].
   * @returns {Promise<boolean>} True when manifest.json can be read.
   */
  async has(urlOrOrigin) {
    const slug = ManifestStore.originSlug(urlOrOrigin);
    const manifestPath = resolve(this.baseDir, slug, "manifest.json");
    try {
      await readFile(manifestPath, "utf8");
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Save a manifest for a site. Writes manifest.json, then updates meta.json.
   * These are two separate writes, so the pair is not updated atomically.
   *
   * @param {string} urlOrOrigin - Absolute URL, origin, or bare host[:port].
   * @param {object} manifest - Manifest to persist; `entities` and `actions` lengths are recorded in meta.json.
   * @param {string} [sessionId] - When given, appended to meta.sessionHistory with the update timestamp.
   * @returns {Promise<void>}
   */
  async save(urlOrOrigin, manifest, sessionId) {
    let origin = urlOrOrigin;
    try {
      const parsedOrigin = new URL(urlOrOrigin).origin;
      // URLs without a real host (e.g. a bare "localhost:3000", which parses
      // with "localhost:" as its scheme) report the literal string "null" as
      // their origin; keep the caller's value in that case.
      if (parsedOrigin !== "null") {
        origin = parsedOrigin;
      }
    } catch {
      // Not an absolute URL — keep the value as given.
    }

    const slug = ManifestStore.originSlug(urlOrOrigin);
    const dir = resolve(this.baseDir, slug);
    await mkdir(dir, { recursive: true });

    // Write manifest
    await writeFile(resolve(dir, "manifest.json"), JSON.stringify(manifest, null, 2), "utf8");

    // Read or create meta
    const metaPath = resolve(dir, "meta.json");
    let meta = null;
    try {
      const raw = await readFile(metaPath, "utf8");
      meta = JSON.parse(raw);
    } catch {
      // Missing or unparseable meta.json — recreated below.
    }
    if (!meta || typeof meta !== "object" || Array.isArray(meta)) {
      meta = {
        origin,
        createdAt: new Date().toISOString(),
        sessionHistory: []
      };
    }
    // An existing meta.json may lack the history array; make sure push() works.
    if (!Array.isArray(meta.sessionHistory)) {
      meta.sessionHistory = [];
    }

    meta.updatedAt = new Date().toISOString();
    meta.entityCount = manifest.entities?.length || 0;
    meta.actionCount = manifest.actions?.length || 0;
    if (sessionId) {
      meta.sessionHistory.push({ sessionId, timestamp: meta.updatedAt });
    }

    await writeFile(metaPath, JSON.stringify(meta, null, 2), "utf8");

    console.log(`[browserwire-cli] manifest saved for ${origin} → manifests/${slug}/`);
  }
}