traw 0.2.1 → 0.2.3

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "traw",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "module": "src/index.ts",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,34 +1,31 @@
1
- export const systemPrompt = `You a Traw -control a browser via DOM elements. Each element has an index [N].
2
-
3
- LANGUAGE RULES (STRICT):
4
- - "thought" field: ALWAYS in English, no exceptions
5
- - "done" action "reason" field: ALWAYS in the SAME language as user's original query
6
- - If user asked in Russian → answer in Russian
7
- - If user asked in English → answer in English
8
- - Match the user's language exactly
9
-
10
- MARKDOWN FORMATTING (for "done" reason field):
11
- The terminal supports rich markdown rendering. USE these features for better readability:
12
- - **Headers**: Use ### for sections (e.g., ### Installation)
13
- - **Bold**: Use **text** for emphasis
14
- - **Italic**: Use *text* for subtle emphasis
15
- - **Lists**: Use * or - for bullet points, 1. 2. 3. for numbered lists
16
- - **Code**: Use \`inline code\` for commands, \`\`\`lang for code blocks
17
- - **Links**: Use [text](url) format - they will be clickable
18
- - **Blockquotes**: Use > for quotes or important notes
19
- Structure your answers with headers and lists for easy scanning.
20
-
21
- ACTIONS (use index to target elements):
22
- - click: {"type":"click","index":N} - click element [N]
23
- - type: {"type":"type","index":N,"text":"query"} - type into input [N]
24
- - scroll: {"type":"scroll","direction":"down"} - scroll page
25
- - goto: {"type":"goto","text":"url"} - navigate to URL
26
- - wait: {"type":"wait"} - wait 2 seconds
27
- - back: {"type":"back"} - go back to previous page (FREE action, gives +1 step back, use when current page is wrong/useless)
28
- - done: {"type":"done","reason":"result"} - task complete, include answer IN USER'S LANGUAGE with markdown
29
-
30
- OUTPUT (JSON only, no markdown wrapper):
31
- {"thought":"English reasoning here","action":{"type":"click","index":0}}`
1
+ export const systemPrompt = `You are Traw - browser agent. You receive page as XML.
2
+
3
+ INPUT FORMAT:
4
+ <page>
5
+ <h1>Page Title</h1>
6
+ <a id="0" href="...">Link</a>
7
+ <input id="1" type="text" value="current"/>
8
+ <button id="2" disabled="true">Submit</button>
9
+ </page>
10
+
11
+ Use "id" attribute to target interactive elements.
12
+ Elements with disabled="true" cannot be clicked.
13
+
14
+ LANGUAGE:
15
+ - thought: English
16
+ - done reason: Same language as user query
17
+
18
+ ACTIONS:
19
+ - click: {"type":"click","index":N}
20
+ - type: {"type":"type","index":N,"text":"..."}
21
+ - scroll: {"type":"scroll","direction":"down"|"up"}
22
+ - goto: {"type":"goto","text":"url"}
23
+ - wait: {"type":"wait"}
24
+ - back: {"type":"back"}
25
+ - done: {"type":"done","reason":"answer"}
26
+
27
+ OUTPUT JSON:
28
+ {"thought":"...","action":{"type":"click","index":0}}`
32
29
 
33
30
  export const planningPrompt = `Create short numbered plan to accomplish goal via browser. Start from DuckDuckGo search.
34
31
 
@@ -65,89 +65,78 @@ export class BrowserController {
65
65
  const url = this.page.url()
66
66
  const title = await this.page.title()
67
67
 
68
- const pageText = await this.page.evaluate(() => {
69
- const texts: string[] = []
70
- const textSelector = "h1, h2, h3, h4, h5, h6, p, li, blockquote, td, th, figcaption, summary"
71
-
72
- document.querySelectorAll(textSelector).forEach((el) => {
73
- const node = el as HTMLElement
74
- if (node.offsetParent === null) return
75
-
76
- const text = node.innerText?.trim()
77
- if (!text || text.length < 3) return
78
-
79
- const tag = el.tagName.toLowerCase()
80
- const prefix = tag.startsWith("h") ? `[${tag}]` : ""
81
- texts.push(`${prefix} ${text}`)
82
- })
83
-
84
- const unique = [...new Set(texts)]
85
- return unique.join("\n")
86
- }).catch(() => "")
87
-
88
- const elements = await this.page.evaluate(() => {
89
- const items: string[] = []
90
- const selector = 'a[href], button, input, textarea, select, [role="button"], [onclick]'
91
-
92
- document.querySelectorAll(selector).forEach((el) => {
93
- const node = el as HTMLElement
94
- if (node.offsetParent === null) return
95
-
68
+ const xml = await this.page.evaluate(() => {
69
+ const esc = (s: string) => s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;")
70
+ const out: string[] = ["<page>"]
71
+ let idx = 0
72
+
73
+ document.querySelectorAll("[data-idx]").forEach(el => el.removeAttribute("data-idx"))
74
+
75
+ const walk = (node: Element, depth: number) => {
76
+ const el = node as HTMLElement
96
77
  const tag = el.tagName.toLowerCase()
97
- const type = (el as HTMLInputElement).type || ""
98
- const text = el.textContent?.trim() || ""
99
- const val = (el as HTMLInputElement).value || ""
100
- const name = (el as HTMLInputElement).name || ""
101
- const placeholder = (el as HTMLInputElement).placeholder || ""
102
-
103
- const ariaLabel = el.getAttribute("aria-label") || ""
104
- const titleAttr = el.getAttribute("title") || ""
105
- const alt = (el as HTMLImageElement).alt || ""
106
-
107
- let linkedLabel = ""
108
- const id = el.id
109
- if (id) {
110
- const labelEl = document.querySelector(`label[for="${id}"]`)
111
- if (labelEl) linkedLabel = labelEl.textContent?.trim() || ""
112
- }
113
-
114
- const displayText = ariaLabel || titleAttr || alt || linkedLabel || text || placeholder || name
115
-
116
- let label = ""
117
- if (tag === "a") {
118
- label = `<a>${displayText}</a>`
119
- } else if (tag === "button") {
120
- label = `<button>${displayText || val}</button>`
121
- } else if (tag === "input" && (type === "submit" || type === "button")) {
122
- label = `<button>${val || displayText}</button>`
123
- } else if (tag === "input") {
124
- const labelPart = linkedLabel ? ` label="${linkedLabel}"` : ""
125
- label = `<input${type ? ` type="${type}"` : ""}${labelPart}${val ? ` value="${val}"` : ""}>`
126
- } else if (tag === "textarea") {
127
- const labelPart = linkedLabel ? ` label="${linkedLabel}"` : ""
128
- label = `<textarea${labelPart}>${val}</textarea>`
129
- } else if (tag === "select") {
130
- const labelPart = linkedLabel ? ` label="${linkedLabel}"` : ""
131
- const selected = (el as HTMLSelectElement).selectedOptions[0]?.text || ""
132
- label = `<select${labelPart}${selected ? ` selected="${selected}"` : ""}>`
78
+ const indent = " ".repeat(depth)
79
+
80
+ const skipTags = ["script", "style", "noscript", "svg", "path", "meta", "link", "br", "hr"]
81
+ if (skipTags.includes(tag)) return
82
+
83
+ const interactiveTags = ["a", "button", "input", "textarea", "select"]
84
+ const hasRole = el.getAttribute("role")
85
+ const hasOnclick = el.hasAttribute("onclick")
86
+ const isInteractive = interactiveTags.includes(tag) || hasRole || hasOnclick
87
+
88
+ if (isInteractive) {
89
+ el.setAttribute("data-idx", String(idx))
90
+
91
+ const attrs: string[] = [`id="${idx}"`]
92
+
93
+ const type = (el as HTMLInputElement).type
94
+ if (type) attrs.push(`type="${type}"`)
95
+
96
+ const href = (el as HTMLAnchorElement).href
97
+ if (href && tag === "a") attrs.push(`href="${esc(href.slice(0, 80))}"`)
98
+
99
+ const val = (el as HTMLInputElement).value
100
+ if (val) attrs.push(`value="${esc(val)}"`)
101
+
102
+ if ((el as any).disabled) attrs.push(`disabled="true"`)
103
+ if ((el as any).checked) attrs.push(`checked="true"`)
104
+ if ((el as any).readOnly) attrs.push(`readonly="true"`)
105
+ if ((el as any).required) attrs.push(`required="true"`)
106
+ if (el.getAttribute("aria-expanded")) attrs.push(`expanded="${el.getAttribute("aria-expanded")}"`)
107
+ if (el.getAttribute("aria-selected") === "true") attrs.push(`selected="true"`)
108
+
109
+ const text = el.textContent?.trim() || ""
110
+ const ariaLabel = el.getAttribute("aria-label")
111
+ const placeholder = (el as HTMLInputElement).placeholder
112
+ const label = esc(ariaLabel || text || placeholder || "")
113
+
114
+ out.push(`${indent}<${tag} ${attrs.join(" ")}>${label}</${tag}>`)
115
+ idx++
133
116
  } else {
134
- label = `<${tag}>${displayText}</${tag}>`
117
+ const textTags = ["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "td", "th", "label"]
118
+ if (textTags.includes(tag)) {
119
+ const directText = Array.from(el.childNodes)
120
+ .filter(n => n.nodeType === 3)
121
+ .map(n => n.textContent?.trim())
122
+ .join(" ")
123
+ .trim()
124
+
125
+ if (directText.length > 2) {
126
+ out.push(`${indent}<${tag}>${esc(directText)}</${tag}>`)
127
+ }
128
+ }
135
129
  }
136
-
137
- node.setAttribute("data-idx", String(items.length))
138
- items.push(`[${items.length}] ${label}`)
139
- })
140
-
141
- return items.join("\n")
142
- }).catch(() => "")
143
-
144
- const combined = [pageText, elements].filter(Boolean).join("\n\n")
145
-
146
- return {
147
- url,
148
- title,
149
- text: combined,
150
- }
130
+
131
+ Array.from(el.children).forEach(child => walk(child, depth + 1))
132
+ }
133
+
134
+ walk(document.body, 0)
135
+ out.push("</page>")
136
+ return out.join("\n")
137
+ }).catch(() => "<page></page>")
138
+
139
+ return { url, title, text: xml }
151
140
  }
152
141
 
153
142
  async execute(action: Action): Promise<string> {