textweb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ """
2
+ TextWeb LangChain Tool Integration
3
+
4
+ Wraps the TextWeb HTTP API as LangChain tools for use in agents and chains.
5
+
6
+ Usage:
7
+ from textweb_langchain import get_textweb_tools
8
+
9
+ tools = get_textweb_tools() # Returns list of LangChain tools
10
+ agent = initialize_agent(tools, llm, agent="zero-shot-react-description")
11
+
12
+ Requires:
13
+ pip install langchain requests
14
+ textweb --serve 3000 (run the TextWeb server)
15
+ """
16
+
17
+ import json
18
+ import requests
19
+ from typing import Optional
20
+
21
+ try:
22
+ from langchain.tools import Tool, StructuredTool
23
+ from langchain.pydantic_v1 import BaseModel, Field
24
+ except ImportError:
25
+ raise ImportError("Install langchain: pip install langchain")
26
+
27
+
28
+ DEFAULT_BASE_URL = "http://localhost:3000"
29
+
30
+
31
class TextWebClient:
    """HTTP client for the TextWeb server.

    Every public method issues one request against ``base_url`` and returns
    the server's JSON reply rendered as a plain-text report by ``_format``.

    Args:
        base_url: Root URL of the TextWeb HTTP server. A trailing slash is
            stripped so endpoints can be appended directly.
        timeout: Seconds to wait for each request before ``requests`` gives
            up. Defaults to 30, the value previously hard-coded per call.
    """

    def __init__(self, base_url: str = DEFAULT_BASE_URL, timeout: float = 30):
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        # One shared session so consecutive calls can reuse the connection.
        self.session = requests.Session()

    def _post(self, endpoint: str, data: dict) -> dict:
        """POST ``data`` as JSON to ``endpoint`` and return the decoded reply.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        resp = self.session.post(
            f"{self.base_url}{endpoint}", json=data, timeout=self.timeout
        )
        resp.raise_for_status()
        return resp.json()

    def _get(self, endpoint: str) -> dict:
        """GET ``endpoint`` and return the decoded JSON reply.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        resp = self.session.get(
            f"{self.base_url}{endpoint}", timeout=self.timeout
        )
        resp.raise_for_status()
        return resp.json()

    def navigate(self, url: str) -> str:
        """POST ``url`` to ``/navigate`` and return the formatted page."""
        result = self._post("/navigate", {"url": url})
        return self._format(result)

    def click(self, ref: int) -> str:
        """POST element reference ``ref`` to ``/click`` and return the formatted page."""
        result = self._post("/click", {"ref": ref})
        return self._format(result)

    def type_text(self, ref: int, text: str) -> str:
        """POST ``ref`` and ``text`` to ``/type`` and return the formatted page."""
        result = self._post("/type", {"ref": ref, "text": text})
        return self._format(result)

    def select(self, ref: int, value: str) -> str:
        """POST ``ref`` and ``value`` to ``/select`` and return the formatted page."""
        result = self._post("/select", {"ref": ref, "value": value})
        return self._format(result)

    def scroll(self, direction: str = "down", amount: int = 1) -> str:
        """POST ``direction`` and ``amount`` to ``/scroll`` and return the formatted page."""
        result = self._post("/scroll", {"direction": direction, "amount": amount})
        return self._format(result)

    def snapshot(self) -> str:
        """GET ``/snapshot`` and return the formatted page."""
        result = self._get("/snapshot")
        return self._format(result)

    def _format(self, result: dict) -> str:
        """Render a server reply as the text report handed back to callers.

        Reads the ``view``, ``elements`` and ``meta`` keys of ``result``.
        A key that is missing *or* explicitly ``null`` is treated as empty,
        so a sparse reply yields a report with placeholders instead of an
        ``AttributeError``.
        """
        # `.get(key, default)` only covers a missing key; `or` also covers
        # a JSON null that decodes to None.
        view = result.get("view") or ""
        elements = result.get("elements") or {}
        meta = result.get("meta") or {}

        refs = "\n".join(
            f"[{ref}] {el.get('semantic', '?')}: {el.get('text', '(no text)')}"
            for ref, el in elements.items()
        )

        return (
            f"URL: {meta.get('url', 'unknown')}\n"
            f"Title: {meta.get('title', 'unknown')}\n"
            f"Refs: {meta.get('totalRefs', 0)}\n\n"
            f"{view}\n\n"
            f"Interactive elements:\n{refs}"
        )
83
+
84
+
85
+ # ─── Pydantic Schemas ─────────────────────────────────────────────────────────
86
+
87
class NavigateInput(BaseModel):
    """Argument schema for the navigate tool: a single required URL."""

    url: str = Field(..., description="URL to navigate to")
89
+
90
class ClickInput(BaseModel):
    """Argument schema for the click tool: a single required element reference."""

    ref: int = Field(..., description="Element reference number to click")
92
+
93
class TypeInput(BaseModel):
    """Argument schema for the type tool: target element plus the text to enter."""

    ref: int = Field(..., description="Element reference number of the input field")
    text: str = Field(..., description="Text to type into the field")
96
+
97
class SelectInput(BaseModel):
    """Argument schema for the select tool: target dropdown plus the option to pick."""

    ref: int = Field(..., description="Element reference number of the dropdown")
    value: str = Field(..., description="Option value or text to select")
100
+
101
class ScrollInput(BaseModel):
    """Argument schema for the scroll tool: required direction, optional page count."""

    direction: str = Field(..., description="Scroll direction: up, down, or top")
    amount: int = Field(1, description="Number of pages to scroll")
104
+
105
+
106
+ # ─── Tool Factory ─────────────────────────────────────────────────────────────
107
+
108
def get_textweb_tools(base_url: str = DEFAULT_BASE_URL) -> list:
    """
    Build the LangChain tool set that drives a TextWeb browser session.

    All returned tools share one ``TextWebClient`` bound to ``base_url``.

    Args:
        base_url: URL of the running TextWeb HTTP server (default: http://localhost:3000)

    Returns:
        Five ``StructuredTool`` instances (navigate, click, type, select,
        scroll) followed by one plain ``Tool`` (snapshot), in that order.
    """
    browser = TextWebClient(base_url)

    # (tool name, handler, description, argument schema) for each structured tool.
    # Handlers stay as lambdas so the callable signatures seen by LangChain
    # are unchanged.
    structured_specs = (
        (
            "textweb_navigate",
            lambda url: browser.navigate(url),
            "Navigate to a URL and render it as a text grid. Interactive elements are marked with [ref] numbers. Returns ~2-5KB of text instead of a 1MB screenshot. No vision model needed.",
            NavigateInput,
        ),
        (
            "textweb_click",
            lambda ref: browser.click(ref),
            "Click an interactive element by its [ref] number from the text grid.",
            ClickInput,
        ),
        (
            "textweb_type",
            lambda ref, text: browser.type_text(ref, text),
            "Type text into an input field by its [ref] number. Replaces existing content.",
            TypeInput,
        ),
        (
            "textweb_select",
            lambda ref, value: browser.select(ref, value),
            "Select an option from a dropdown by its [ref] number.",
            SelectInput,
        ),
        (
            "textweb_scroll",
            lambda direction, amount=1: browser.scroll(direction, amount),
            "Scroll the page up/down/top. Returns updated text grid.",
            ScrollInput,
        ),
    )

    toolkit = [
        StructuredTool.from_function(
            func=handler,
            name=tool_name,
            description=summary,
            args_schema=schema,
        )
        for tool_name, handler, summary, schema in structured_specs
    ]

    # Snapshot takes no real input; a plain Tool still passes one string
    # argument, which the handler accepts and ignores.
    toolkit.append(
        Tool(
            name="textweb_snapshot",
            func=lambda _="": browser.snapshot(),
            description="Re-render the current page as text. Use after waiting for dynamic content to load.",
        )
    )
    return toolkit
157
+
158
+
159
+ # ─── Quick Test ───────────────────────────────────────────────────────────────
160
+
161
if __name__ == "__main__":
    # Smoke test: render the URL given on the command line (or example.com)
    # through a client pointed at the default server and print the result.
    import sys

    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else "https://example.com"
    print(TextWebClient().navigate(target))
@@ -0,0 +1,37 @@
1
+ # TextWeb Browser
2
+
3
+ You have access to a text-based web browser via the `textweb_*` tools. Pages are rendered as structured character grids instead of screenshots.
4
+
5
+ ## How It Works
6
+
7
+ - `textweb_navigate(url)` — Opens a page and returns a text grid
8
+ - `textweb_click(ref)` — Clicks element `[ref]`
9
+ - `textweb_type(ref, text)` — Types into input `[ref]`
10
+ - `textweb_select(ref, value)` — Selects dropdown option
11
+ - `textweb_scroll(direction, amount=1)` — Scrolls `up`, `down`, or to the `top`; `amount` is the number of pages (optional)
12
+ - `textweb_snapshot()` — Re-renders current page
13
+ - `textweb_press(key)` — Presses a key (Enter, Tab, etc.)
14
+ - `textweb_upload(ref, path)` — Uploads a file to input
15
+
16
+ ## Reading the Grid
17
+
18
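+ Interactive elements have reference numbers in brackets. In the Action column, names such as `click(3)` are shorthand for the corresponding `textweb_*` tool (e.g. `textweb_click(3)`):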
19
+
20
+ | Element | Appears as | Action |
21
+ |---------|-----------|--------|
22
+ | Link | `[3]Click me` | `click(3)` |
23
+ | Button | `[5 Submit]` | `click(5)` |
24
+ | Text input | `[7:placeholder___]` | `type(7, "text")` |
25
+ | Checkbox | `[9:X] Label` / `[9: ] Label` | `click(9)` to toggle |
26
+ | Radio | `[11:●] Option` / `[11:○] Option` | `click(11)` |
27
+ | Dropdown | `[13:▼ Selected]` | `select(13, "value")` |
28
+ | File input | `[15:📎 Choose file]` | `upload(15, "/path/to/file")` |
29
+ | Heading | `═══ TITLE ═══` | (not interactive) |
30
+
31
+ ## Tips
32
+
33
+ - The grid preserves spatial layout — elements near each other on screen are near each other in text
34
+ - After clicking a link or submitting a form, you get the new page's grid automatically
35
+ - Use `textweb_snapshot()` if you need to re-read the page after waiting for dynamic content to load
36
+ - For multi-step forms, fill fields then click the Next/Submit button
37
+ - Scroll down if you don't see what you're looking for — the initial view shows only the viewport
@@ -0,0 +1,154 @@
1
+ {
2
+ "name": "textweb",
3
+ "description": "Text-grid web browser for AI agents. Renders pages as structured text with interactive element references instead of screenshots.",
4
+ "tools": [
5
+ {
6
+ "type": "function",
7
+ "function": {
8
+ "name": "textweb_navigate",
9
+ "description": "Navigate to a URL and render the page as a structured text grid. Interactive elements are annotated with [ref] numbers. Returns the text view (~2-5KB) instead of a screenshot (~1MB). No vision model needed.",
10
+ "parameters": {
11
+ "type": "object",
12
+ "properties": {
13
+ "url": {
14
+ "type": "string",
15
+ "description": "The URL to navigate to"
16
+ }
17
+ },
18
+ "required": ["url"]
19
+ }
20
+ }
21
+ },
22
+ {
23
+ "type": "function",
24
+ "function": {
25
+ "name": "textweb_click",
26
+ "description": "Click an interactive element identified by its [ref] number from the text grid.",
27
+ "parameters": {
28
+ "type": "object",
29
+ "properties": {
30
+ "ref": {
31
+ "type": "integer",
32
+ "description": "Element reference number (e.g., 3 for [3])"
33
+ }
34
+ },
35
+ "required": ["ref"]
36
+ }
37
+ }
38
+ },
39
+ {
40
+ "type": "function",
41
+ "function": {
42
+ "name": "textweb_type",
43
+ "description": "Type text into an input field identified by its [ref] number. Replaces existing content.",
44
+ "parameters": {
45
+ "type": "object",
46
+ "properties": {
47
+ "ref": {
48
+ "type": "integer",
49
+ "description": "Element reference number of the input field"
50
+ },
51
+ "text": {
52
+ "type": "string",
53
+ "description": "Text to enter into the field"
54
+ }
55
+ },
56
+ "required": ["ref", "text"]
57
+ }
58
+ }
59
+ },
60
+ {
61
+ "type": "function",
62
+ "function": {
63
+ "name": "textweb_select",
64
+ "description": "Select an option from a dropdown by its [ref] number.",
65
+ "parameters": {
66
+ "type": "object",
67
+ "properties": {
68
+ "ref": {
69
+ "type": "integer",
70
+ "description": "Element reference number of the dropdown"
71
+ },
72
+ "value": {
73
+ "type": "string",
74
+ "description": "Option value or visible text to select"
75
+ }
76
+ },
77
+ "required": ["ref", "value"]
78
+ }
79
+ }
80
+ },
81
+ {
82
+ "type": "function",
83
+ "function": {
84
+ "name": "textweb_scroll",
85
+ "description": "Scroll the current page. Returns the updated text grid.",
86
+ "parameters": {
87
+ "type": "object",
88
+ "properties": {
89
+ "direction": {
90
+ "type": "string",
91
+ "enum": ["up", "down", "top"],
92
+ "description": "Scroll direction"
93
+ },
94
+ "amount": {
95
+ "type": "integer",
96
+ "description": "Pages to scroll (default: 1)",
97
+ "default": 1
98
+ }
99
+ },
100
+ "required": ["direction"]
101
+ }
102
+ }
103
+ },
104
+ {
105
+ "type": "function",
106
+ "function": {
107
+ "name": "textweb_snapshot",
108
+ "description": "Re-render the current page without navigating. Use after waiting for dynamic content.",
109
+ "parameters": {
110
+ "type": "object",
111
+ "properties": {}
112
+ }
113
+ }
114
+ },
115
+ {
116
+ "type": "function",
117
+ "function": {
118
+ "name": "textweb_press",
119
+ "description": "Press a keyboard key (Enter, Tab, Escape, ArrowDown, etc.).",
120
+ "parameters": {
121
+ "type": "object",
122
+ "properties": {
123
+ "key": {
124
+ "type": "string",
125
+ "description": "Key name (e.g., 'Enter', 'Tab', 'Escape')"
126
+ }
127
+ },
128
+ "required": ["key"]
129
+ }
130
+ }
131
+ },
132
+ {
133
+ "type": "function",
134
+ "function": {
135
+ "name": "textweb_upload",
136
+ "description": "Upload a file to a file input element.",
137
+ "parameters": {
138
+ "type": "object",
139
+ "properties": {
140
+ "ref": {
141
+ "type": "integer",
142
+ "description": "Element reference number of the file input"
143
+ },
144
+ "path": {
145
+ "type": "string",
146
+ "description": "Absolute file path to upload"
147
+ }
148
+ },
149
+ "required": ["ref", "path"]
150
+ }
151
+ }
152
+ }
153
+ ]
154
+ }