npm - textweb - Versions diffs - 0.1.0 - Mend

textweb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/LICENSE +21 -0
package/README.md +231 -0
package/docs/index.html +761 -0
package/mcp/index.js +275 -0
package/package.json +34 -0
package/src/apply.js +565 -0
package/src/browser.js +134 -0
package/src/cli.js +427 -0
package/src/renderer.js +452 -0
package/src/server.js +504 -0
package/tools/crewai.py +128 -0
package/tools/langchain.py +165 -0
package/tools/system_prompt.md +37 -0
package/tools/tool_definitions.json +154 -0

package/tools/langchain.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""
+TextWeb LangChain Tool Integration
+Wraps the TextWeb HTTP API as LangChain tools for use in agents and chains.
+Usage:
+    from textweb_langchain import get_textweb_tools
+    tools = get_textweb_tools()  # Returns list of LangChain tools
+    agent = initialize_agent(tools, llm, agent="zero-shot-react-description")
+Requires:
+    pip install langchain requests
+    textweb --serve 3000  (run the TextWeb server)
+"""
+import json
+import requests
+from typing import Optional
+try:
+    from langchain.tools import Tool, StructuredTool
+    from langchain.pydantic_v1 import BaseModel, Field
+except ImportError:
+    raise ImportError("Install langchain: pip install langchain")
+DEFAULT_BASE_URL = "http://localhost:3000"  # Fallback server address used by TextWebClient and get_textweb_tools (matches `textweb --serve 3000`)
+class TextWebClient:
+    """HTTP client for the TextWeb server."""
+    def __init__(self, base_url: str = DEFAULT_BASE_URL):
+        self.base_url = base_url.rstrip("/")
+        self.session = requests.Session()
+    def _post(self, endpoint: str, data: dict) -> dict:
+        resp = self.session.post(f"{self.base_url}{endpoint}", json=data, timeout=30)
+        resp.raise_for_status()
+        return resp.json()
+    def _get(self, endpoint: str) -> dict:
+        resp = self.session.get(f"{self.base_url}{endpoint}", timeout=30)
+        resp.raise_for_status()
+        return resp.json()
+    def navigate(self, url: str) -> str:
+        result = self._post("/navigate", {"url": url})
+        return self._format(result)
+    def click(self, ref: int) -> str:
+        result = self._post("/click", {"ref": ref})
+        return self._format(result)
+    def type_text(self, ref: int, text: str) -> str:
+        result = self._post("/type", {"ref": ref, "text": text})
+        return self._format(result)
+    def select(self, ref: int, value: str) -> str:
+        result = self._post("/select", {"ref": ref, "value": value})
+        return self._format(result)
+    def scroll(self, direction: str = "down", amount: int = 1) -> str:
+        result = self._post("/scroll", {"direction": direction, "amount": amount})
+        return self._format(result)
+    def snapshot(self) -> str:
+        result = self._get("/snapshot")
+        return self._format(result)
+    def _format(self, result: dict) -> str:
+        view = result.get("view", "")
+        elements = result.get("elements", {})
+        meta = result.get("meta", {})
+        refs = "\n".join(
+            f"[{ref}] {el.get('semantic', '?')}: {el.get('text', '(no text)')}"
+            for ref, el in elements.items()
+        )
+        return f"URL: {meta.get('url', 'unknown')}\nTitle: {meta.get('title', 'unknown')}\nRefs: {meta.get('totalRefs', 0)}\n\n{view}\n\nInteractive elements:\n{refs}"
+# ─── Pydantic Schemas ─────────────────────────────────────────────────────────
+class NavigateInput(BaseModel):  # Argument schema for the textweb_navigate tool
+    url: str = Field(description="URL to navigate to")
+class ClickInput(BaseModel):  # Argument schema for the textweb_click tool
+    ref: int = Field(description="Element reference number to click")
+class TypeInput(BaseModel):  # Argument schema for the textweb_type tool
+    ref: int = Field(description="Element reference number of the input field")
+    text: str = Field(description="Text to type into the field")
+class SelectInput(BaseModel):  # Argument schema for the textweb_select tool
+    ref: int = Field(description="Element reference number of the dropdown")
+    value: str = Field(description="Option value or text to select")
+class ScrollInput(BaseModel):  # Argument schema for the textweb_scroll tool
+    direction: str = Field(description="Scroll direction: up, down, or top")  # Required here, although TextWebClient.scroll itself defaults to "down"
+    amount: int = Field(default=1, description="Number of pages to scroll")  # Optional; one page when omitted
+# ─── Tool Factory ─────────────────────────────────────────────────────────────
+def get_textweb_tools(base_url: str = DEFAULT_BASE_URL) -> list:
+    """
+    Create LangChain tools for TextWeb browser interaction.
+    Args:
+        base_url: URL of the running TextWeb HTTP server (default: http://localhost:3000)
+    Returns:
+        List of LangChain StructuredTool instances
+    """
+    client = TextWebClient(base_url)
+    return [
+        StructuredTool.from_function(
+            func=lambda url: client.navigate(url),
+            name="textweb_navigate",
+            description="Navigate to a URL and render it as a text grid. Interactive elements are marked with [ref] numbers. Returns ~2-5KB of text instead of a 1MB screenshot. No vision model needed.",
+            args_schema=NavigateInput,
+        ),
+        StructuredTool.from_function(
+            func=lambda ref: client.click(ref),
+            name="textweb_click",
+            description="Click an interactive element by its [ref] number from the text grid.",
+            args_schema=ClickInput,
+        ),
+        StructuredTool.from_function(
+            func=lambda ref, text: client.type_text(ref, text),
+            name="textweb_type",
+            description="Type text into an input field by its [ref] number. Replaces existing content.",
+            args_schema=TypeInput,
+        ),
+        StructuredTool.from_function(
+            func=lambda ref, value: client.select(ref, value),
+            name="textweb_select",
+            description="Select an option from a dropdown by its [ref] number.",
+            args_schema=SelectInput,
+        ),
+        StructuredTool.from_function(
+            func=lambda direction, amount=1: client.scroll(direction, amount),
+            name="textweb_scroll",
+            description="Scroll the page up/down/top. Returns updated text grid.",
+            args_schema=ScrollInput,
+        ),
+        Tool(
+            name="textweb_snapshot",
+            func=lambda _="": client.snapshot(),
+            description="Re-render the current page as text. Use after waiting for dynamic content to load.",
+        ),
+    ]
+# ─── Quick Test ───────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    import sys
+    url = sys.argv[1] if len(sys.argv) > 1 else "https://example.com"
+    client = TextWebClient()
+    print(client.navigate(url))

package/tools/system_prompt.md ADDED Viewed

@@ -0,0 +1,37 @@
+# TextWeb Browser
+You have access to a text-based web browser via the `textweb_*` tools. Pages are rendered as structured character grids instead of screenshots.
+## How It Works
+- `textweb_navigate(url)` — Opens a page and returns a text grid
+- `textweb_click(ref)` — Clicks element `[ref]`
+- `textweb_type(ref, text)` — Types into input `[ref]`
+- `textweb_select(ref, value)` — Selects dropdown option
+- `textweb_scroll(direction, amount=1)` — Scrolls up/down/top by `amount` pages (default 1)
+- `textweb_snapshot()` — Re-renders current page
+- `textweb_press(key)` — Presses a key (Enter, Tab, etc.)
+- `textweb_upload(ref, path)` — Uploads a file to input
+## Reading the Grid
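+Interactive elements have reference numbers in brackets. In the Action column, `click`, `type`, `select` and `upload` are shorthand for the corresponding `textweb_*` tools: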
+| Element | Appears as | Action |
+|---------|-----------|--------|
+| Link | `[3]Click me` | `click(3)` |
+| Button | `[5 Submit]` | `click(5)` |
+| Text input | `[7:placeholder___]` | `type(7, "text")` |
+| Checkbox | `[9:X] Label` / `[9: ] Label` | `click(9)` to toggle |
+| Radio | `[11:●] Option` / `[11:○] Option` | `click(11)` |
+| Dropdown | `[13:▼ Selected]` | `select(13, "value")` |
+| File input | `[15:📎 Choose file]` | `upload(15, "/path/to/file")` |
+| Heading | `═══ TITLE ═══` | (not interactive) |
+## Tips
+- The grid preserves spatial layout — elements near each other on screen are near each other in text
+- After clicking a link or submitting a form, you get the new page's grid automatically
+- Use `textweb_snapshot()` if you need to re-read the page after waiting for dynamic content
+- For multi-step forms, fill fields then click the Next/Submit button
+- Scroll down if you don't see what you're looking for — the initial view shows only the viewport

package/tools/tool_definitions.json ADDED Viewed

@@ -0,0 +1,154 @@
+{
+  "name": "textweb",
+  "description": "Text-grid web browser for AI agents. Renders pages as structured text with interactive element references instead of screenshots.",
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "textweb_navigate",
+        "description": "Navigate to a URL and render the page as a structured text grid. Interactive elements are annotated with [ref] numbers. Returns the text view (~2-5KB) instead of a screenshot (~1MB). No vision model needed.",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "url": {
+              "type": "string",
+              "description": "The URL to navigate to"
+            }
+          },
+          "required": ["url"]
+        }
+      }
+    },
+    {
+      "type": "function",
+      "function": {
+        "name": "textweb_click",
+        "description": "Click an interactive element identified by its [ref] number from the text grid.",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "ref": {
+              "type": "integer",
+              "description": "Element reference number (e.g., 3 for [3])"
+            }
+          },
+          "required": ["ref"]
+        }
+      }
+    },
+    {
+      "type": "function",
+      "function": {
+        "name": "textweb_type",
+        "description": "Type text into an input field identified by its [ref] number. Replaces existing content.",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "ref": {
+              "type": "integer",
+              "description": "Element reference number of the input field"
+            },
+            "text": {
+              "type": "string",
+              "description": "Text to enter into the field"
+            }
+          },
+          "required": ["ref", "text"]
+        }
+      }
+    },
+    {
+      "type": "function",
+      "function": {
+        "name": "textweb_select",
+        "description": "Select an option from a dropdown by its [ref] number.",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "ref": {
+              "type": "integer",
+              "description": "Element reference number of the dropdown"
+            },
+            "value": {
+              "type": "string",
+              "description": "Option value or visible text to select"
+            }
+          },
+          "required": ["ref", "value"]
+        }
+      }
+    },
+    {
+      "type": "function",
+      "function": {
+        "name": "textweb_scroll",
+        "description": "Scroll the current page. Returns the updated text grid.",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "direction": {
+              "type": "string",
+              "enum": ["up", "down", "top"],
+              "description": "Scroll direction"
+            },
+            "amount": {
+              "type": "integer",
+              "description": "Pages to scroll (default: 1)",
+              "default": 1
+            }
+          },
+          "required": ["direction"]
+        }
+      }
+    },
+    {
+      "type": "function",
+      "function": {
+        "name": "textweb_snapshot",
+        "description": "Re-render the current page without navigating. Use after waiting for dynamic content.",
+        "parameters": {
+          "type": "object",
+          "properties": {}
+        }
+      }
+    },
+    {
+      "type": "function",
+      "function": {
+        "name": "textweb_press",
+        "description": "Press a keyboard key (Enter, Tab, Escape, ArrowDown, etc.).",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "key": {
+              "type": "string",
+              "description": "Key name (e.g., 'Enter', 'Tab', 'Escape')"
+            }
+          },
+          "required": ["key"]
+        }
+      }
+    },
+    {
+      "type": "function",
+      "function": {
+        "name": "textweb_upload",
+        "description": "Upload a file to a file input element.",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "ref": {
+              "type": "integer",
+              "description": "Element reference number of the file input"
+            },
+            "path": {
+              "type": "string",
+              "description": "Absolute file path to upload"
+            }
+          },
+          "required": ["ref", "path"]
+        }
+      }
+    }
+  ]
+}