pythonclaw 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. pythonclaw/__init__.py +17 -0
  2. pythonclaw/__main__.py +6 -0
  3. pythonclaw/channels/discord_bot.py +231 -0
  4. pythonclaw/channels/telegram_bot.py +236 -0
  5. pythonclaw/config.py +190 -0
  6. pythonclaw/core/__init__.py +25 -0
  7. pythonclaw/core/agent.py +773 -0
  8. pythonclaw/core/compaction.py +220 -0
  9. pythonclaw/core/knowledge/rag.py +93 -0
  10. pythonclaw/core/llm/anthropic_client.py +107 -0
  11. pythonclaw/core/llm/base.py +26 -0
  12. pythonclaw/core/llm/gemini_client.py +139 -0
  13. pythonclaw/core/llm/openai_compatible.py +39 -0
  14. pythonclaw/core/llm/response.py +57 -0
  15. pythonclaw/core/memory/manager.py +120 -0
  16. pythonclaw/core/memory/storage.py +164 -0
  17. pythonclaw/core/persistent_agent.py +103 -0
  18. pythonclaw/core/retrieval/__init__.py +6 -0
  19. pythonclaw/core/retrieval/chunker.py +78 -0
  20. pythonclaw/core/retrieval/dense.py +152 -0
  21. pythonclaw/core/retrieval/fusion.py +51 -0
  22. pythonclaw/core/retrieval/reranker.py +112 -0
  23. pythonclaw/core/retrieval/retriever.py +166 -0
  24. pythonclaw/core/retrieval/sparse.py +69 -0
  25. pythonclaw/core/session_store.py +269 -0
  26. pythonclaw/core/skill_loader.py +322 -0
  27. pythonclaw/core/skillhub.py +290 -0
  28. pythonclaw/core/tools.py +622 -0
  29. pythonclaw/core/utils.py +64 -0
  30. pythonclaw/daemon.py +221 -0
  31. pythonclaw/init.py +61 -0
  32. pythonclaw/main.py +489 -0
  33. pythonclaw/onboard.py +290 -0
  34. pythonclaw/scheduler/cron.py +310 -0
  35. pythonclaw/scheduler/heartbeat.py +178 -0
  36. pythonclaw/server.py +145 -0
  37. pythonclaw/session_manager.py +104 -0
  38. pythonclaw/templates/persona/demo_persona.md +2 -0
  39. pythonclaw/templates/skills/communication/CATEGORY.md +4 -0
  40. pythonclaw/templates/skills/communication/email/SKILL.md +54 -0
  41. pythonclaw/templates/skills/communication/email/__pycache__/send_email.cpython-311.pyc +0 -0
  42. pythonclaw/templates/skills/communication/email/send_email.py +88 -0
  43. pythonclaw/templates/skills/data/CATEGORY.md +4 -0
  44. pythonclaw/templates/skills/data/csv_analyzer/SKILL.md +51 -0
  45. pythonclaw/templates/skills/data/csv_analyzer/__pycache__/analyze.cpython-311.pyc +0 -0
  46. pythonclaw/templates/skills/data/csv_analyzer/analyze.py +138 -0
  47. pythonclaw/templates/skills/data/finance/SKILL.md +41 -0
  48. pythonclaw/templates/skills/data/finance/__pycache__/fetch_quote.cpython-311.pyc +0 -0
  49. pythonclaw/templates/skills/data/finance/fetch_quote.py +118 -0
  50. pythonclaw/templates/skills/data/news/SKILL.md +39 -0
  51. pythonclaw/templates/skills/data/news/__pycache__/search_news.cpython-311.pyc +0 -0
  52. pythonclaw/templates/skills/data/news/search_news.py +57 -0
  53. pythonclaw/templates/skills/data/pdf_reader/SKILL.md +40 -0
  54. pythonclaw/templates/skills/data/pdf_reader/__pycache__/read_pdf.cpython-311.pyc +0 -0
  55. pythonclaw/templates/skills/data/pdf_reader/read_pdf.py +113 -0
  56. pythonclaw/templates/skills/data/scraper/SKILL.md +39 -0
  57. pythonclaw/templates/skills/data/scraper/__pycache__/scrape.cpython-311.pyc +0 -0
  58. pythonclaw/templates/skills/data/scraper/scrape.py +92 -0
  59. pythonclaw/templates/skills/data/weather/SKILL.md +42 -0
  60. pythonclaw/templates/skills/data/weather/__pycache__/weather.cpython-311.pyc +0 -0
  61. pythonclaw/templates/skills/data/weather/weather.py +142 -0
  62. pythonclaw/templates/skills/data/youtube/SKILL.md +43 -0
  63. pythonclaw/templates/skills/data/youtube/__pycache__/youtube_info.cpython-311.pyc +0 -0
  64. pythonclaw/templates/skills/data/youtube/youtube_info.py +167 -0
  65. pythonclaw/templates/skills/dev/CATEGORY.md +4 -0
  66. pythonclaw/templates/skills/dev/code_runner/SKILL.md +46 -0
  67. pythonclaw/templates/skills/dev/code_runner/__pycache__/run_code.cpython-311.pyc +0 -0
  68. pythonclaw/templates/skills/dev/code_runner/run_code.py +117 -0
  69. pythonclaw/templates/skills/dev/github/SKILL.md +52 -0
  70. pythonclaw/templates/skills/dev/github/__pycache__/gh.cpython-311.pyc +0 -0
  71. pythonclaw/templates/skills/dev/github/gh.py +165 -0
  72. pythonclaw/templates/skills/dev/http_request/SKILL.md +40 -0
  73. pythonclaw/templates/skills/dev/http_request/__pycache__/request.cpython-311.pyc +0 -0
  74. pythonclaw/templates/skills/dev/http_request/request.py +90 -0
  75. pythonclaw/templates/skills/google/CATEGORY.md +4 -0
  76. pythonclaw/templates/skills/google/workspace/SKILL.md +98 -0
  77. pythonclaw/templates/skills/google/workspace/check_setup.sh +52 -0
  78. pythonclaw/templates/skills/meta/CATEGORY.md +4 -0
  79. pythonclaw/templates/skills/meta/skill_creator/SKILL.md +151 -0
  80. pythonclaw/templates/skills/system/CATEGORY.md +4 -0
  81. pythonclaw/templates/skills/system/change_persona/SKILL.md +41 -0
  82. pythonclaw/templates/skills/system/change_setting/SKILL.md +65 -0
  83. pythonclaw/templates/skills/system/change_setting/__pycache__/update_config.cpython-311.pyc +0 -0
  84. pythonclaw/templates/skills/system/change_setting/update_config.py +129 -0
  85. pythonclaw/templates/skills/system/change_soul/SKILL.md +41 -0
  86. pythonclaw/templates/skills/system/onboarding/SKILL.md +63 -0
  87. pythonclaw/templates/skills/system/onboarding/__pycache__/write_identity.cpython-311.pyc +0 -0
  88. pythonclaw/templates/skills/system/onboarding/write_identity.py +218 -0
  89. pythonclaw/templates/skills/system/random/SKILL.md +33 -0
  90. pythonclaw/templates/skills/system/random/__pycache__/random_util.cpython-311.pyc +0 -0
  91. pythonclaw/templates/skills/system/random/random_util.py +45 -0
  92. pythonclaw/templates/skills/system/time/SKILL.md +33 -0
  93. pythonclaw/templates/skills/system/time/__pycache__/time_util.cpython-311.pyc +0 -0
  94. pythonclaw/templates/skills/system/time/time_util.py +81 -0
  95. pythonclaw/templates/skills/text/CATEGORY.md +4 -0
  96. pythonclaw/templates/skills/text/translator/SKILL.md +47 -0
  97. pythonclaw/templates/skills/text/translator/__pycache__/translate.cpython-311.pyc +0 -0
  98. pythonclaw/templates/skills/text/translator/translate.py +66 -0
  99. pythonclaw/templates/skills/web/CATEGORY.md +4 -0
  100. pythonclaw/templates/skills/web/tavily/SKILL.md +61 -0
  101. pythonclaw/templates/soul/SOUL.md +54 -0
  102. pythonclaw/web/__init__.py +1 -0
  103. pythonclaw/web/app.py +585 -0
  104. pythonclaw/web/static/favicon.png +0 -0
  105. pythonclaw/web/static/index.html +1318 -0
  106. pythonclaw/web/static/logo.png +0 -0
  107. pythonclaw-0.2.0.dist-info/METADATA +410 -0
  108. pythonclaw-0.2.0.dist-info/RECORD +112 -0
  109. pythonclaw-0.2.0.dist-info/WHEEL +5 -0
  110. pythonclaw-0.2.0.dist-info/entry_points.txt +2 -0
  111. pythonclaw-0.2.0.dist-info/licenses/LICENSE +21 -0
  112. pythonclaw-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,138 @@
1
+ #!/usr/bin/env python3
2
+ """Analyze CSV/Excel files with pandas."""
3
+
4
+ import argparse
5
+ import json
6
+ import sys
7
+
8
+ try:
9
+ import pandas as pd
10
+ except ImportError:
11
+ print("Error: pandas not installed. Run: pip install pandas openpyxl",
12
+ file=sys.stderr)
13
+ sys.exit(1)
14
+
15
+
16
+ def load_data(path: str, columns: str | None = None) -> pd.DataFrame:
17
+ ext = path.rsplit(".", 1)[-1].lower()
18
+ if ext in ("xls", "xlsx"):
19
+ df = pd.read_excel(path)
20
+ elif ext == "tsv":
21
+ df = pd.read_csv(path, sep="\t")
22
+ else:
23
+ df = pd.read_csv(path)
24
+ if columns:
25
+ cols = [c.strip() for c in columns.split(",")]
26
+ df = df[cols]
27
+ return df
28
+
29
+
30
def cmd_info(df: pd.DataFrame, as_json: bool) -> None:
    """Print shape, per-column dtype/missing counts, and memory usage."""
    columns = []
    for name in df.columns:
        series = df[name]
        columns.append({
            "name": name,
            "dtype": str(series.dtype),
            "missing": int(series.isna().sum()),
        })
    info = {
        "shape": list(df.shape),
        "columns": columns,
        "memoryMB": round(df.memory_usage(deep=True).sum() / 1e6, 2),
    }
    if as_json:
        print(json.dumps(info, indent=2))
        return
    n_rows, n_cols = df.shape
    print(f"Shape: {n_rows} rows x {n_cols} columns")
    print(f"Memory: {info['memoryMB']} MB\n")
    print(f"{'Column':<30} {'Type':<15} {'Missing'}")
    print("-" * 55)
    for entry in columns:
        print(f"{entry['name']:<30} {entry['dtype']:<15} {entry['missing']}")
48
+
49
+
50
def cmd_head(df: pd.DataFrame, rows: int, as_json: bool) -> None:
    """Print the first *rows* rows as JSON records or aligned text."""
    top = df.head(rows)
    if as_json:
        rendered = top.to_json(orient="records", indent=2, force_ascii=False)
    else:
        rendered = top.to_string(index=False)
    print(rendered)
56
+
57
+
58
def cmd_stats(df: pd.DataFrame, as_json: bool) -> None:
    """Print describe() statistics for the numeric columns, if any."""
    numeric = df.select_dtypes(include="number")
    if numeric.empty:
        print("No numeric columns found.")
        return
    summary = numeric.describe()
    print(summary.to_json(indent=2) if as_json else summary.to_string())
68
+
69
+
70
def cmd_query(df: pd.DataFrame, expr: str, rows: int, as_json: bool) -> None:
    """Filter rows with DataFrame.query and print the first *rows* matches.

    Args:
        df: Source data.
        expr: pandas query expression (e.g. "age > 30").
        rows: Maximum number of matched rows to display.
        as_json: Emit JSON records instead of aligned text.
    """
    result = df.query(expr)
    subset = result.head(rows)
    summary = f"Matched {len(result)} rows (showing first {min(rows, len(result))}):\n"
    if as_json:
        # Fix: keep stdout machine-readable. Previously the human summary
        # was printed to stdout ahead of the JSON payload, so piping the
        # output to a JSON parser failed; route the summary to stderr.
        print(summary, file=sys.stderr)
        print(subset.to_json(orient="records", indent=2, force_ascii=False))
    else:
        print(summary)
        print(subset.to_string(index=False))
78
+
79
+
80
def cmd_groupby(df: pd.DataFrame, col: str, agg: str, as_json: bool) -> None:
    """Group by *col* and aggregate every other numeric column with *agg*.

    Prints the aggregated table as JSON records or aligned text. Reports a
    readable error instead of a traceback when *col* does not exist or
    there is nothing numeric to aggregate.

    Args:
        df: Source data.
        col: Column to group on (excluded from the aggregated columns).
        agg: Aggregation name understood by DataFrame.agg ("mean", "sum", ...).
        as_json: Emit JSON records instead of aligned text.
    """
    if col not in df.columns:
        # Fix: previously an unknown column surfaced as a raw KeyError
        # traceback from groupby; fail with a friendly message instead.
        print(f"Error: column '{col}' not found.", file=sys.stderr)
        return
    numeric = df.select_dtypes(include="number").columns.tolist()
    if col in numeric:
        numeric.remove(col)
    if not numeric:
        print("No numeric columns to aggregate.")
        return
    result = df.groupby(col)[numeric].agg(agg).reset_index()
    if as_json:
        print(result.to_json(orient="records", indent=2, force_ascii=False))
    else:
        print(result.to_string(index=False))
92
+
93
+
94
def main():
    """CLI entry point: parse arguments, load the file, dispatch a command."""
    parser = argparse.ArgumentParser(description="Analyze CSV/Excel files.")
    parser.add_argument("path", help="Data file path (.csv, .tsv, .xlsx)")
    parser.add_argument("command", nargs="?", default="info",
                        choices=["info", "head", "stats", "query", "groupby", "columns"])
    parser.add_argument("--rows", type=int, default=10)
    parser.add_argument("--query", dest="expr", default=None)
    parser.add_argument("--groupby", default=None)
    parser.add_argument("--agg", default="mean",
                        choices=["mean", "sum", "count", "min", "max"])
    parser.add_argument("--columns", default=None)
    parser.add_argument("--format", choices=["text", "json"], default="text")
    args = parser.parse_args()

    try:
        df = load_data(args.path, args.columns)
    except Exception as exc:
        print(f"Error loading {args.path}: {exc}", file=sys.stderr)
        sys.exit(1)

    as_json = args.format == "json"
    command = args.command

    # Validate command-specific required options up front; messages and
    # exit codes match the per-branch checks they replace.
    if command == "query" and not args.expr:
        print("Error: --query expression required.", file=sys.stderr)
        sys.exit(1)
    if command == "groupby" and not args.groupby:
        print("Error: --groupby column required.", file=sys.stderr)
        sys.exit(1)

    if command == "info":
        cmd_info(df, as_json)
    elif command == "head":
        cmd_head(df, args.rows, as_json)
    elif command == "stats":
        cmd_stats(df, as_json)
    elif command == "query":
        cmd_query(df, args.expr, args.rows, as_json)
    elif command == "groupby":
        cmd_groupby(df, args.groupby, args.agg, as_json)
    elif command == "columns":
        for name in df.columns:
            print(f"  {name} ({df[name].dtype})")


if __name__ == "__main__":
    main()
@@ -0,0 +1,41 @@
1
+ ---
2
+ name: finance
3
+ description: >
4
+ Fetch stock quotes, crypto prices, forex rates, and financial data.
5
+ Use when the user asks about any stock price, market data, company
6
+ financials, or cryptocurrency price.
7
+ ---
8
+
9
+ ## Instructions
10
+
11
+ Fetch real-time financial data for stocks, crypto, and forex using
12
+ Yahoo Finance (via the `yfinance` library).
13
+
14
+ ### Prerequisites
15
+
16
+ Install the dependency: `pip install yfinance`
17
+
18
+ No API key needed — Yahoo Finance is free.
19
+
20
+ ### Usage
21
+
22
+ ```bash
23
+ python {skill_path}/fetch_quote.py SYMBOL [SYMBOL2 ...]
24
+ ```
25
+
26
+ Options:
27
+ - `--format json` — output as JSON (default: human-readable text)
28
+ - `--history 5d` — include price history (1d, 5d, 1mo, 3mo, 6mo, 1y, 5y, max)
29
+
30
+ ### Examples
31
+
32
+ - "What's Tesla's stock price?" → `python {skill_path}/fetch_quote.py TSLA`
33
+ - "Compare AAPL and MSFT" → `python {skill_path}/fetch_quote.py AAPL MSFT`
34
+ - "Show Bitcoin price" → `python {skill_path}/fetch_quote.py BTC-USD`
35
+ - "EUR/USD exchange rate" → `python {skill_path}/fetch_quote.py EURUSD=X`
36
+
37
+ ## Resources
38
+
39
+ | File | Description |
40
+ |------|-------------|
41
+ | `fetch_quote.py` | Multi-symbol financial data fetcher |
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env python3
2
+ """Fetch financial quotes from Yahoo Finance."""
3
+
4
+ import argparse
5
+ import json
6
+ import sys
7
+
8
+ try:
9
+ import yfinance as yf
10
+ except ImportError:
11
+ print("Error: yfinance not installed. Run: pip install yfinance", file=sys.stderr)
12
+ sys.exit(1)
13
+
14
+
15
def fetch_quote(symbol: str, history: str | None = None) -> dict:
    """Fetch a snapshot quote (and optional price history) for one symbol.

    Args:
        symbol: Ticker symbol understood by Yahoo Finance (e.g. "TSLA",
            "BTC-USD", "EURUSD=X").
        history: Optional period string ("1d", "5d", "1mo", ...); when set,
            a "history" list of daily close/volume records is attached.

    Returns:
        Dict of quote fields; fields missing from the upstream data are None.
    """
    ticker = yf.Ticker(symbol)
    info = ticker.info

    quote = {
        "symbol": symbol.upper(),
        "name": info.get("shortName") or info.get("longName", symbol),
        "price": info.get("currentPrice") or info.get("regularMarketPrice"),
        "currency": info.get("currency", "USD"),
    }
    # The remaining output fields map 1:1 onto yfinance info keys.
    field_map = {
        "change": "regularMarketChange",
        "changePercent": "regularMarketChangePercent",
        "dayHigh": "dayHigh",
        "dayLow": "dayLow",
        "volume": "volume",
        "marketCap": "marketCap",
        "fiftyTwoWeekHigh": "fiftyTwoWeekHigh",
        "fiftyTwoWeekLow": "fiftyTwoWeekLow",
    }
    for out_key, info_key in field_map.items():
        quote[out_key] = info.get(info_key)

    if history:
        frame = ticker.history(period=history)
        if not frame.empty:
            quote["history"] = [
                {
                    "date": ts.strftime("%Y-%m-%d"),
                    "close": round(day["Close"], 2),
                    "volume": int(day["Volume"]),
                }
                for ts, day in frame.iterrows()
            ]

    return quote
47
+
48
+
49
def format_text(data: dict) -> str:
    """Render a quote dict from fetch_quote as human-readable lines.

    Args:
        data: Quote fields; missing/None fields are skipped gracefully.

    Returns:
        Multi-line summary string (name/price, volume, market cap, day and
        52-week ranges, and up to 5 trailing history points).
    """
    lines = [f"{data['name']} ({data['symbol']})"]

    price = data.get("price")
    if price is not None:
        ccy = data.get("currency", "")
        change = data.get("change")
        pct = data.get("changePercent")
        change_str = ""
        if change is not None and pct is not None:
            sign = "+" if change >= 0 else ""
            change_str = f" {sign}{change:.2f} ({sign}{pct:.2f}%)"
        lines.append(f"  Price: {ccy} {price:.2f}{change_str}")

    # Fix: the original table carried dead ("Day Range", None) and
    # ("52-Week", None) entries that the `if key` guard always skipped —
    # those ranges are rendered explicitly further below. Only the two
    # live entries remain. Large counts get T/B/M suffixes.
    for label, key in [("Volume", "volume"), ("Market Cap", "marketCap")]:
        val = data.get(key)
        if val is None:
            continue
        if val >= 1e12:
            lines.append(f"  {label}: {val/1e12:.2f}T")
        elif val >= 1e9:
            lines.append(f"  {label}: {val/1e9:.2f}B")
        elif val >= 1e6:
            lines.append(f"  {label}: {val/1e6:.2f}M")
        else:
            lines.append(f"  {label}: {val:,.0f}")

    lo, hi = data.get("dayLow"), data.get("dayHigh")
    if lo and hi:
        lines.append(f"  Day Range: {lo:.2f} - {hi:.2f}")

    lo52, hi52 = data.get("fiftyTwoWeekLow"), data.get("fiftyTwoWeekHigh")
    if lo52 and hi52:
        lines.append(f"  52-Week: {lo52:.2f} - {hi52:.2f}")

    if "history" in data:
        lines.append(f"  History ({len(data['history'])} points):")
        for h in data["history"][-5:]:
            lines.append(f"    {h['date']}: {h['close']}")

    return "\n".join(lines)
89
+
90
+
91
def main():
    """CLI entry point: fetch one or more symbols and print them."""
    parser = argparse.ArgumentParser(description="Fetch financial quotes.")
    parser.add_argument("symbols", nargs="+", help="Ticker symbols (e.g. TSLA AAPL BTC-USD)")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    parser.add_argument("--history", default=None, help="Price history period (1d,5d,1mo,3mo,6mo,1y,5y,max)")
    args = parser.parse_args()

    results = []
    for sym in args.symbols:
        try:
            results.append(fetch_quote(sym.strip(), history=args.history))
        except Exception as exc:
            # One bad symbol should not abort the rest of the batch.
            results.append({"symbol": sym, "error": str(exc)})

    if args.format == "json":
        print(json.dumps(results, indent=2))
        return

    for data in results:
        if "error" in data:
            print(f"{data['symbol']}: Error — {data['error']}")
        else:
            print(format_text(data))
        print()


if __name__ == "__main__":
    main()
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: news
3
+ description: >
4
+ Search and summarise news on any topic using web search.
5
+ Use when the user asks about recent news, current events, or wants
6
+ a news briefing on any subject.
7
+ ---
8
+
9
+ ## Instructions
10
+
11
+ Search for recent news on any topic. This skill uses the built-in
12
+ `web_search` tool (Tavily) or falls back to a script that uses
13
+ DuckDuckGo if Tavily is not configured.
14
+
15
+ ### Usage
16
+
17
+ **Option A — use the `web_search` tool directly** (preferred when Tavily is configured):
18
+
19
+ ```
20
+ web_search(query="latest news about <topic>", topic="news", max_results=10)
21
+ ```
22
+
23
+ **Option B — use the bundled script** (works without Tavily):
24
+
25
+ ```bash
26
+ python {skill_path}/search_news.py "topic" [--max 10]
27
+ ```
28
+
29
+ ### Examples
30
+
31
+ - "What's happening in the tech industry today?"
32
+ - "Give me the latest AI news"
33
+ - "News about the 2026 World Cup"
34
+
35
+ ## Resources
36
+
37
+ | File | Description |
38
+ |------|-------------|
39
+ | `search_news.py` | Fallback news search via DuckDuckGo |
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env python3
2
+ """Search for recent news using DuckDuckGo (no API key required)."""
3
+
4
+ import argparse
5
+ import json
6
+ import sys
7
+
8
+ try:
9
+ from duckduckgo_search import DDGS
10
+ except ImportError:
11
+ print("Error: duckduckgo-search not installed. Run: pip install duckduckgo-search", file=sys.stderr)
12
+ sys.exit(1)
13
+
14
+
15
def search_news(query: str, max_results: int = 10) -> list[dict]:
    """Run a DuckDuckGo news search and normalize the result records.

    Args:
        query: Free-text news query.
        max_results: Upper bound on returned items.

    Returns:
        List of dicts with title/url/source/date and a body snippet
        truncated to 300 characters; absent fields become "".
    """
    with DDGS() as ddgs:
        raw = list(ddgs.news(query, max_results=max_results))
    normalized = []
    for item in raw:
        normalized.append({
            "title": item.get("title", ""),
            "url": item.get("url", ""),
            "source": item.get("source", ""),
            "date": item.get("date", ""),
            "body": item.get("body", "")[:300],
        })
    return normalized
28
+
29
+
30
def main():
    """CLI entry point: search news and print text or JSON results."""
    parser = argparse.ArgumentParser(description="Search news on any topic.")
    parser.add_argument("query", help="News search query")
    parser.add_argument("--max", type=int, default=10, help="Max results (default: 10)")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    args = parser.parse_args()

    results = search_news(args.query, max_results=args.max)

    if args.format == "json":
        print(json.dumps(results, indent=2, ensure_ascii=False))
        return
    if not results:
        print("No news found.")
        return
    for i, r in enumerate(results, 1):
        print(f"{i}. {r['title']}")
        if r["source"]:
            print(f"   Source: {r['source']}   Date: {r['date']}")
        if r["url"]:
            print(f"   {r['url']}")
        if r["body"]:
            print(f"   {r['body']}")
        print()


if __name__ == "__main__":
    main()
@@ -0,0 +1,40 @@
1
+ ---
2
+ name: pdf_reader
3
+ description: >
4
+ Extract text content from PDF files. Supports multi-page PDFs,
5
+ page-level extraction, and metadata reading. Use when the user asks
6
+ to read, extract, or analyze content from a PDF document.
7
+ ---
8
+
9
+ ## Instructions
10
+
11
+ Extract text and metadata from PDF files.
12
+
13
+ ### Prerequisites
14
+
15
+ Install dependency: `pip install PyPDF2`
16
+
17
+ ### Usage
18
+
19
+ ```bash
20
+ python {skill_path}/read_pdf.py PATH_TO_PDF [options]
21
+ ```
22
+
23
+ Options:
24
+ - `--pages 1-5` — extract only specific pages (1-indexed, supports ranges)
25
+ - `--metadata` — include PDF metadata (author, title, creation date)
26
+ - `--format json` — output as JSON
27
+ - `--summary` — show page count and character count overview only
28
+
29
+ ### Examples
30
+
31
+ - "Read this PDF" → `python {skill_path}/read_pdf.py document.pdf`
32
+ - "Extract pages 2-4 from report.pdf" → `python {skill_path}/read_pdf.py report.pdf --pages 2-4`
33
+ - "What's in this PDF?" → `python {skill_path}/read_pdf.py file.pdf --summary`
34
+ - "Get PDF metadata" → `python {skill_path}/read_pdf.py file.pdf --metadata`
35
+
36
+ ## Resources
37
+
38
+ | File | Description |
39
+ |------|-------------|
40
+ | `read_pdf.py` | PDF text extractor |
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env python3
2
+ """Extract text and metadata from PDF files."""
3
+
4
+ import argparse
5
+ import json
6
+ import sys
7
+
8
+ try:
9
+ from PyPDF2 import PdfReader
10
+ except ImportError:
11
+ print("Error: PyPDF2 not installed. Run: pip install PyPDF2", file=sys.stderr)
12
+ sys.exit(1)
13
+
14
+
15
def parse_page_range(spec: str, total: int) -> list[int]:
    """Parse a page spec like '1-5' or '2,4,6' into sorted 0-based indices.

    Out-of-range pages are clamped (for ranges) or dropped (for single
    pages) rather than raising.

    Args:
        spec: Comma-separated 1-indexed page numbers and/or dash ranges.
        total: Total number of pages in the document.

    Returns:
        Sorted list of unique 0-based page indices.
    """
    selected: set[int] = set()
    for token in (part.strip() for part in spec.split(",")):
        if "-" in token:
            lo_s, hi_s = token.split("-", 1)
            lo = max(1, int(lo_s))
            hi = min(total, int(hi_s))
            selected.update(range(lo - 1, hi))
        else:
            page = int(token) - 1
            if 0 <= page < total:
                selected.add(page)
    return sorted(selected)
30
+
31
+
32
def extract_text(path: str, pages: list[int] | None = None) -> dict:
    """Extract text and metadata from a PDF.

    Args:
        path: Path to the PDF file.
        pages: Optional 0-based page indices; defaults to all pages.
            Out-of-range indices are silently skipped.

    Returns:
        Dict with path, totalPages, extractedPages, metadata, and a
        "pages" list of {"page": 1-based number, "text": str} records.
    """
    reader = PdfReader(path)
    total = len(reader.pages)

    if pages is None:
        pages = list(range(total))

    extracted = []
    for i in pages:
        if 0 <= i < total:
            # extract_text() can return None (e.g. image-only pages).
            text = reader.pages[i].extract_text() or ""
            extracted.append({"page": i + 1, "text": text})

    meta_raw = reader.metadata
    metadata = {}
    if meta_raw:
        for key in ("title", "author", "subject", "creator", "producer"):
            val = getattr(meta_raw, key, None)
            if val:
                metadata[key] = str(val)
        # Fix: the creation_date property parses the raw /CreationDate
        # string and can raise on malformed PDFs; the original accessed it
        # unguarded while every other field was read defensively.
        try:
            created = meta_raw.creation_date
        except Exception:
            created = None
        if created:
            metadata["created"] = str(created)

    return {
        "path": path,
        "totalPages": total,
        "extractedPages": len(extracted),
        "metadata": metadata,
        "pages": extracted,
    }
62
+
63
+
64
def main():
    """CLI entry point: extract PDF text/metadata and print it."""
    parser = argparse.ArgumentParser(description="Extract text from PDF files.")
    parser.add_argument("path", help="Path to the PDF file")
    parser.add_argument("--pages", default=None, help="Page range (e.g. '1-5' or '2,4,6')")
    parser.add_argument("--metadata", action="store_true", help="Show metadata only")
    parser.add_argument("--summary", action="store_true", help="Show summary only")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    args = parser.parse_args()

    # First open validates the file and yields the page count needed to
    # resolve a --pages spec before the extraction pass.
    try:
        total = len(PdfReader(args.path).pages)
    except Exception as exc:
        print(f"Error opening PDF: {exc}", file=sys.stderr)
        sys.exit(1)

    page_indices = parse_page_range(args.pages, total) if args.pages else None
    data = extract_text(args.path, page_indices)

    if args.format == "json":
        print(json.dumps(data, indent=2, ensure_ascii=False))
        return

    if args.metadata:
        print(f"File: {args.path} ({total} pages)")
        for k, v in data["metadata"].items():
            print(f"  {k}: {v}")
        return

    if args.summary:
        total_chars = sum(len(p["text"]) for p in data["pages"])
        print(f"File: {args.path}")
        print(f"  Pages: {total}")
        print(f"  Characters: {total_chars:,}")
        for p in data["pages"]:
            print(f"  Page {p['page']}: {len(p['text']):,} chars")
        return

    print(f"File: {args.path} ({data['extractedPages']}/{total} pages)\n")
    for p in data["pages"]:
        print(f"--- Page {p['page']} ---")
        # Slicing covers both the long and short case identically.
        print(p["text"][:5000])
        print()


if __name__ == "__main__":
    main()
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: web_scraper
3
+ description: >
4
+ Scrape and extract content from web pages. Supports extracting text,
5
+ links, headings, and structured data. Use when the user asks to read
6
+ a web page, extract information from a URL, or scrape website content.
7
+ ---
8
+
9
+ ## Instructions
10
+
11
+ Scrape and extract readable content from any web page.
12
+
13
+ ### Prerequisites
14
+
15
+ Install dependencies: `pip install requests beautifulsoup4`
16
+
17
+ ### Usage
18
+
19
+ ```bash
20
+ python {skill_path}/scrape.py URL [--format text|json|links|headings]
21
+ ```
22
+
23
+ Formats:
24
+ - `text` (default) — cleaned readable text
25
+ - `json` — structured JSON with title, text, links, headings
26
+ - `links` — all links on the page
27
+ - `headings` — all headings (h1–h6)
28
+
29
+ ### Examples
30
+
31
+ - "Read the content of https://example.com"
32
+ - "Extract all links from https://news.ycombinator.com"
33
+ - "What does this page say? https://some-article.com/post"
34
+
35
+ ## Resources
36
+
37
+ | File | Description |
38
+ |------|-------------|
39
+ | `scrape.py` | Generic web page scraper |
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env python3
2
+ """Scrape and extract content from a web page."""
3
+
4
+ import argparse
5
+ import json
6
+ import sys
7
+
8
+ try:
9
+ import requests
10
+ from bs4 import BeautifulSoup
11
+ except ImportError:
12
+ print(
13
+ "Error: requests and/or beautifulsoup4 not installed.\n"
14
+ "Run: pip install requests beautifulsoup4",
15
+ file=sys.stderr,
16
+ )
17
+ sys.exit(1)
18
+
19
+
20
# Request headers sent with every fetch. A desktop-Chrome User-Agent is
# presented instead of requests' default — presumably so sites that vary or
# block responses by UA serve the normal page; TODO confirm which targets
# actually require this.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
}
26
+
27
+
28
def scrape(url: str) -> dict:
    """Fetch *url* and extract title, readable text, links, and headings.

    Args:
        url: Page URL to download.

    Returns:
        Dict with url, title, text (capped at 10,000 chars), links (first
        100 absolute links as {"text", "url"}), and headings (h1-h6 as
        {"level", "text"}).

    Raises:
        requests.HTTPError: On non-2xx responses.
    """
    response = requests.get(url, headers=HEADERS, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Drop boilerplate elements so get_text() yields mostly page copy.
    for noise in soup(["script", "style", "nav", "footer", "header"]):
        noise.decompose()

    if soup.title and soup.title.string:
        title = soup.title.string.strip()
    else:
        title = ""
    body_text = soup.get_text(separator="\n", strip=True)

    links = [
        {"text": a.get_text(strip=True), "url": a["href"]}
        for a in soup.find_all("a", href=True)
        if a["href"].startswith("http")
    ]

    headings = [
        {"level": level, "text": h.get_text(strip=True)}
        for level in range(1, 7)
        for h in soup.find_all(f"h{level}")
    ]

    return {
        "url": url,
        "title": title,
        "text": body_text[:10000],
        "links": links[:100],
        "headings": headings,
    }
58
+
59
+
60
def main():
    """CLI entry point: scrape one URL and print it in the chosen format."""
    parser = argparse.ArgumentParser(description="Scrape a web page.")
    parser.add_argument("url", help="URL to scrape")
    parser.add_argument(
        "--format",
        choices=["text", "json", "links", "headings"],
        default="text",
        help="Output format",
    )
    args = parser.parse_args()

    try:
        data = scrape(args.url)
    except Exception as exc:
        print(f"Error scraping {args.url}: {exc}", file=sys.stderr)
        sys.exit(1)

    fmt = args.format
    if fmt == "json":
        print(json.dumps(data, indent=2, ensure_ascii=False))
    elif fmt == "links":
        for link in data["links"]:
            print(f"  {link['text']} -> {link['url']}")
    elif fmt == "headings":
        for h in data["headings"]:
            pad = "  " * (h["level"] - 1)
            print(f"{pad}h{h['level']}: {h['text']}")
    else:
        print(f"Title: {data['title']}\n")
        print(data["text"][:5000])


if __name__ == "__main__":
    main()