PyPI - parallel-web-tools - Versions diffs - 0.2.0__tar.gz → 0.3.0rc2__tar.gz - Mend

parallel-web-tools 0.2.0 (tar.gz) → 0.3.0rc2 (tar.gz)

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/.gitignore RENAMED Viewed

@@ -12,6 +12,8 @@ wheels/
 # Environment files
 .env.local
 .env
+.envrc
+.direnv
 # Data files
 data/

{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: parallel-web-tools
-Version: 0.2.0
+Version: 0.3.0rc2
 Summary: Parallel Tools: CLI and Python SDK for AI-powered web intelligence
 Project-URL: Homepage, https://github.com/parallel-web/parallel-web-tools
 Project-URL: Documentation, https://docs.parallel.ai
@@ -51,19 +51,19 @@ Requires-Dist: duckdb>=1.0.0; extra == 'dev'
 Requires-Dist: nest-asyncio>=1.6.0; extra == 'dev'
 Requires-Dist: pandas>=2.3.0; extra == 'dev'
 Requires-Dist: polars>=1.37.0; extra == 'dev'
-Requires-Dist: pre-commit>=4.0.0; extra == 'dev'
+Requires-Dist: pre-commit>=4.6.0; extra == 'dev'
 Requires-Dist: pyarrow>=18.0.0; extra == 'dev'
-Requires-Dist: pyinstaller>=6.0.0; extra == 'dev'
+Requires-Dist: pyinstaller>=6.20.0; extra == 'dev'
 Requires-Dist: pyspark>=3.4.0; extra == 'dev'
-Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
-Requires-Dist: pytest>=8.0.0; extra == 'dev'
+Requires-Dist: pytest-cov>=7.0.0; extra == 'dev'
+Requires-Dist: pytest>=9.0.0; extra == 'dev'
 Requires-Dist: pyyaml>=6.0.0; extra == 'dev'
 Requires-Dist: questionary>=2.0.0; extra == 'dev'
-Requires-Dist: ruff>=0.14.0; extra == 'dev'
+Requires-Dist: ruff>=0.15.0; extra == 'dev'
 Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'dev'
 Requires-Dist: sqlalchemy-bigquery>=1.11.0; extra == 'dev'
 Requires-Dist: sqlalchemy>=2.0.0; extra == 'dev'
-Requires-Dist: ty>=0.0.21; extra == 'dev'
+Requires-Dist: ty>=0.0.33; extra == 'dev'
 Provides-Extra: duckdb
 Requires-Dist: duckdb>=1.0.0; extra == 'duckdb'
 Requires-Dist: nest-asyncio>=1.6.0; extra == 'duckdb'

{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/__init__.py RENAMED Viewed

@@ -29,7 +29,7 @@ from parallel_web_tools.core import (
     run_tasks,
 )
-__version__ = "0.2.0"
+__version__ = "0.3.0rc2"
 __all__ = [
     # Auth

{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/commands.py RENAMED Viewed

@@ -7,7 +7,7 @@ import os
 import sys
 import tempfile
 import time
-from typing import Any
+from typing import Any, NoReturn
 import click
 import httpx
@@ -15,6 +15,7 @@ from dotenv import load_dotenv
 from rich.console import Console
 from parallel_web_tools import __version__
+from parallel_web_tools.cli.skills import create_skills_group
 from parallel_web_tools.core import (
     AVAILABLE_PROCESSORS,
     FINDALL_GENERATORS,
@@ -170,7 +171,7 @@ def _handle_error(
     output_json: bool = False,
     exit_code: int = EXIT_API_ERROR,
     prefix: str = "Error",
-) -> None:
+) -> NoReturn:
     """Handle an error with appropriate output format and exit code.
     In --json mode, outputs structured JSON to stdout. Otherwise, prints a
@@ -318,12 +319,12 @@ def parse_inline_data(data_json: str) -> tuple[str, list[dict[str, str]]]:
         raise click.BadParameter("Data must be an array of objects")
     # Infer columns from the first row
-    columns = list(data[0].keys())
+    columns: list[str] = [str(k) for k in data[0].keys()]
     if not columns:
         raise click.BadParameter("Data objects must have at least one field")
     # Create source_columns with inferred descriptions
-    source_columns = [{"name": col, "description": f"The {col} field"} for col in columns]
+    source_columns: list[dict[str, str]] = [{"name": col, "description": f"The {col} field"} for col in columns]
     # Write to a temporary CSV file
     temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False, newline="")
@@ -647,39 +648,116 @@ def config_cmd(key: str | None, value: str | None, output_json: bool):
             console.print(f"[green]Set {key} = {format_bool(is_auto_update_check_enabled())}[/green]")
+main.add_command(create_skills_group(console, _handle_error, EXIT_BAD_INPUT, EXIT_API_ERROR))
 # =============================================================================
 # Search Command
 # =============================================================================
+# Beta -> V1 mode mapping. Beta had three modes; V1 has two. We keep the old
+# values as accepted CLI inputs and translate them so existing scripts work.
+_SEARCH_MODE_MAP = {
+    "fast": "basic",
+    "one-shot": "basic",
+    "agentic": "advanced",
+    "basic": "basic",
+    "advanced": "advanced",
+}
+_DEPRECATED_SEARCH_MODES = {"fast", "one-shot", "agentic"}
+def _emit_deprecation(message: str) -> None:
+    """Print a deprecation notice to stderr so it doesn't pollute --json output."""
+    click.echo(f"[deprecated] {message}", err=True)
+def build_search_v1_kwargs(
+    *,
+    objective: str | None,
+    query: tuple[str, ...] | list[str],
+    mode: str | None,
+    max_results: int | None,
+    source_policy: dict[str, Any] | None,
+    excerpt_max_chars_per_result: int | None,
+    excerpt_max_chars_total: int | None,
+    fetch_policy: dict[str, Any] | None,
+    location: str | None = None,
+    session_id: str | None = None,
+    client_model: str | None = None,
+) -> dict[str, Any]:
+    """Translate Beta-style search params to V1 client.search() kwargs.
+    V1 requires search_queries; if the caller only provided an objective, we
+    fall back to using it as the single query so older invocations keep working.
+    """
+    queries = list(query) if query else []
+    if not queries and objective:
+        queries = [objective]
+    kwargs: dict[str, Any] = {"search_queries": queries}
+    if objective:
+        kwargs["objective"] = objective
+    if mode:
+        kwargs["mode"] = _SEARCH_MODE_MAP.get(mode, mode)
+    if excerpt_max_chars_total is not None:
+        kwargs["max_chars_total"] = excerpt_max_chars_total
+    if session_id:
+        kwargs["session_id"] = session_id
+    if client_model:
+        kwargs["client_model"] = client_model
+    advanced: dict[str, Any] = {}
+    if max_results is not None:
+        advanced["max_results"] = max_results
+    if source_policy:
+        advanced["source_policy"] = source_policy
+    if fetch_policy:
+        advanced["fetch_policy"] = fetch_policy
+    if excerpt_max_chars_per_result is not None:
+        advanced["excerpt_settings"] = {"max_chars_per_result": excerpt_max_chars_per_result}
+    if location:
+        advanced["location"] = location
+    if advanced:
+        kwargs["advanced_settings"] = advanced
+    return kwargs
 @main.command()
 @click.argument("objective", required=False)
 @click.option("-q", "--query", multiple=True, help="Keyword search query (can be repeated)")
 @click.option(
     "--mode",
-    type=click.Choice(["one-shot", "agentic", "fast"]),
-    default="fast",
-    help="Search mode",
+    type=click.Choice(list(_SEARCH_MODE_MAP.keys())),
+    default="basic",
+    help="Search mode (one-shot/fast → basic, agentic → advanced)",
     show_default=True,
 )
-@click.option("--max-results", type=int, default=10, help="Maximum results", show_default=True)
+@click.option("--max-results", type=int, help="Maximum results (defaults to server-side default of 10)")
 @click.option("--include-domains", multiple=True, help="Only search these domains (comma-separated or repeated)")
 @click.option("--exclude-domains", multiple=True, help="Exclude these domains (comma-separated or repeated)")
 @click.option("--after-date", help="Only results after this date (YYYY-MM-DD)")
-@click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts")
+@click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts (min 1000)")
 @click.option(
     "--excerpt-max-chars-total", type=int, default=60000, help="Max total characters for excerpts", show_default=True
 )
 @click.option("--max-age-seconds", type=int, help="Max age in seconds before fetching live content (min 600)")
 @click.option("--timeout-seconds", type=float, help="Timeout in seconds for fetching live content")
 @click.option("--disable-cache-fallback", is_flag=True, help="Return error instead of stale cached content")
+@click.option("--location", help="ISO 3166-1 alpha-2 country code for geo-targeted results (e.g. us, gb, de)")
+@click.option("--session-id", help="Session ID to group related search/extract calls")
+@click.option(
+    "--client-model",
+    help="The model generating this request and consuming the results (e.g. claude-opus-4-7, gpt-5.4, gemini-3.1-pro)",
+)
 @click.option("-o", "--output", "output_file", type=click.Path(), help="Save results to file (JSON)")
 @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
 def search(
     objective: str | None,
     query: tuple[str, ...],
     mode: str,
-    max_results: int,
+    max_results: int | None,
     include_domains: tuple[str, ...],
     exclude_domains: tuple[str, ...],
     after_date: str | None,
@@ -688,6 +766,9 @@ def search(
     max_age_seconds: int | None,
     timeout_seconds: float | None,
     disable_cache_fallback: bool,
+    location: str | None,
+    session_id: str | None,
+    client_model: str | None,
     output_file: str | None,
     output_json: bool,
 ):
@@ -704,6 +785,24 @@ def search(
     if not objective and not query:
         raise click.UsageError("Provide an OBJECTIVE argument or at least one --query option.")
+    if mode in _DEPRECATED_SEARCH_MODES:
+        new_mode = _SEARCH_MODE_MAP[mode]
+        _emit_deprecation(
+            f"--mode {mode} is a Beta value and will stop working after the Beta API sunset (June 2026). "
+            f"Use --mode {new_mode} instead."
+        )
+    source_policy: dict[str, Any] = {}
+    if include_domains:
+        source_policy["include_domains"] = parse_comma_separated(include_domains)
+    if exclude_domains:
+        source_policy["exclude_domains"] = parse_comma_separated(exclude_domains)
+    domain_total = len(source_policy.get("include_domains", [])) + len(source_policy.get("exclude_domains", []))
+    if domain_total > 200:
+        raise click.UsageError(f"--include-domains and --exclude-domains combined must be <= 200 (got {domain_total}).")
+    if after_date:
+        source_policy["after_date"] = after_date
     try:
         from parallel import Parallel
@@ -712,29 +811,6 @@ def search(
         api_key = get_api_key()
         client = Parallel(api_key=api_key, default_headers=get_default_headers("cli"))
-        search_kwargs: dict[str, Any] = {"mode": mode, "max_results": max_results}
-        if objective:
-            search_kwargs["objective"] = objective
-        if query:
-            search_kwargs["search_queries"] = list(query)
-        source_policy: dict[str, Any] = {}
-        if include_domains:
-            source_policy["include_domains"] = parse_comma_separated(include_domains)
-        if exclude_domains:
-            source_policy["exclude_domains"] = parse_comma_separated(exclude_domains)
-        if after_date:
-            source_policy["after_date"] = after_date
-        if source_policy:
-            search_kwargs["source_policy"] = source_policy
-        # Excerpt settings (max_chars_total has a default, so always set)
-        excerpts_settings: dict[str, Any] = {"max_chars_total": excerpt_max_chars_total}
-        if excerpt_max_chars_per_result is not None:
-            excerpts_settings["max_chars_per_result"] = excerpt_max_chars_per_result
-        search_kwargs["excerpts"] = excerpts_settings
-        # Fetch policy
         fetch_policy: dict[str, Any] = {}
         if max_age_seconds is not None:
             fetch_policy["max_age_seconds"] = max_age_seconds
@@ -742,21 +818,35 @@ def search(
             fetch_policy["timeout_seconds"] = timeout_seconds
         if disable_cache_fallback:
             fetch_policy["disable_cache_fallback"] = True
-        if fetch_policy:
-            search_kwargs["fetch_policy"] = fetch_policy
+        search_kwargs = build_search_v1_kwargs(
+            objective=objective,
+            query=query,
+            mode=mode,
+            max_results=max_results,
+            source_policy=source_policy or None,
+            excerpt_max_chars_per_result=excerpt_max_chars_per_result,
+            excerpt_max_chars_total=excerpt_max_chars_total,
+            fetch_policy=fetch_policy or None,
+            location=location,
+            session_id=session_id,
+            client_model=client_model,
+        )
         if not output_json:
             console.print("[dim]Searching...[/dim]\n")
-        result = client.beta.search(**search_kwargs)
+        result = client.search(**search_kwargs)
         output_data = {
             "search_id": result.search_id,
+            "session_id": getattr(result, "session_id", None),
             "status": "ok",
             "results": [
                 {"url": r.url, "title": r.title, "publish_date": r.publish_date, "excerpts": r.excerpts}
                 for r in result.results
             ],
+            "usage": [{"name": u.name, "count": u.count} for u in (getattr(result, "usage", None) or [])],
             "warnings": [
                 {"type": w.type, "message": w.message, "detail": getattr(w, "detail", None)} for w in result.warnings
             ]
@@ -787,18 +877,69 @@ def search(
 # =============================================================================
+def build_extract_v1_kwargs(
+    *,
+    urls: tuple[str, ...] | list[str],
+    objective: str | None,
+    query: tuple[str, ...] | list[str],
+    full_content: bool,
+    full_content_max_chars: int | None,
+    excerpt_max_chars_per_result: int | None,
+    excerpt_max_chars_total: int | None,
+    fetch_policy: dict[str, Any] | None,
+    session_id: str | None = None,
+    client_model: str | None = None,
+) -> dict[str, Any]:
+    """Translate Beta-style extract params to V1 client.extract() kwargs.
+    Note: V1 always returns excerpts; the old `--no-excerpts` flag can no longer
+    disable them server-side. The CLI handles that flag by filtering excerpts out
+    of the output, not by passing it to the SDK.
+    """
+    kwargs: dict[str, Any] = {"urls": list(urls)}
+    if objective:
+        kwargs["objective"] = objective
+    if query:
+        kwargs["search_queries"] = list(query)
+    if excerpt_max_chars_total is not None:
+        kwargs["max_chars_total"] = excerpt_max_chars_total
+    if session_id:
+        kwargs["session_id"] = session_id
+    if client_model:
+        kwargs["client_model"] = client_model
+    advanced: dict[str, Any] = {}
+    if excerpt_max_chars_per_result is not None:
+        advanced["excerpt_settings"] = {"max_chars_per_result": excerpt_max_chars_per_result}
+    if full_content_max_chars is not None:
+        advanced["full_content"] = {"max_chars_per_result": full_content_max_chars}
+    elif full_content:
+        advanced["full_content"] = True
+    if fetch_policy:
+        advanced["fetch_policy"] = fetch_policy
+    if advanced:
+        kwargs["advanced_settings"] = advanced
+    return kwargs
 @main.command()
 @click.argument("urls", nargs=-1, required=True)
 @click.option("--objective", help="Focus extraction on a specific goal")
 @click.option("-q", "--query", multiple=True, help="Keywords to prioritize (can be repeated)")
 @click.option("--full-content", is_flag=True, help="Include complete page content")
 @click.option("--full-content-max-chars", type=int, help="Max characters per result for full content")
-@click.option("--no-excerpts", is_flag=True, help="Exclude excerpts from output")
+@click.option("--no-excerpts", is_flag=True, help="Strip excerpts from output (V1 always returns them server-side)")
 @click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts (min 1000)")
 @click.option("--excerpt-max-chars-total", type=int, help="Max total characters for excerpts across all URLs")
 @click.option("--max-age-seconds", type=int, help="Max age in seconds before fetching live content (min 600)")
 @click.option("--timeout-seconds", type=float, help="Timeout in seconds for fetching live content")
 @click.option("--disable-cache-fallback", is_flag=True, help="Return error instead of stale cached content")
+@click.option("--session-id", help="Session ID to group related search/extract calls")
+@click.option(
+    "--client-model",
+    help="The model generating this request and consuming the results (e.g. claude-opus-4-7, gpt-5.4, gemini-3.1-pro)",
+)
 @click.option("-o", "--output", "output_file", type=click.Path(), help="Save results to file (JSON)")
 @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
 def extract(
@@ -813,10 +954,23 @@ def extract(
     max_age_seconds: int | None,
     timeout_seconds: float | None,
     disable_cache_fallback: bool,
+    session_id: str | None,
+    client_model: str | None,
     output_file: str | None,
     output_json: bool,
 ):
     """Extract content from URLs as clean markdown."""
+    if no_excerpts:
+        _emit_deprecation(
+            "--no-excerpts no longer disables excerpts server-side (V1 always returns them); "
+            "the flag now just strips them from the CLI output."
+        )
+    if len(urls) > 20:
+        raise click.UsageError(f"V1 extract accepts at most 20 URLs per request (got {len(urls)}).")
+    if objective is not None and len(objective) > 5000:
+        raise click.UsageError(f"--objective must be 5000 characters or fewer (got {len(objective)}).")
     try:
         from parallel import Parallel
@@ -825,30 +979,6 @@ def extract(
         api_key = get_api_key()
         client = Parallel(api_key=api_key, default_headers=get_default_headers("cli"))
-        extract_kwargs: dict[str, Any] = {
-            "urls": list(urls),
-        }
-        # Excerpt settings - can be bool or object with settings
-        if no_excerpts:
-            extract_kwargs["excerpts"] = False
-        elif excerpt_max_chars_per_result is not None or excerpt_max_chars_total is not None:
-            excerpts_settings: dict[str, Any] = {}
-            if excerpt_max_chars_per_result is not None:
-                excerpts_settings["max_chars_per_result"] = excerpt_max_chars_per_result
-            if excerpt_max_chars_total is not None:
-                excerpts_settings["max_chars_total"] = excerpt_max_chars_total
-            extract_kwargs["excerpts"] = excerpts_settings
-        else:
-            extract_kwargs["excerpts"] = True
-        # Full content settings - can be bool or object with settings
-        if full_content_max_chars is not None:
-            extract_kwargs["full_content"] = {"max_chars_per_result": full_content_max_chars}
-        else:
-            extract_kwargs["full_content"] = full_content
-        # Fetch policy
         fetch_policy: dict[str, Any] = {}
         if max_age_seconds is not None:
             fetch_policy["max_age_seconds"] = max_age_seconds
@@ -856,23 +986,29 @@ def extract(
             fetch_policy["timeout_seconds"] = timeout_seconds
         if disable_cache_fallback:
             fetch_policy["disable_cache_fallback"] = True
-        if fetch_policy:
-            extract_kwargs["fetch_policy"] = fetch_policy
-        if objective:
-            extract_kwargs["objective"] = objective
-        if query:
-            extract_kwargs["search_queries"] = list(query)
+        extract_kwargs = build_extract_v1_kwargs(
+            urls=urls,
+            objective=objective,
+            query=query,
+            full_content=full_content,
+            full_content_max_chars=full_content_max_chars,
+            excerpt_max_chars_per_result=excerpt_max_chars_per_result,
+            excerpt_max_chars_total=excerpt_max_chars_total,
+            fetch_policy=fetch_policy or None,
+            session_id=session_id,
+            client_model=client_model,
+        )
         if not output_json:
             console.print(f"[dim]Extracting content from {len(urls)} URL(s)...[/dim]\n")
-        result = client.beta.extract(**extract_kwargs)
+        result = client.extract(**extract_kwargs)
         results_list = []
         for r in result.results:
             result_dict: dict[str, Any] = {"url": r.url, "title": r.title, "publish_date": r.publish_date}
-            if hasattr(r, "excerpts") and r.excerpts:
+            if not no_excerpts and hasattr(r, "excerpts") and r.excerpts:
                 result_dict["excerpts"] = r.excerpts
             if hasattr(r, "full_content") and r.full_content:
                 result_dict["full_content"] = r.full_content
@@ -892,9 +1028,11 @@ def extract(
         output_data = {
             "extract_id": result.extract_id,
+            "session_id": getattr(result, "session_id", None),
             "status": "ok",
             "results": results_list,
             "errors": errors_list,
+            "usage": [{"name": u.name, "count": u.count} for u in (getattr(result, "usage", None) or [])],
             "warnings": [
                 {"type": w.type, "message": w.message, "detail": getattr(w, "detail", None)} for w in result.warnings
             ]
@@ -914,7 +1052,7 @@ def extract(
                 console.print(f"[bold cyan]{r.title}[/bold cyan]")
                 console.print(f"[link={r.url}]{r.url}[/link]\n")
-                if hasattr(r, "excerpts") and r.excerpts:
+                if not no_excerpts and hasattr(r, "excerpts") and r.excerpts:
                     console.print("[dim]Excerpts:[/dim]")
                     for excerpt in r.excerpts[:3]:
                         text = excerpt[:300] + "..." if len(excerpt) > 300 else excerpt

parallel_web_tools-0.3.0rc2/parallel_web_tools/cli/skills.py ADDED Viewed

@@ -0,0 +1,204 @@
+"""Skills CLI commands for parallel-cli."""
+from __future__ import annotations
+import json
+from typing import NoReturn, Protocol
+import click
+from rich.console import Console
+class HandleError(Protocol):
+    def __call__(
+        self,
+        error: Exception,
+        output_json: bool = False,
+        exit_code: int = 0,
+        prefix: str = "Error",
+    ) -> NoReturn: ...
+def create_skills_group(
+    console: Console,
+    handle_error: HandleError,
+    exit_bad_input: int,
+    exit_api_error: int,
+) -> click.Group:
+    """Create the skills command group.
+    Keeps feature-specific command wiring out of ``commands.py`` while retaining
+    lazy imports of the underlying skills implementation.
+    """
+    @click.group(name="skills")
+    def skills() -> None:
+        """Install and manage Parallel agent skills.
+        Set GH_TOKEN for higher GitHub API rate limits when fetching skills.
+        """
+        pass
+    @skills.command(name="list")
+    @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+    def skills_list(output_json: bool) -> None:
+        """List available Parallel skills from GitHub."""
+        from parallel_web_tools.core.skills import SkillsError, get_skills_repo_ref, list_remote_skills
+        try:
+            ref = get_skills_repo_ref()
+            skill_names = list_remote_skills(ref=ref)
+        except SkillsError as e:
+            handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills list failed")
+        except Exception as e:
+            handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills list failed")
+        if output_json:
+            print(json.dumps({"ref": ref, "skills": skill_names, "count": len(skill_names)}, indent=2))
+            return
+        console.print("[bold]Available skills[/bold]")
+        console.print(f"Ref: [cyan]{ref}[/cyan]")
+        for skill_name in skill_names:
+            console.print(f"- [cyan]{skill_name}[/cyan]")
+    @skills.command(name="install")
+    @click.option(
+        "--project",
+        is_flag=True,
+        help="Install to .agents/skills in detected project root (default is global install).",
+    )
+    @click.option(
+        "--skill",
+        "skill_names",
+        multiple=True,
+        help="Skill name to install (repeatable). Defaults to all. Skills not listed will be removed.",
+    )
+    @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+    def skills_install(project: bool, skill_names: tuple[str, ...], output_json: bool) -> None:
+        """Install Parallel skills from GitHub.
+        When --skill is provided, the managed install set is replaced with exactly
+        the listed skills.
+        """
+        from parallel_web_tools.core.skills import (
+            SkillsError,
+            SkillsInputError,
+            SkillsInstallLocationError,
+            get_skills_repo_ref,
+            install_skills,
+            resolve_install_dir,
+        )
+        try:
+            install_dir = resolve_install_dir(project=project)
+            result = install_skills(
+                install_dir=install_dir,
+                selected_skills=list(skill_names) or None,
+                ref=get_skills_repo_ref(),
+            )
+        except SkillsInstallLocationError as e:
+            handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills install failed")
+        except SkillsInputError as e:
+            handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills install failed")
+        except SkillsError as e:
+            handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills install failed")
+        except Exception as e:
+            handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills install failed")
+        if output_json:
+            print(json.dumps(result, indent=2))
+            return
+        console.print("[bold green]Skills installed[/bold green]")
+        console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
+        console.print(f"Ref: [cyan]{result['ref']}[/cyan]")
+        console.print(f"Installed ({result['count']}): [cyan]{', '.join(result['installed_skills'])}[/cyan]")
+    @skills.command(name="uninstall")
+    @click.option(
+        "--project",
+        is_flag=True,
+        help="Uninstall from .agents/skills in detected project root (default is global install).",
+    )
+    @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+    def skills_uninstall(project: bool, output_json: bool) -> None:
+        """Uninstall skills previously installed by parallel-cli."""
+        from parallel_web_tools.core.skills import SkillsInstallLocationError, resolve_install_dir, uninstall_skills
+        try:
+            install_dir = resolve_install_dir(project=project)
+            result = uninstall_skills(install_dir=install_dir)
+        except SkillsInstallLocationError as e:
+            handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills uninstall failed")
+        except Exception as e:
+            handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills uninstall failed")
+        if output_json:
+            print(json.dumps(result, indent=2))
+            return
+        if result["count"] == 0:
+            console.print("[yellow]No managed skills found to uninstall[/yellow]")
+            console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
+            return
+        console.print("[bold green]Skills uninstalled[/bold green]")
+        console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
+        console.print(f"Removed ({result['count']}): [cyan]{', '.join(result['removed_skills'])}[/cyan]")
+    @skills.command(name="reinstall")
+    @click.option(
+        "--project",
+        is_flag=True,
+        help="Reinstall in .agents/skills in detected project root (default is global install).",
+    )
+    @click.option(
+        "--skill",
+        "skill_names",
+        multiple=True,
+        help="Skill name to reinstall (repeatable). Defaults to all. Skills not listed will be removed.",
+    )
+    @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+    def skills_reinstall(project: bool, skill_names: tuple[str, ...], output_json: bool) -> None:
+        """Reinstall Parallel skills (uninstall managed set then install fresh).
+        When --skill is provided, the managed install set is replaced with exactly
+        the listed skills.
+        """
+        from parallel_web_tools.core.skills import (
+            SkillsError,
+            SkillsInputError,
+            SkillsInstallLocationError,
+            get_skills_repo_ref,
+            reinstall_skills,
+            resolve_install_dir,
+        )
+        try:
+            install_dir = resolve_install_dir(project=project)
+            result = reinstall_skills(
+                install_dir=install_dir,
+                selected_skills=list(skill_names) or None,
+                ref=get_skills_repo_ref(),
+            )
+        except SkillsInstallLocationError as e:
+            handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills reinstall failed")
+        except SkillsInputError as e:
+            handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills reinstall failed")
+        except SkillsError as e:
+            handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills reinstall failed")
+        except Exception as e:
+            handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills reinstall failed")
+        if output_json:
+            print(json.dumps(result, indent=2))
+            return
+        console.print("[bold green]Skills reinstalled[/bold green]")
+        console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
+        console.print(f"Ref: [cyan]{result['ref']}[/cyan]")
+        console.print(f"Removed ({result['removed_count']}): [cyan]{', '.join(result['removed_skills'])}[/cyan]")
+        console.print(f"Installed ({result['installed_count']}): [cyan]{', '.join(result['installed_skills'])}[/cyan]")
+    return skills

parallel_web_tools-0.3.0rc2/parallel_web_tools/core/skills.py ADDED Viewed

@@ -0,0 +1,311 @@
+"""Skill installation helpers for parallel-cli."""
+from __future__ import annotations
+import io
+import json
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+from collections.abc import Iterator
+from contextlib import contextmanager
+from pathlib import Path
+from urllib.parse import quote
+import httpx
+# GitHub repository that hosts the skills, and the directory inside it that holds them.
+SKILLS_REPO_OWNER = "parallel-web"
+SKILLS_REPO_NAME = "parallel-agent-skills"
+SKILLS_REPO_SKILLS_PATH = "skills"
+# Ref used when the PARALLEL_SKILLS_REPO_REF environment variable is unset or blank.
+DEFAULT_SKILLS_REPO_REF = "main"
+SKILLS_REPO_REF_ENV = "PARALLEL_SKILLS_REPO_REF"
+# Optional token variable; when set, its value is sent as a Bearer Authorization header.
+GITHUB_TOKEN_ENV = "GH_TOKEN"
+# Environment override for the global install directory (default: ~/.agents/skills).
+GLOBAL_SKILLS_DIR_ENV = "PARALLEL_SKILLS_GLOBAL_DIR"
+# A directory containing any of these entries is treated as a project root.
+PROJECT_ROOT_MARKERS = (".git", "pyproject.toml", "package.json")
+# File written inside the install directory to record which skills parallel-cli manages.
+MANIFEST_FILE_NAME = ".parallel-cli-skills-manifest.json"
+class SkillsError(Exception):
+    """Base class for the skills-specific errors defined in this module."""
+class SkillsInstallLocationError(SkillsError):
+    """Raised when a project-local install directory cannot be determined (no project root found)."""
+class SkillsDownloadError(SkillsError):
+    """Raised when the skills archive cannot be downloaded, read, safely extracted, or lacks the expected layout."""
+class SkillsInputError(SkillsError):
+    """Raised when caller-provided skill arguments are invalid, e.g. a requested skill is not available."""
+def get_skills_repo_ref() -> str:
+    """Return the repository ref to download skills from.
+    A non-blank value of the SKILLS_REPO_REF_ENV environment variable wins
+    (surrounding whitespace is stripped); otherwise the default ref is used.
+    """
+    override = os.environ.get(SKILLS_REPO_REF_ENV, "").strip()
+    return override or DEFAULT_SKILLS_REPO_REF
+def get_global_skills_dir() -> Path:
+    """Return the global skills directory path.
+    The GLOBAL_SKILLS_DIR_ENV environment variable overrides the location when
+    it holds a non-blank value ("~" is expanded); otherwise the default is
+    ~/.agents/skills.
+    """
+    configured = os.environ.get(GLOBAL_SKILLS_DIR_ENV)
+    # Treat a whitespace-only value like an unset one, mirroring get_skills_repo_ref();
+    # otherwise Path("   ") would silently become a relative install target.
+    if configured and configured.strip():
+        return Path(configured).expanduser()
+    return Path.home() / ".agents" / "skills"
+def find_project_root(start: Path | None = None) -> Path | None:
+    """Locate the nearest enclosing project root.
+    Beginning at start (the current working directory when omitted), check that
+    directory and then each parent in turn, returning the first one that contains
+    any entry from PROJECT_ROOT_MARKERS. Returns None when no directory matches.
+    """
+    origin = (start or Path.cwd()).resolve()
+    return next(
+        (
+            folder
+            for folder in (origin, *origin.parents)
+            if any((folder / marker).exists() for marker in PROJECT_ROOT_MARKERS)
+        ),
+        None,
+    )
+def resolve_install_dir(project: bool, start: Path | None = None) -> Path:
+    """Pick the directory skills are installed into.
+    With project=False the global skills directory is returned. With project=True
+    the result is <project root>/.agents/skills, where the root is searched for
+    upward from start; SkillsInstallLocationError is raised if no root is found.
+    """
+    if project:
+        project_root = find_project_root(start=start)
+        if project_root is not None:
+            return project_root / ".agents" / "skills"
+        raise SkillsInstallLocationError(
+            "Could not determine project root from current directory. "
+            "Run this inside a project containing one of: .git, pyproject.toml, package.json."
+        )
+    return get_global_skills_dir()
+def _github_archive_url(ref: str) -> str:
+    """Return the GitHub API zipball URL for ref, percent-encoding the whole ref (including "/")."""
+    repo_api = f"https://api.github.com/repos/{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}"
+    return f"{repo_api}/zipball/{quote(ref, safe='')}"
+def _github_headers() -> dict[str, str]:
+    """Return the request headers used for skills archive downloads.
+    An Authorization header is included only when the GITHUB_TOKEN_ENV
+    environment variable holds a non-blank token.
+    """
+    token = (os.environ.get(GITHUB_TOKEN_ENV) or "").strip()
+    auth = {"Authorization": f"Bearer {token}"} if token else {}
+    return {
+        "Accept": "application/vnd.github+json",
+        "X-GitHub-Api-Version": "2022-11-28",
+        **auth,
+    }
+def _download_repo_archive(client: httpx.Client, ref: str) -> bytes:
+    """Fetch the repository zipball for ref and return the response body.
+    Raises SkillsDownloadError when the response status is 400 or above.
+    """
+    # TODO: add retry/backoff for transient GitHub API failures (429/5xx).
+    reply = client.get(_github_archive_url(ref))
+    if reply.status_code < 400:
+        return reply.content
+    raise SkillsDownloadError(
+        f"Failed to download skills archive at ref '{ref}' from "
+        f"{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}: HTTP {reply.status_code}"
+    )
+def _extract_repo_archive(archive_bytes: bytes, dest_dir: Path) -> Path:
+    """Extract a GitHub zipball into dest_dir and return the archive root.
+    Every entry is checked before it is written: absolute paths, ".." components
+    and anything that resolves outside dest_dir are rejected. The first path
+    component of the first entry is taken to be the repository root directory.
+    Raises:
+        SkillsDownloadError: if the archive cannot be read, is empty, contains an
+            unsafe entry, or has no root directory.
+    """
+    dest_dir.mkdir(parents=True, exist_ok=True)
+    # Loop-invariant: resolve the destination once instead of once per archive entry.
+    dest_resolved = dest_dir.resolve()
+    # Bound before the try so the emptiness check below never depends on loop execution.
+    root_name: str | None = None
+    try:
+        with zipfile.ZipFile(io.BytesIO(archive_bytes)) as zf:
+            for member in zf.infolist():
+                member_path = Path(member.filename)
+                parts = member_path.parts
+                if not parts:
+                    continue
+                if parts[0] in ("", "/"):
+                    raise SkillsDownloadError("Invalid archive entry path")
+                if ".." in parts:
+                    raise SkillsDownloadError("Archive contains unsafe path traversal entry")
+                if root_name is None:
+                    root_name = parts[0]
+                target = dest_dir / member_path
+                # Defence in depth: the resolved target must be dest_dir or live beneath it.
+                if not target.resolve().is_relative_to(dest_resolved):
+                    raise SkillsDownloadError("Archive extraction would escape destination directory")
+                if member.is_dir():
+                    target.mkdir(parents=True, exist_ok=True)
+                    continue
+                target.parent.mkdir(parents=True, exist_ok=True)
+                with zf.open(member) as src, target.open("wb") as dst:
+                    shutil.copyfileobj(src, dst)
+    except zipfile.BadZipFile as e:
+        raise SkillsDownloadError("Failed to read downloaded skills archive") from e
+    if not root_name:
+        raise SkillsDownloadError("Downloaded skills archive was empty")
+    root = dest_dir / root_name
+    if not root.is_dir():
+        raise SkillsDownloadError("Downloaded skills archive had no repository root directory")
+    return root
+@contextmanager
+def _downloaded_repo_root(ref: str) -> Iterator[Path]:
+    """Download the skills repository at ref and yield its extracted root directory.
+    The archive is unpacked into a temporary directory that is removed when the
+    context exits, so the yielded path is only usable inside the with block.
+    """
+    with httpx.Client(timeout=30, follow_redirects=True, headers=_github_headers()) as client:
+        archive_bytes = _download_repo_archive(client, ref)
+    # The HTTP client is closed at this point; only the in-memory archive bytes are needed.
+    with tempfile.TemporaryDirectory(prefix="parallel-skills-") as tmpdir:
+        repo_root = _extract_repo_archive(archive_bytes, Path(tmpdir))
+        yield repo_root
+def _skills_root(repo_root: Path) -> Path:
+    """Return the skills directory inside repo_root, raising SkillsDownloadError if it is absent."""
+    candidate = repo_root / SKILLS_REPO_SKILLS_PATH
+    if candidate.exists() and candidate.is_dir():
+        return candidate
+    raise SkillsDownloadError(
+        f"Downloaded repository does not contain a '{SKILLS_REPO_SKILLS_PATH}/' directory at the requested ref"
+    )
+def _list_skills_from_repo_root(repo_root: Path) -> list[str]:
+    """Return the sorted names of the directories found directly under the skills root."""
+    names = [entry.name for entry in _skills_root(repo_root).iterdir() if entry.is_dir()]
+    names.sort()
+    return names
+def list_remote_skills(ref: str | None = None) -> list[str]:
+    """List the skill directory names available in the remote repository.
+    When ref is omitted or empty, the ref from get_skills_repo_ref() is used.
+    """
+    target_ref = ref if ref else get_skills_repo_ref()
+    with _downloaded_repo_root(target_ref) as checkout:
+        return _list_skills_from_repo_root(checkout)
+def _manifest_path(install_dir: Path) -> Path:
+    """Return the path of the manifest file inside install_dir."""
+    return install_dir / MANIFEST_FILE_NAME
+def _write_manifest(install_dir: Path, ref: str, installed_skills: list[str]) -> None:
+    """Write the manifest describing the managed skills in install_dir.
+    The manifest records the source repository, skills path, ref, the sorted skill
+    names and the install time (integer seconds since the epoch). install_dir is
+    created if it does not exist yet.
+    """
+    record = dict(
+        repo=f"{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}",
+        skills_path=SKILLS_REPO_SKILLS_PATH,
+        ref=ref,
+        installed_skills=sorted(installed_skills),
+        installed_at=int(time.time()),
+        managed_by="parallel-cli",
+    )
+    install_dir.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(record, indent=2)
+    _manifest_path(install_dir).write_text(serialized)
+def _read_manifest(install_dir: Path) -> dict:
+    """Load the manifest from install_dir, best-effort.
+    Returns an empty dict when the manifest is missing, cannot be read or parsed,
+    or does not contain a JSON object.
+    """
+    manifest_file = _manifest_path(install_dir)
+    if not manifest_file.exists():
+        return {}
+    try:
+        parsed = json.loads(manifest_file.read_text())
+    except Exception:
+        # Deliberately tolerant: an unusable manifest is treated as "nothing managed".
+        return {}
+    if isinstance(parsed, dict):
+        return parsed
+    return {}
+def install_skills(
+    install_dir: Path,
+    selected_skills: list[str] | None = None,
+    ref: str | None = None,
+) -> dict:
+    """Install selected (or all) skills into install_dir.
+    Only skills previously managed by parallel-cli are reconciled. Unmanaged skill
+    directories are left untouched.
+    Args:
+        install_dir: Directory that receives one sub-directory per skill.
+        selected_skills: Skill names to install; None or an empty list means every
+            skill available at the ref.
+        ref: Repository ref to download; defaults to get_skills_repo_ref().
+    Returns:
+        A dict with the keys install_dir, ref, installed_skills and count.
+    Raises:
+        SkillsInputError: if a requested skill does not exist at the ref.
+        SkillsDownloadError: if the archive cannot be downloaded or extracted.
+    """
+    resolved_ref = ref or get_skills_repo_ref()
+    with _downloaded_repo_root(resolved_ref) as repo_root:
+        skills_root = _skills_root(repo_root)
+        available = _list_skills_from_repo_root(repo_root)
+        requested = sorted(set(selected_skills or available))
+        missing = sorted(name for name in requested if name not in available)
+        if missing:
+            raise SkillsInputError(
+                f"Unknown skills requested: {', '.join(missing)}. Available skills: {', '.join(available)}"
+            )
+        managed_raw = _read_manifest(install_dir).get("installed_skills")
+        # The manifest is a plain file on disk (for project installs it may come from a
+        # cloned repository), so only entries that are a single path component are
+        # trusted. Names such as "", "..", "a/b" or an absolute path would otherwise make
+        # the rmtree below delete install_dir itself or a directory outside it.
+        previously_managed: list[str] = [
+            name
+            for name in (managed_raw if isinstance(managed_raw, list) else [])
+            if isinstance(name, str) and name not in ("", ".", "..") and Path(name).name == name
+        ]
+        install_dir.mkdir(parents=True, exist_ok=True)
+        # Drop managed skills that are no longer part of the requested set.
+        for skill_name in previously_managed:
+            if skill_name not in requested:
+                skill_dir = install_dir / skill_name
+                if skill_dir.exists() and skill_dir.is_dir():
+                    shutil.rmtree(skill_dir)
+        # Replace each requested skill with a fresh copy from the downloaded archive.
+        for skill_name in requested:
+            skill_dir = install_dir / skill_name
+            if skill_dir.exists():
+                shutil.rmtree(skill_dir)
+            shutil.copytree(skills_root / skill_name, skill_dir)
+    _write_manifest(install_dir, resolved_ref, requested)
+    return {
+        "install_dir": str(install_dir),
+        "ref": resolved_ref,
+        "installed_skills": requested,
+        "count": len(requested),
+    }
+def uninstall_skills(install_dir: Path) -> dict:
+    """Uninstall only manifest-managed skills from install_dir.
+    Directories that are not listed in the manifest are left untouched. The
+    manifest file itself is deleted afterwards.
+    Returns:
+        A dict with the keys install_dir, removed_skills (sorted) and count.
+    """
+    managed_raw = _read_manifest(install_dir).get("installed_skills")
+    # The manifest is a plain file on disk (for project installs it may come from a
+    # cloned repository), so only entries that are a single path component are
+    # trusted. Names such as "", "..", "a/b" or an absolute path would otherwise make
+    # the rmtree below delete install_dir itself or a directory outside it.
+    managed: list[str] = [
+        name
+        for name in (managed_raw if isinstance(managed_raw, list) else [])
+        if isinstance(name, str) and name not in ("", ".", "..") and Path(name).name == name
+    ]
+    removed: list[str] = []
+    for skill_name in managed:
+        skill_path = install_dir / skill_name
+        if skill_path.exists() and skill_path.is_dir():
+            shutil.rmtree(skill_path)
+            removed.append(skill_name)
+    manifest_path = _manifest_path(install_dir)
+    if manifest_path.exists():
+        manifest_path.unlink()
+    return {
+        "install_dir": str(install_dir),
+        "removed_skills": sorted(removed),
+        "count": len(removed),
+    }
+def reinstall_skills(
+    install_dir: Path,
+    selected_skills: list[str] | None = None,
+    ref: str | None = None,
+) -> dict:
+    """Reinstall skills: replace the managed set with a fresh install.
+    The managed set is not removed up front. install_skills() deletes managed
+    skills that are no longer requested and replaces the requested ones, but only
+    after the archive has been downloaded and the requested names validated. A
+    failed download or an unknown skill name therefore leaves the existing
+    installation intact instead of leaving nothing installed.
+    Returns:
+        A dict with the keys install_dir, ref, removed_skills, installed_skills,
+        removed_count and installed_count.
+    Raises:
+        SkillsInputError: if a requested skill does not exist at the ref.
+        SkillsDownloadError: if the archive cannot be downloaded or extracted.
+    """
+    managed_raw = _read_manifest(install_dir).get("installed_skills")
+    # Report as removed every managed skill directory present before the install.
+    # Only single-component names are considered; anything else in the manifest is
+    # not a skill directory inside install_dir.
+    removed = sorted(
+        {
+            name
+            for name in (managed_raw if isinstance(managed_raw, list) else [])
+            if isinstance(name, str)
+            and name not in ("", ".", "..")
+            and Path(name).name == name
+            and (install_dir / name).is_dir()
+        }
+    )
+    install_result = install_skills(install_dir, selected_skills=selected_skills, ref=ref)
+    return {
+        "install_dir": install_result["install_dir"],
+        "ref": install_result["ref"],
+        "removed_skills": removed,
+        "installed_skills": install_result["installed_skills"],
+        "removed_count": len(removed),
+        "installed_count": install_result["count"],
+    }

{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/cloud_function/requirements.txt RENAMED Viewed

@@ -1,5 +1,5 @@
 # Cloud Function dependencies for BigQuery Remote Function
 functions-framework>=3.0.0
 flask>=3.0.0
-parallel-web-tools>=0.2.0
+parallel-web-tools>=0.3.0rc2
 google-cloud-secret-manager>=2.20.0

{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "parallel-web-tools"
-version = "0.2.0"
+version = "0.3.0rc2"
 description = "Parallel Tools: CLI and Python SDK for AI-powered web intelligence"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -92,12 +92,12 @@ all = [
 # Development
 dev = [
     "parallel-web-tools[all,spark]",
-    "pytest>=8.0.0",
-    "pytest-cov>=4.0.0",
-    "pyinstaller>=6.0.0",
-    "pre-commit>=4.0.0",
-    "ruff>=0.14.0",
-    "ty>=0.0.21",
+    "pytest>=9.0.0",
+    "pytest-cov>=7.0.0",
+    "pyinstaller>=6.20.0",
+    "pre-commit>=4.6.0",
+    "ruff>=0.15.0",
+    "ty>=0.0.33",
 ]
 [tool.hatch.build.targets.wheel]
@@ -162,6 +162,8 @@ known-first-party = ["parallel_web_tools"]
 [dependency-groups]
 dev = [
-    "ipykernel>=7.1.0",
-    "ty>=0.0.21",
+    "ipykernel>=7.2.0",
+    "pyinstaller>=6.20.0",
+    "tach>=0.34.1",
+    "ty>=0.0.33",
 ]