parallel-web-tools 0.2.0__tar.gz → 0.3.0rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/.gitignore +2 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/PKG-INFO +7 -7
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/__init__.py +1 -1
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/commands.py +208 -70
- parallel_web_tools-0.3.0rc2/parallel_web_tools/cli/skills.py +204 -0
- parallel_web_tools-0.3.0rc2/parallel_web_tools/core/skills.py +311 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/cloud_function/requirements.txt +1 -1
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/pyproject.toml +11 -9
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/README.md +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/planner.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/updater.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/auth.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/batch.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/findall.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/monitor.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/polling.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/research.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/result.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/runner.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/schema.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/sql_utils.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/user_agent.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/cloud_function/main.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/deploy.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/sql/create_functions.sql +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/duckdb/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/duckdb/batch.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/duckdb/findall.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/duckdb/udf.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/polars/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/polars/enrich.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/deploy.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/sql/01_setup.sql +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/sql/02_create_udf.sql +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/sql/03_cleanup.sql +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/spark/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/spark/streaming.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/spark/udf.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/utils.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/__init__.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/bigquery.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/csv.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/duckdb.py +0 -0
- {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/json.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: parallel-web-tools
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0rc2
|
|
4
4
|
Summary: Parallel Tools: CLI and Python SDK for AI-powered web intelligence
|
|
5
5
|
Project-URL: Homepage, https://github.com/parallel-web/parallel-web-tools
|
|
6
6
|
Project-URL: Documentation, https://docs.parallel.ai
|
|
@@ -51,19 +51,19 @@ Requires-Dist: duckdb>=1.0.0; extra == 'dev'
|
|
|
51
51
|
Requires-Dist: nest-asyncio>=1.6.0; extra == 'dev'
|
|
52
52
|
Requires-Dist: pandas>=2.3.0; extra == 'dev'
|
|
53
53
|
Requires-Dist: polars>=1.37.0; extra == 'dev'
|
|
54
|
-
Requires-Dist: pre-commit>=4.
|
|
54
|
+
Requires-Dist: pre-commit>=4.6.0; extra == 'dev'
|
|
55
55
|
Requires-Dist: pyarrow>=18.0.0; extra == 'dev'
|
|
56
|
-
Requires-Dist: pyinstaller>=6.
|
|
56
|
+
Requires-Dist: pyinstaller>=6.20.0; extra == 'dev'
|
|
57
57
|
Requires-Dist: pyspark>=3.4.0; extra == 'dev'
|
|
58
|
-
Requires-Dist: pytest-cov>=
|
|
59
|
-
Requires-Dist: pytest>=
|
|
58
|
+
Requires-Dist: pytest-cov>=7.0.0; extra == 'dev'
|
|
59
|
+
Requires-Dist: pytest>=9.0.0; extra == 'dev'
|
|
60
60
|
Requires-Dist: pyyaml>=6.0.0; extra == 'dev'
|
|
61
61
|
Requires-Dist: questionary>=2.0.0; extra == 'dev'
|
|
62
|
-
Requires-Dist: ruff>=0.
|
|
62
|
+
Requires-Dist: ruff>=0.15.0; extra == 'dev'
|
|
63
63
|
Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'dev'
|
|
64
64
|
Requires-Dist: sqlalchemy-bigquery>=1.11.0; extra == 'dev'
|
|
65
65
|
Requires-Dist: sqlalchemy>=2.0.0; extra == 'dev'
|
|
66
|
-
Requires-Dist: ty>=0.0.
|
|
66
|
+
Requires-Dist: ty>=0.0.33; extra == 'dev'
|
|
67
67
|
Provides-Extra: duckdb
|
|
68
68
|
Requires-Dist: duckdb>=1.0.0; extra == 'duckdb'
|
|
69
69
|
Requires-Dist: nest-asyncio>=1.6.0; extra == 'duckdb'
|
|
@@ -7,7 +7,7 @@ import os
|
|
|
7
7
|
import sys
|
|
8
8
|
import tempfile
|
|
9
9
|
import time
|
|
10
|
-
from typing import Any
|
|
10
|
+
from typing import Any, NoReturn
|
|
11
11
|
|
|
12
12
|
import click
|
|
13
13
|
import httpx
|
|
@@ -15,6 +15,7 @@ from dotenv import load_dotenv
|
|
|
15
15
|
from rich.console import Console
|
|
16
16
|
|
|
17
17
|
from parallel_web_tools import __version__
|
|
18
|
+
from parallel_web_tools.cli.skills import create_skills_group
|
|
18
19
|
from parallel_web_tools.core import (
|
|
19
20
|
AVAILABLE_PROCESSORS,
|
|
20
21
|
FINDALL_GENERATORS,
|
|
@@ -170,7 +171,7 @@ def _handle_error(
|
|
|
170
171
|
output_json: bool = False,
|
|
171
172
|
exit_code: int = EXIT_API_ERROR,
|
|
172
173
|
prefix: str = "Error",
|
|
173
|
-
) ->
|
|
174
|
+
) -> NoReturn:
|
|
174
175
|
"""Handle an error with appropriate output format and exit code.
|
|
175
176
|
|
|
176
177
|
In --json mode, outputs structured JSON to stdout. Otherwise, prints a
|
|
@@ -318,12 +319,12 @@ def parse_inline_data(data_json: str) -> tuple[str, list[dict[str, str]]]:
|
|
|
318
319
|
raise click.BadParameter("Data must be an array of objects")
|
|
319
320
|
|
|
320
321
|
# Infer columns from the first row
|
|
321
|
-
columns =
|
|
322
|
+
columns: list[str] = [str(k) for k in data[0].keys()]
|
|
322
323
|
if not columns:
|
|
323
324
|
raise click.BadParameter("Data objects must have at least one field")
|
|
324
325
|
|
|
325
326
|
# Create source_columns with inferred descriptions
|
|
326
|
-
source_columns = [{"name": col, "description": f"The {col} field"} for col in columns]
|
|
327
|
+
source_columns: list[dict[str, str]] = [{"name": col, "description": f"The {col} field"} for col in columns]
|
|
327
328
|
|
|
328
329
|
# Write to a temporary CSV file
|
|
329
330
|
temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False, newline="")
|
|
@@ -647,39 +648,116 @@ def config_cmd(key: str | None, value: str | None, output_json: bool):
|
|
|
647
648
|
console.print(f"[green]Set {key} = {format_bool(is_auto_update_check_enabled())}[/green]")
|
|
648
649
|
|
|
649
650
|
|
|
651
|
+
main.add_command(create_skills_group(console, _handle_error, EXIT_BAD_INPUT, EXIT_API_ERROR))
|
|
652
|
+
|
|
653
|
+
|
|
650
654
|
# =============================================================================
|
|
651
655
|
# Search Command
|
|
652
656
|
# =============================================================================
|
|
653
657
|
|
|
658
|
+
# Beta -> V1 mode mapping. Beta had three modes; V1 has two. We keep the old
|
|
659
|
+
# values as accepted CLI inputs and translate them so existing scripts work.
|
|
660
|
+
_SEARCH_MODE_MAP = {
|
|
661
|
+
"fast": "basic",
|
|
662
|
+
"one-shot": "basic",
|
|
663
|
+
"agentic": "advanced",
|
|
664
|
+
"basic": "basic",
|
|
665
|
+
"advanced": "advanced",
|
|
666
|
+
}
|
|
667
|
+
_DEPRECATED_SEARCH_MODES = {"fast", "one-shot", "agentic"}
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
def _emit_deprecation(message: str) -> None:
|
|
671
|
+
"""Print a deprecation notice to stderr so it doesn't pollute --json output."""
|
|
672
|
+
click.echo(f"[deprecated] {message}", err=True)
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
def build_search_v1_kwargs(
|
|
676
|
+
*,
|
|
677
|
+
objective: str | None,
|
|
678
|
+
query: tuple[str, ...] | list[str],
|
|
679
|
+
mode: str | None,
|
|
680
|
+
max_results: int | None,
|
|
681
|
+
source_policy: dict[str, Any] | None,
|
|
682
|
+
excerpt_max_chars_per_result: int | None,
|
|
683
|
+
excerpt_max_chars_total: int | None,
|
|
684
|
+
fetch_policy: dict[str, Any] | None,
|
|
685
|
+
location: str | None = None,
|
|
686
|
+
session_id: str | None = None,
|
|
687
|
+
client_model: str | None = None,
|
|
688
|
+
) -> dict[str, Any]:
|
|
689
|
+
"""Translate Beta-style search params to V1 client.search() kwargs.
|
|
690
|
+
|
|
691
|
+
V1 requires search_queries; if the caller only provided an objective, we
|
|
692
|
+
fall back to using it as the single query so older invocations keep working.
|
|
693
|
+
"""
|
|
694
|
+
queries = list(query) if query else []
|
|
695
|
+
if not queries and objective:
|
|
696
|
+
queries = [objective]
|
|
697
|
+
|
|
698
|
+
kwargs: dict[str, Any] = {"search_queries": queries}
|
|
699
|
+
if objective:
|
|
700
|
+
kwargs["objective"] = objective
|
|
701
|
+
if mode:
|
|
702
|
+
kwargs["mode"] = _SEARCH_MODE_MAP.get(mode, mode)
|
|
703
|
+
if excerpt_max_chars_total is not None:
|
|
704
|
+
kwargs["max_chars_total"] = excerpt_max_chars_total
|
|
705
|
+
if session_id:
|
|
706
|
+
kwargs["session_id"] = session_id
|
|
707
|
+
if client_model:
|
|
708
|
+
kwargs["client_model"] = client_model
|
|
709
|
+
|
|
710
|
+
advanced: dict[str, Any] = {}
|
|
711
|
+
if max_results is not None:
|
|
712
|
+
advanced["max_results"] = max_results
|
|
713
|
+
if source_policy:
|
|
714
|
+
advanced["source_policy"] = source_policy
|
|
715
|
+
if fetch_policy:
|
|
716
|
+
advanced["fetch_policy"] = fetch_policy
|
|
717
|
+
if excerpt_max_chars_per_result is not None:
|
|
718
|
+
advanced["excerpt_settings"] = {"max_chars_per_result": excerpt_max_chars_per_result}
|
|
719
|
+
if location:
|
|
720
|
+
advanced["location"] = location
|
|
721
|
+
if advanced:
|
|
722
|
+
kwargs["advanced_settings"] = advanced
|
|
723
|
+
|
|
724
|
+
return kwargs
|
|
725
|
+
|
|
654
726
|
|
|
655
727
|
@main.command()
|
|
656
728
|
@click.argument("objective", required=False)
|
|
657
729
|
@click.option("-q", "--query", multiple=True, help="Keyword search query (can be repeated)")
|
|
658
730
|
@click.option(
|
|
659
731
|
"--mode",
|
|
660
|
-
type=click.Choice(
|
|
661
|
-
default="
|
|
662
|
-
help="Search mode",
|
|
732
|
+
type=click.Choice(list(_SEARCH_MODE_MAP.keys())),
|
|
733
|
+
default="basic",
|
|
734
|
+
help="Search mode (one-shot/fast → basic, agentic → advanced)",
|
|
663
735
|
show_default=True,
|
|
664
736
|
)
|
|
665
|
-
@click.option("--max-results", type=int,
|
|
737
|
+
@click.option("--max-results", type=int, help="Maximum results (defaults to server-side default of 10)")
|
|
666
738
|
@click.option("--include-domains", multiple=True, help="Only search these domains (comma-separated or repeated)")
|
|
667
739
|
@click.option("--exclude-domains", multiple=True, help="Exclude these domains (comma-separated or repeated)")
|
|
668
740
|
@click.option("--after-date", help="Only results after this date (YYYY-MM-DD)")
|
|
669
|
-
@click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts")
|
|
741
|
+
@click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts (min 1000)")
|
|
670
742
|
@click.option(
|
|
671
743
|
"--excerpt-max-chars-total", type=int, default=60000, help="Max total characters for excerpts", show_default=True
|
|
672
744
|
)
|
|
673
745
|
@click.option("--max-age-seconds", type=int, help="Max age in seconds before fetching live content (min 600)")
|
|
674
746
|
@click.option("--timeout-seconds", type=float, help="Timeout in seconds for fetching live content")
|
|
675
747
|
@click.option("--disable-cache-fallback", is_flag=True, help="Return error instead of stale cached content")
|
|
748
|
+
@click.option("--location", help="ISO 3166-1 alpha-2 country code for geo-targeted results (e.g. us, gb, de)")
|
|
749
|
+
@click.option("--session-id", help="Session ID to group related search/extract calls")
|
|
750
|
+
@click.option(
|
|
751
|
+
"--client-model",
|
|
752
|
+
help="The model generating this request and consuming the results (e.g. claude-opus-4-7, gpt-5.4, gemini-3.1-pro)",
|
|
753
|
+
)
|
|
676
754
|
@click.option("-o", "--output", "output_file", type=click.Path(), help="Save results to file (JSON)")
|
|
677
755
|
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
678
756
|
def search(
|
|
679
757
|
objective: str | None,
|
|
680
758
|
query: tuple[str, ...],
|
|
681
759
|
mode: str,
|
|
682
|
-
max_results: int,
|
|
760
|
+
max_results: int | None,
|
|
683
761
|
include_domains: tuple[str, ...],
|
|
684
762
|
exclude_domains: tuple[str, ...],
|
|
685
763
|
after_date: str | None,
|
|
@@ -688,6 +766,9 @@ def search(
|
|
|
688
766
|
max_age_seconds: int | None,
|
|
689
767
|
timeout_seconds: float | None,
|
|
690
768
|
disable_cache_fallback: bool,
|
|
769
|
+
location: str | None,
|
|
770
|
+
session_id: str | None,
|
|
771
|
+
client_model: str | None,
|
|
691
772
|
output_file: str | None,
|
|
692
773
|
output_json: bool,
|
|
693
774
|
):
|
|
@@ -704,6 +785,24 @@ def search(
|
|
|
704
785
|
if not objective and not query:
|
|
705
786
|
raise click.UsageError("Provide an OBJECTIVE argument or at least one --query option.")
|
|
706
787
|
|
|
788
|
+
if mode in _DEPRECATED_SEARCH_MODES:
|
|
789
|
+
new_mode = _SEARCH_MODE_MAP[mode]
|
|
790
|
+
_emit_deprecation(
|
|
791
|
+
f"--mode {mode} is a Beta value and will stop working after the Beta API sunset (June 2026). "
|
|
792
|
+
f"Use --mode {new_mode} instead."
|
|
793
|
+
)
|
|
794
|
+
|
|
795
|
+
source_policy: dict[str, Any] = {}
|
|
796
|
+
if include_domains:
|
|
797
|
+
source_policy["include_domains"] = parse_comma_separated(include_domains)
|
|
798
|
+
if exclude_domains:
|
|
799
|
+
source_policy["exclude_domains"] = parse_comma_separated(exclude_domains)
|
|
800
|
+
domain_total = len(source_policy.get("include_domains", [])) + len(source_policy.get("exclude_domains", []))
|
|
801
|
+
if domain_total > 200:
|
|
802
|
+
raise click.UsageError(f"--include-domains and --exclude-domains combined must be <= 200 (got {domain_total}).")
|
|
803
|
+
if after_date:
|
|
804
|
+
source_policy["after_date"] = after_date
|
|
805
|
+
|
|
707
806
|
try:
|
|
708
807
|
from parallel import Parallel
|
|
709
808
|
|
|
@@ -712,29 +811,6 @@ def search(
|
|
|
712
811
|
api_key = get_api_key()
|
|
713
812
|
client = Parallel(api_key=api_key, default_headers=get_default_headers("cli"))
|
|
714
813
|
|
|
715
|
-
search_kwargs: dict[str, Any] = {"mode": mode, "max_results": max_results}
|
|
716
|
-
if objective:
|
|
717
|
-
search_kwargs["objective"] = objective
|
|
718
|
-
if query:
|
|
719
|
-
search_kwargs["search_queries"] = list(query)
|
|
720
|
-
|
|
721
|
-
source_policy: dict[str, Any] = {}
|
|
722
|
-
if include_domains:
|
|
723
|
-
source_policy["include_domains"] = parse_comma_separated(include_domains)
|
|
724
|
-
if exclude_domains:
|
|
725
|
-
source_policy["exclude_domains"] = parse_comma_separated(exclude_domains)
|
|
726
|
-
if after_date:
|
|
727
|
-
source_policy["after_date"] = after_date
|
|
728
|
-
if source_policy:
|
|
729
|
-
search_kwargs["source_policy"] = source_policy
|
|
730
|
-
|
|
731
|
-
# Excerpt settings (max_chars_total has a default, so always set)
|
|
732
|
-
excerpts_settings: dict[str, Any] = {"max_chars_total": excerpt_max_chars_total}
|
|
733
|
-
if excerpt_max_chars_per_result is not None:
|
|
734
|
-
excerpts_settings["max_chars_per_result"] = excerpt_max_chars_per_result
|
|
735
|
-
search_kwargs["excerpts"] = excerpts_settings
|
|
736
|
-
|
|
737
|
-
# Fetch policy
|
|
738
814
|
fetch_policy: dict[str, Any] = {}
|
|
739
815
|
if max_age_seconds is not None:
|
|
740
816
|
fetch_policy["max_age_seconds"] = max_age_seconds
|
|
@@ -742,21 +818,35 @@ def search(
|
|
|
742
818
|
fetch_policy["timeout_seconds"] = timeout_seconds
|
|
743
819
|
if disable_cache_fallback:
|
|
744
820
|
fetch_policy["disable_cache_fallback"] = True
|
|
745
|
-
|
|
746
|
-
|
|
821
|
+
|
|
822
|
+
search_kwargs = build_search_v1_kwargs(
|
|
823
|
+
objective=objective,
|
|
824
|
+
query=query,
|
|
825
|
+
mode=mode,
|
|
826
|
+
max_results=max_results,
|
|
827
|
+
source_policy=source_policy or None,
|
|
828
|
+
excerpt_max_chars_per_result=excerpt_max_chars_per_result,
|
|
829
|
+
excerpt_max_chars_total=excerpt_max_chars_total,
|
|
830
|
+
fetch_policy=fetch_policy or None,
|
|
831
|
+
location=location,
|
|
832
|
+
session_id=session_id,
|
|
833
|
+
client_model=client_model,
|
|
834
|
+
)
|
|
747
835
|
|
|
748
836
|
if not output_json:
|
|
749
837
|
console.print("[dim]Searching...[/dim]\n")
|
|
750
838
|
|
|
751
|
-
result = client.
|
|
839
|
+
result = client.search(**search_kwargs)
|
|
752
840
|
|
|
753
841
|
output_data = {
|
|
754
842
|
"search_id": result.search_id,
|
|
843
|
+
"session_id": getattr(result, "session_id", None),
|
|
755
844
|
"status": "ok",
|
|
756
845
|
"results": [
|
|
757
846
|
{"url": r.url, "title": r.title, "publish_date": r.publish_date, "excerpts": r.excerpts}
|
|
758
847
|
for r in result.results
|
|
759
848
|
],
|
|
849
|
+
"usage": [{"name": u.name, "count": u.count} for u in (getattr(result, "usage", None) or [])],
|
|
760
850
|
"warnings": [
|
|
761
851
|
{"type": w.type, "message": w.message, "detail": getattr(w, "detail", None)} for w in result.warnings
|
|
762
852
|
]
|
|
@@ -787,18 +877,69 @@ def search(
|
|
|
787
877
|
# =============================================================================
|
|
788
878
|
|
|
789
879
|
|
|
880
|
+
def build_extract_v1_kwargs(
|
|
881
|
+
*,
|
|
882
|
+
urls: tuple[str, ...] | list[str],
|
|
883
|
+
objective: str | None,
|
|
884
|
+
query: tuple[str, ...] | list[str],
|
|
885
|
+
full_content: bool,
|
|
886
|
+
full_content_max_chars: int | None,
|
|
887
|
+
excerpt_max_chars_per_result: int | None,
|
|
888
|
+
excerpt_max_chars_total: int | None,
|
|
889
|
+
fetch_policy: dict[str, Any] | None,
|
|
890
|
+
session_id: str | None = None,
|
|
891
|
+
client_model: str | None = None,
|
|
892
|
+
) -> dict[str, Any]:
|
|
893
|
+
"""Translate Beta-style extract params to V1 client.extract() kwargs.
|
|
894
|
+
|
|
895
|
+
Note: V1 always returns excerpts; the old `--no-excerpts` flag can no longer
|
|
896
|
+
disable them server-side. The CLI handles that flag by filtering excerpts out
|
|
897
|
+
of the output, not by passing it to the SDK.
|
|
898
|
+
"""
|
|
899
|
+
kwargs: dict[str, Any] = {"urls": list(urls)}
|
|
900
|
+
if objective:
|
|
901
|
+
kwargs["objective"] = objective
|
|
902
|
+
if query:
|
|
903
|
+
kwargs["search_queries"] = list(query)
|
|
904
|
+
if excerpt_max_chars_total is not None:
|
|
905
|
+
kwargs["max_chars_total"] = excerpt_max_chars_total
|
|
906
|
+
if session_id:
|
|
907
|
+
kwargs["session_id"] = session_id
|
|
908
|
+
if client_model:
|
|
909
|
+
kwargs["client_model"] = client_model
|
|
910
|
+
|
|
911
|
+
advanced: dict[str, Any] = {}
|
|
912
|
+
if excerpt_max_chars_per_result is not None:
|
|
913
|
+
advanced["excerpt_settings"] = {"max_chars_per_result": excerpt_max_chars_per_result}
|
|
914
|
+
if full_content_max_chars is not None:
|
|
915
|
+
advanced["full_content"] = {"max_chars_per_result": full_content_max_chars}
|
|
916
|
+
elif full_content:
|
|
917
|
+
advanced["full_content"] = True
|
|
918
|
+
if fetch_policy:
|
|
919
|
+
advanced["fetch_policy"] = fetch_policy
|
|
920
|
+
if advanced:
|
|
921
|
+
kwargs["advanced_settings"] = advanced
|
|
922
|
+
|
|
923
|
+
return kwargs
|
|
924
|
+
|
|
925
|
+
|
|
790
926
|
@main.command()
|
|
791
927
|
@click.argument("urls", nargs=-1, required=True)
|
|
792
928
|
@click.option("--objective", help="Focus extraction on a specific goal")
|
|
793
929
|
@click.option("-q", "--query", multiple=True, help="Keywords to prioritize (can be repeated)")
|
|
794
930
|
@click.option("--full-content", is_flag=True, help="Include complete page content")
|
|
795
931
|
@click.option("--full-content-max-chars", type=int, help="Max characters per result for full content")
|
|
796
|
-
@click.option("--no-excerpts", is_flag=True, help="
|
|
932
|
+
@click.option("--no-excerpts", is_flag=True, help="Strip excerpts from output (V1 always returns them server-side)")
|
|
797
933
|
@click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts (min 1000)")
|
|
798
934
|
@click.option("--excerpt-max-chars-total", type=int, help="Max total characters for excerpts across all URLs")
|
|
799
935
|
@click.option("--max-age-seconds", type=int, help="Max age in seconds before fetching live content (min 600)")
|
|
800
936
|
@click.option("--timeout-seconds", type=float, help="Timeout in seconds for fetching live content")
|
|
801
937
|
@click.option("--disable-cache-fallback", is_flag=True, help="Return error instead of stale cached content")
|
|
938
|
+
@click.option("--session-id", help="Session ID to group related search/extract calls")
|
|
939
|
+
@click.option(
|
|
940
|
+
"--client-model",
|
|
941
|
+
help="The model generating this request and consuming the results (e.g. claude-opus-4-7, gpt-5.4, gemini-3.1-pro)",
|
|
942
|
+
)
|
|
802
943
|
@click.option("-o", "--output", "output_file", type=click.Path(), help="Save results to file (JSON)")
|
|
803
944
|
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
804
945
|
def extract(
|
|
@@ -813,10 +954,23 @@ def extract(
|
|
|
813
954
|
max_age_seconds: int | None,
|
|
814
955
|
timeout_seconds: float | None,
|
|
815
956
|
disable_cache_fallback: bool,
|
|
957
|
+
session_id: str | None,
|
|
958
|
+
client_model: str | None,
|
|
816
959
|
output_file: str | None,
|
|
817
960
|
output_json: bool,
|
|
818
961
|
):
|
|
819
962
|
"""Extract content from URLs as clean markdown."""
|
|
963
|
+
if no_excerpts:
|
|
964
|
+
_emit_deprecation(
|
|
965
|
+
"--no-excerpts no longer disables excerpts server-side (V1 always returns them); "
|
|
966
|
+
"the flag now just strips them from the CLI output."
|
|
967
|
+
)
|
|
968
|
+
|
|
969
|
+
if len(urls) > 20:
|
|
970
|
+
raise click.UsageError(f"V1 extract accepts at most 20 URLs per request (got {len(urls)}).")
|
|
971
|
+
if objective is not None and len(objective) > 5000:
|
|
972
|
+
raise click.UsageError(f"--objective must be 5000 characters or fewer (got {len(objective)}).")
|
|
973
|
+
|
|
820
974
|
try:
|
|
821
975
|
from parallel import Parallel
|
|
822
976
|
|
|
@@ -825,30 +979,6 @@ def extract(
|
|
|
825
979
|
api_key = get_api_key()
|
|
826
980
|
client = Parallel(api_key=api_key, default_headers=get_default_headers("cli"))
|
|
827
981
|
|
|
828
|
-
extract_kwargs: dict[str, Any] = {
|
|
829
|
-
"urls": list(urls),
|
|
830
|
-
}
|
|
831
|
-
|
|
832
|
-
# Excerpt settings - can be bool or object with settings
|
|
833
|
-
if no_excerpts:
|
|
834
|
-
extract_kwargs["excerpts"] = False
|
|
835
|
-
elif excerpt_max_chars_per_result is not None or excerpt_max_chars_total is not None:
|
|
836
|
-
excerpts_settings: dict[str, Any] = {}
|
|
837
|
-
if excerpt_max_chars_per_result is not None:
|
|
838
|
-
excerpts_settings["max_chars_per_result"] = excerpt_max_chars_per_result
|
|
839
|
-
if excerpt_max_chars_total is not None:
|
|
840
|
-
excerpts_settings["max_chars_total"] = excerpt_max_chars_total
|
|
841
|
-
extract_kwargs["excerpts"] = excerpts_settings
|
|
842
|
-
else:
|
|
843
|
-
extract_kwargs["excerpts"] = True
|
|
844
|
-
|
|
845
|
-
# Full content settings - can be bool or object with settings
|
|
846
|
-
if full_content_max_chars is not None:
|
|
847
|
-
extract_kwargs["full_content"] = {"max_chars_per_result": full_content_max_chars}
|
|
848
|
-
else:
|
|
849
|
-
extract_kwargs["full_content"] = full_content
|
|
850
|
-
|
|
851
|
-
# Fetch policy
|
|
852
982
|
fetch_policy: dict[str, Any] = {}
|
|
853
983
|
if max_age_seconds is not None:
|
|
854
984
|
fetch_policy["max_age_seconds"] = max_age_seconds
|
|
@@ -856,23 +986,29 @@ def extract(
|
|
|
856
986
|
fetch_policy["timeout_seconds"] = timeout_seconds
|
|
857
987
|
if disable_cache_fallback:
|
|
858
988
|
fetch_policy["disable_cache_fallback"] = True
|
|
859
|
-
if fetch_policy:
|
|
860
|
-
extract_kwargs["fetch_policy"] = fetch_policy
|
|
861
989
|
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
990
|
+
extract_kwargs = build_extract_v1_kwargs(
|
|
991
|
+
urls=urls,
|
|
992
|
+
objective=objective,
|
|
993
|
+
query=query,
|
|
994
|
+
full_content=full_content,
|
|
995
|
+
full_content_max_chars=full_content_max_chars,
|
|
996
|
+
excerpt_max_chars_per_result=excerpt_max_chars_per_result,
|
|
997
|
+
excerpt_max_chars_total=excerpt_max_chars_total,
|
|
998
|
+
fetch_policy=fetch_policy or None,
|
|
999
|
+
session_id=session_id,
|
|
1000
|
+
client_model=client_model,
|
|
1001
|
+
)
|
|
866
1002
|
|
|
867
1003
|
if not output_json:
|
|
868
1004
|
console.print(f"[dim]Extracting content from {len(urls)} URL(s)...[/dim]\n")
|
|
869
1005
|
|
|
870
|
-
result = client.
|
|
1006
|
+
result = client.extract(**extract_kwargs)
|
|
871
1007
|
|
|
872
1008
|
results_list = []
|
|
873
1009
|
for r in result.results:
|
|
874
1010
|
result_dict: dict[str, Any] = {"url": r.url, "title": r.title, "publish_date": r.publish_date}
|
|
875
|
-
if hasattr(r, "excerpts") and r.excerpts:
|
|
1011
|
+
if not no_excerpts and hasattr(r, "excerpts") and r.excerpts:
|
|
876
1012
|
result_dict["excerpts"] = r.excerpts
|
|
877
1013
|
if hasattr(r, "full_content") and r.full_content:
|
|
878
1014
|
result_dict["full_content"] = r.full_content
|
|
@@ -892,9 +1028,11 @@ def extract(
|
|
|
892
1028
|
|
|
893
1029
|
output_data = {
|
|
894
1030
|
"extract_id": result.extract_id,
|
|
1031
|
+
"session_id": getattr(result, "session_id", None),
|
|
895
1032
|
"status": "ok",
|
|
896
1033
|
"results": results_list,
|
|
897
1034
|
"errors": errors_list,
|
|
1035
|
+
"usage": [{"name": u.name, "count": u.count} for u in (getattr(result, "usage", None) or [])],
|
|
898
1036
|
"warnings": [
|
|
899
1037
|
{"type": w.type, "message": w.message, "detail": getattr(w, "detail", None)} for w in result.warnings
|
|
900
1038
|
]
|
|
@@ -914,7 +1052,7 @@ def extract(
|
|
|
914
1052
|
console.print(f"[bold cyan]{r.title}[/bold cyan]")
|
|
915
1053
|
console.print(f"[link={r.url}]{r.url}[/link]\n")
|
|
916
1054
|
|
|
917
|
-
if hasattr(r, "excerpts") and r.excerpts:
|
|
1055
|
+
if not no_excerpts and hasattr(r, "excerpts") and r.excerpts:
|
|
918
1056
|
console.print("[dim]Excerpts:[/dim]")
|
|
919
1057
|
for excerpt in r.excerpts[:3]:
|
|
920
1058
|
text = excerpt[:300] + "..." if len(excerpt) > 300 else excerpt
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""Skills CLI commands for parallel-cli."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import NoReturn, Protocol
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HandleError(Protocol):
|
|
13
|
+
def __call__(
|
|
14
|
+
self,
|
|
15
|
+
error: Exception,
|
|
16
|
+
output_json: bool = False,
|
|
17
|
+
exit_code: int = 0,
|
|
18
|
+
prefix: str = "Error",
|
|
19
|
+
) -> NoReturn: ...
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def create_skills_group(
|
|
23
|
+
console: Console,
|
|
24
|
+
handle_error: HandleError,
|
|
25
|
+
exit_bad_input: int,
|
|
26
|
+
exit_api_error: int,
|
|
27
|
+
) -> click.Group:
|
|
28
|
+
"""Create the skills command group.
|
|
29
|
+
|
|
30
|
+
Keeps feature-specific command wiring out of ``commands.py`` while retaining
|
|
31
|
+
lazy imports of the underlying skills implementation.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
@click.group(name="skills")
|
|
35
|
+
def skills() -> None:
|
|
36
|
+
"""Install and manage Parallel agent skills.
|
|
37
|
+
|
|
38
|
+
Set GH_TOKEN for higher GitHub API rate limits when fetching skills.
|
|
39
|
+
"""
|
|
40
|
+
pass
|
|
41
|
+
|
|
42
|
+
@skills.command(name="list")
|
|
43
|
+
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
44
|
+
def skills_list(output_json: bool) -> None:
|
|
45
|
+
"""List available Parallel skills from GitHub."""
|
|
46
|
+
from parallel_web_tools.core.skills import SkillsError, get_skills_repo_ref, list_remote_skills
|
|
47
|
+
|
|
48
|
+
try:
|
|
49
|
+
ref = get_skills_repo_ref()
|
|
50
|
+
skill_names = list_remote_skills(ref=ref)
|
|
51
|
+
except SkillsError as e:
|
|
52
|
+
handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills list failed")
|
|
53
|
+
except Exception as e:
|
|
54
|
+
handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills list failed")
|
|
55
|
+
|
|
56
|
+
if output_json:
|
|
57
|
+
print(json.dumps({"ref": ref, "skills": skill_names, "count": len(skill_names)}, indent=2))
|
|
58
|
+
return
|
|
59
|
+
|
|
60
|
+
console.print("[bold]Available skills[/bold]")
|
|
61
|
+
console.print(f"Ref: [cyan]{ref}[/cyan]")
|
|
62
|
+
for skill_name in skill_names:
|
|
63
|
+
console.print(f"- [cyan]{skill_name}[/cyan]")
|
|
64
|
+
|
|
65
|
+
@skills.command(name="install")
@click.option(
    "--project",
    is_flag=True,
    help="Install to .agents/skills in detected project root (default is global install).",
)
@click.option(
    "--skill",
    "skill_names",
    multiple=True,
    help="Skill name to install (repeatable). Defaults to all. Skills not listed will be removed.",
)
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def skills_install(project: bool, skill_names: tuple[str, ...], output_json: bool) -> None:
    """Install Parallel skills from GitHub.

    When --skill is provided, the managed install set is replaced with exactly
    the listed skills.
    """
    # Lazy import: the skills implementation is only loaded when the command runs.
    from parallel_web_tools.core.skills import (
        SkillsInputError,
        SkillsInstallLocationError,
        get_skills_repo_ref,
        install_skills,
        resolve_install_dir,
    )

    failure_prefix = "Skills install failed"
    try:
        result = install_skills(
            install_dir=resolve_install_dir(project=project),
            # An empty --skill tuple means "install everything".
            selected_skills=list(skill_names) or None,
            ref=get_skills_repo_ref(),
        )
    except (SkillsInstallLocationError, SkillsInputError) as e:
        # Caller mistakes: no project root found, or an unknown skill name.
        handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix=failure_prefix)
    except Exception as e:
        # Every other failure (download problems included) uses the API-error exit code.
        handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix=failure_prefix)

    if output_json:
        print(json.dumps(result, indent=2))
        return

    installed = ", ".join(result["installed_skills"])
    console.print("[bold green]Skills installed[/bold green]")
    console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
    console.print(f"Ref: [cyan]{result['ref']}[/cyan]")
    console.print(f"Installed ({result['count']}): [cyan]{installed}[/cyan]")
|
|
117
|
+
|
|
118
|
+
@skills.command(name="uninstall")
@click.option(
    "--project",
    is_flag=True,
    help="Uninstall from .agents/skills in detected project root (default is global install).",
)
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def skills_uninstall(project: bool, output_json: bool) -> None:
    """Uninstall skills previously installed by parallel-cli."""
    # Lazy import: the skills implementation is only loaded when the command runs.
    from parallel_web_tools.core.skills import SkillsInstallLocationError, resolve_install_dir, uninstall_skills

    failure_prefix = "Skills uninstall failed"
    try:
        result = uninstall_skills(install_dir=resolve_install_dir(project=project))
    except SkillsInstallLocationError as e:
        handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix=failure_prefix)
    except Exception as e:
        handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix=failure_prefix)

    if output_json:
        print(json.dumps(result, indent=2))
        return

    nothing_removed = result["count"] == 0
    if nothing_removed:
        console.print("[yellow]No managed skills found to uninstall[/yellow]")
    else:
        console.print("[bold green]Skills uninstalled[/bold green]")
    console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
    if not nothing_removed:
        removed = ", ".join(result["removed_skills"])
        console.print(f"Removed ({result['count']}): [cyan]{removed}[/cyan]")
|
|
149
|
+
|
|
150
|
+
@skills.command(name="reinstall")
@click.option(
    "--project",
    is_flag=True,
    help="Reinstall in .agents/skills in detected project root (default is global install).",
)
@click.option(
    "--skill",
    "skill_names",
    multiple=True,
    help="Skill name to reinstall (repeatable). Defaults to all. Skills not listed will be removed.",
)
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def skills_reinstall(project: bool, skill_names: tuple[str, ...], output_json: bool) -> None:
    """Reinstall Parallel skills (uninstall managed set then install fresh).

    When --skill is provided, the managed install set is replaced with exactly
    the listed skills.
    """
    # Lazy import: the skills implementation is only loaded when the command runs.
    from parallel_web_tools.core.skills import (
        SkillsInputError,
        SkillsInstallLocationError,
        get_skills_repo_ref,
        reinstall_skills,
        resolve_install_dir,
    )

    failure_prefix = "Skills reinstall failed"
    try:
        result = reinstall_skills(
            install_dir=resolve_install_dir(project=project),
            # An empty --skill tuple means "reinstall everything".
            selected_skills=list(skill_names) or None,
            ref=get_skills_repo_ref(),
        )
    except (SkillsInstallLocationError, SkillsInputError) as e:
        # Caller mistakes: no project root found, or an unknown skill name.
        handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix=failure_prefix)
    except Exception as e:
        # Every other failure (download problems included) uses the API-error exit code.
        handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix=failure_prefix)

    if output_json:
        print(json.dumps(result, indent=2))
        return

    removed = ", ".join(result["removed_skills"])
    installed = ", ".join(result["installed_skills"])
    console.print("[bold green]Skills reinstalled[/bold green]")
    console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
    console.print(f"Ref: [cyan]{result['ref']}[/cyan]")
    console.print(f"Removed ({result['removed_count']}): [cyan]{removed}[/cyan]")
    console.print(f"Installed ({result['installed_count']}): [cyan]{installed}[/cyan]")
|
|
203
|
+
|
|
204
|
+
return skills
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""Skill installation helpers for parallel-cli."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import shutil
|
|
9
|
+
import tempfile
|
|
10
|
+
import time
|
|
11
|
+
import zipfile
|
|
12
|
+
from collections.abc import Iterator
|
|
13
|
+
from contextlib import contextmanager
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from urllib.parse import quote
|
|
16
|
+
|
|
17
|
+
import httpx
|
|
18
|
+
|
|
19
|
+
# GitHub repository the skills archive is downloaded from, and the directory
# inside that repository whose sub-directories are treated as individual skills.
SKILLS_REPO_OWNER = "parallel-web"
SKILLS_REPO_NAME = "parallel-agent-skills"
SKILLS_REPO_SKILLS_PATH = "skills"
# Ref used when the ref environment variable is unset or blank.
DEFAULT_SKILLS_REPO_REF = "main"
# Names of the environment variables this module reads.
SKILLS_REPO_REF_ENV = "PARALLEL_SKILLS_REPO_REF"  # overrides the repository ref
GITHUB_TOKEN_ENV = "GH_TOKEN"  # optional token, sent as a Bearer Authorization header
GLOBAL_SKILLS_DIR_ENV = "PARALLEL_SKILLS_GLOBAL_DIR"  # overrides the global install directory

# A directory containing any of these entries is treated as a project root.
PROJECT_ROOT_MARKERS = (".git", "pyproject.toml", "package.json")
# Manifest stored inside the install directory; records which skills parallel-cli installed.
MANIFEST_FILE_NAME = ".parallel-cli-skills-manifest.json"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SkillsError(Exception):
    """Common ancestor of every error raised by the skills helpers."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SkillsInstallLocationError(SkillsError):
    """Signals that no project root was found for a project-local install."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SkillsDownloadError(SkillsError):
    """Signals that the skills archive could not be fetched, read or used."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class SkillsInputError(SkillsError):
    """Signals that the caller asked for skills that are not valid."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_skills_repo_ref() -> str:
    """Return the repository ref that skill downloads should use.

    The environment override is used, with surrounding whitespace removed,
    when it is set to a non-blank value; otherwise the default ref is returned.
    """
    override = (os.environ.get(SKILLS_REPO_REF_ENV) or "").strip()
    return override or DEFAULT_SKILLS_REPO_REF
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_global_skills_dir() -> Path:
    """Return the global skills directory path.

    Returns:
        The directory named by the global-dir environment override (with ``~``
        expanded) when that variable holds a non-blank value, otherwise
        ``~/.agents/skills``.
    """
    # A blank or whitespace-only override is treated as unset, matching how the
    # ref and token variables are read elsewhere in this module; previously
    # "  " produced a bogus path relative to the current directory.
    configured = (os.environ.get(GLOBAL_SKILLS_DIR_ENV) or "").strip()
    if configured:
        return Path(configured).expanduser()
    return Path.home() / ".agents" / "skills"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def find_project_root(start: Path | None = None) -> Path | None:
    """Locate the nearest enclosing directory that looks like a project root.

    The search begins at ``start`` (the current working directory when omitted)
    and moves up through its parents. The first directory containing any of the
    root markers is returned; ``None`` means no such directory exists.
    """
    origin = (Path.cwd() if start is None else start).resolve()
    for directory in [origin, *origin.parents]:
        if any((directory / marker).exists() for marker in PROJECT_ROOT_MARKERS):
            return directory
    return None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def resolve_install_dir(project: bool, start: Path | None = None) -> Path:
    """Pick the directory skills are installed into.

    With ``project`` false the global skills directory is returned. Otherwise
    the project root is searched for from ``start`` and ``.agents/skills``
    beneath it is returned.

    Raises:
        SkillsInstallLocationError: a project install was requested but no
            project root could be found.
    """
    if project:
        project_root = find_project_root(start=start)
        if project_root is not None:
            return project_root / ".agents" / "skills"
        raise SkillsInstallLocationError(
            "Could not determine project root from current directory. "
            "Run this inside a project containing one of: .git, pyproject.toml, package.json."
        )
    return get_global_skills_dir()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _github_archive_url(ref: str) -> str:
    """Build the GitHub API zipball URL for ``ref``.

    The ref is percent-encoded with no characters treated as safe, so a ref
    containing ``/`` stays a single path segment.
    """
    repo_api = f"https://api.github.com/repos/{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}"
    return f"{repo_api}/zipball/{quote(ref, safe='')}"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _github_headers() -> dict[str, str]:
    """Assemble the HTTP headers sent with skills archive requests.

    An ``Authorization: Bearer ...`` header is included only when the token
    environment variable holds a non-blank value.
    """
    token = (os.environ.get(GITHUB_TOKEN_ENV) or "").strip()
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    if token:
        headers["Authorization"] = f"Bearer {token}"
    return headers
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _download_repo_archive(client: httpx.Client, ref: str) -> bytes:
    """Download the repository zipball for ``ref`` and return its raw bytes.

    Args:
        client: HTTP client used to issue the request.
        ref: Repository ref (branch, tag or commit) to download.

    Raises:
        SkillsDownloadError: the request could not be completed (timeout,
            connection failure, ...) or the server answered with a status
            code of 400 or above.
    """
    # TODO: add retry/backoff for transient GitHub API failures (429/5xx).
    try:
        response = client.get(_github_archive_url(ref))
    except httpx.HTTPError as e:
        # Keep transport failures inside the SkillsError hierarchy so callers
        # get the same contextual message as for HTTP error statuses.
        raise SkillsDownloadError(
            f"Failed to download skills archive at ref '{ref}' from "
            f"{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}: {type(e).__name__}: {e}"
        ) from e
    if response.status_code >= 400:
        raise SkillsDownloadError(
            f"Failed to download skills archive at ref '{ref}' from "
            f"{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}: HTTP {response.status_code}"
        )
    return response.content
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _extract_repo_archive(archive_bytes: bytes, dest_dir: Path) -> Path:
    """Extract a GitHub zipball into dest_dir and return the archive root.

    Every entry is validated before it is written: absolute paths, ``..``
    components and anything that would resolve outside ``dest_dir`` are
    rejected. The first path component of the first entry is taken as the
    archive's root directory name.

    Args:
        archive_bytes: Raw bytes of the zip archive.
        dest_dir: Directory to extract into; created if missing.

    Returns:
        Path of the archive's root directory inside ``dest_dir``.

    Raises:
        SkillsDownloadError: the archive is not a valid zip, is empty, contains
            an unsafe entry, or has no root directory after extraction.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    # Resolved once up front: the destination does not change during extraction.
    dest_resolved = dest_dir.resolve()
    root_name: str | None = None

    try:
        with zipfile.ZipFile(io.BytesIO(archive_bytes)) as zf:
            for member in zf.infolist():
                member_path = Path(member.filename)
                parts = member_path.parts
                if not parts:
                    continue
                if parts[0] in ("", "/"):
                    raise SkillsDownloadError("Invalid archive entry path")
                if ".." in parts:
                    raise SkillsDownloadError("Archive contains unsafe path traversal entry")
                if root_name is None:
                    root_name = parts[0]

                target = dest_dir / member_path
                # Defence in depth: the resolved target must stay inside dest_dir.
                if not target.resolve().is_relative_to(dest_resolved):
                    raise SkillsDownloadError("Archive extraction would escape destination directory")

                if member.is_dir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue

                target.parent.mkdir(parents=True, exist_ok=True)
                # Stream the entry to disk instead of loading it fully into memory.
                with zf.open(member) as src, target.open("wb") as dst:
                    shutil.copyfileobj(src, dst)
    except zipfile.BadZipFile as e:
        raise SkillsDownloadError("Failed to read downloaded skills archive") from e

    if not root_name:
        raise SkillsDownloadError("Downloaded skills archive was empty")

    root = dest_dir / root_name
    if not root.is_dir():
        raise SkillsDownloadError("Downloaded skills archive had no repository root directory")
    return root
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@contextmanager
def _downloaded_repo_root(ref: str) -> Iterator[Path]:
    """Yield the root of a freshly downloaded and extracted copy of the repo.

    The archive is fetched first and the HTTP client closed; extraction then
    happens in a temporary directory that is removed when the context exits.
    """
    http = httpx.Client(timeout=30, follow_redirects=True, headers=_github_headers())
    with http:
        payload = _download_repo_archive(http, ref)

    with tempfile.TemporaryDirectory(prefix="parallel-skills-") as scratch:
        yield _extract_repo_archive(payload, Path(scratch))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _skills_root(repo_root: Path) -> Path:
    """Return the skills directory inside an extracted repository.

    Raises:
        SkillsDownloadError: the repository has no such directory.
    """
    candidate = repo_root / SKILLS_REPO_SKILLS_PATH
    # is_dir() is already False for a missing path, so one check covers both cases.
    if candidate.is_dir():
        return candidate
    raise SkillsDownloadError(
        f"Downloaded repository does not contain a '{SKILLS_REPO_SKILLS_PATH}/' directory at the requested ref"
    )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _list_skills_from_repo_root(repo_root: Path) -> list[str]:
    """Return the sorted names of the directories found under the skills root."""
    names = [entry.name for entry in _skills_root(repo_root).iterdir() if entry.is_dir()]
    names.sort()
    return names
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def list_remote_skills(ref: str | None = None) -> list[str]:
    """Download the repository at ``ref`` and list the skills it contains.

    When ``ref`` is omitted or empty, the configured/default ref is used.
    """
    target_ref = ref if ref else get_skills_repo_ref()
    with _downloaded_repo_root(target_ref) as checkout:
        skill_names = _list_skills_from_repo_root(checkout)
    return skill_names
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _manifest_path(install_dir: Path) -> Path:
    """Return where the manifest file lives inside ``install_dir``."""
    return install_dir.joinpath(MANIFEST_FILE_NAME)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _write_manifest(install_dir: Path, ref: str, installed_skills: list[str]) -> None:
    """Record the installed skill set in the manifest file of ``install_dir``.

    The manifest stores the source repository, skills path and ref, the sorted
    skill names, and the install time as whole seconds since the epoch. The
    install directory is created if it does not exist.
    """
    manifest = {
        "repo": "/".join((SKILLS_REPO_OWNER, SKILLS_REPO_NAME)),
        "skills_path": SKILLS_REPO_SKILLS_PATH,
        "ref": ref,
        "installed_skills": sorted(installed_skills),
        "installed_at": int(time.time()),
        "managed_by": "parallel-cli",
    }
    install_dir.mkdir(parents=True, exist_ok=True)
    destination = _manifest_path(install_dir)
    destination.write_text(json.dumps(manifest, indent=2))
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _read_manifest(install_dir: Path) -> dict:
    """Load the manifest stored in ``install_dir``.

    Reading is best-effort: a missing, unreadable or malformed manifest, or one
    whose top-level JSON value is not an object, yields an empty dict.

    Returns:
        The parsed manifest mapping, or ``{}`` when none is usable.
    """
    try:
        # Read directly instead of checking exists() first: a missing file
        # surfaces as FileNotFoundError, an OSError subclass.
        data = json.loads(_manifest_path(install_dir).read_text())
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: invalid JSON
        # (JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        return {}
    return data if isinstance(data, dict) else {}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def install_skills(
    install_dir: Path,
    selected_skills: list[str] | None = None,
    ref: str | None = None,
) -> dict:
    """Download the skills repository and install skills into ``install_dir``.

    All available skills are installed unless ``selected_skills`` names a
    subset. Skills recorded in the existing manifest but absent from the new
    set are deleted; directories never recorded in the manifest are only
    touched when they share a name with a skill being installed.

    Returns:
        A dict with ``install_dir``, ``ref``, ``installed_skills`` (sorted)
        and ``count``.

    Raises:
        SkillsInputError: a requested skill does not exist at the ref.
    """
    resolved_ref = ref or get_skills_repo_ref()

    with _downloaded_repo_root(resolved_ref) as repo_root:
        source_root = _skills_root(repo_root)
        available = _list_skills_from_repo_root(repo_root)

        # De-duplicated and sorted; an empty/None selection means "everything".
        wanted = sorted(set(selected_skills or available))
        unknown = [name for name in wanted if name not in available]
        if unknown:
            raise SkillsInputError(
                f"Unknown skills requested: {', '.join(unknown)}. Available skills: {', '.join(available)}"
            )

        # Only string entries of a list-valued manifest field are trusted.
        recorded = _read_manifest(install_dir).get("installed_skills")
        previously_managed: list[str] = []
        if isinstance(recorded, list):
            previously_managed = [entry for entry in recorded if isinstance(entry, str)]

        install_dir.mkdir(parents=True, exist_ok=True)

        # Remove skills this tool installed earlier that are no longer wanted.
        for stale in previously_managed:
            if stale in wanted:
                continue
            stale_dir = install_dir / stale
            if stale_dir.is_dir():
                shutil.rmtree(stale_dir)

        # Replace each wanted skill with a fresh copy from the download.
        for name in wanted:
            destination = install_dir / name
            if destination.exists():
                shutil.rmtree(destination)
            shutil.copytree(source_root / name, destination)

        _write_manifest(install_dir, resolved_ref, wanted)
        return {
            "install_dir": str(install_dir),
            "ref": resolved_ref,
            "installed_skills": wanted,
            "count": len(wanted),
        }
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def uninstall_skills(install_dir: Path) -> dict:
    """Remove the skills listed in the manifest of ``install_dir``.

    Only directories named in the manifest are deleted; the manifest file
    itself is removed afterwards if present.

    Returns:
        A dict with ``install_dir``, ``removed_skills`` (sorted names of the
        directories actually deleted) and ``count``.
    """
    recorded = _read_manifest(install_dir).get("installed_skills")
    managed: list[str] = []
    if isinstance(recorded, list):
        managed = [entry for entry in recorded if isinstance(entry, str)]

    removed: list[str] = []
    for name in managed:
        target = install_dir / name
        if not target.is_dir():
            # Already gone (or not a directory): nothing to delete or report.
            continue
        shutil.rmtree(target)
        removed.append(name)

    manifest_file = _manifest_path(install_dir)
    if manifest_file.exists():
        manifest_file.unlink()

    removed.sort()
    return {
        "install_dir": str(install_dir),
        "removed_skills": removed,
        "count": len(removed),
    }
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def reinstall_skills(
    install_dir: Path,
    selected_skills: list[str] | None = None,
    ref: str | None = None,
) -> dict:
    """Reinstall skills: drop the managed set and install a fresh copy.

    The existing installation is left untouched until the download and the
    skill-name validation have succeeded. ``install_skills`` deletes managed
    skills that are no longer wanted and replaces the wanted ones, so the end
    state equals "uninstall, then install". A failed download or an unknown
    skill name no longer leaves the user with nothing installed.

    Args:
        install_dir: Directory the skills live in.
        selected_skills: Skill names to install; all skills when omitted.
        ref: Repository ref to install from; the configured ref when omitted.

    Returns:
        A dict with ``install_dir``, ``ref``, ``removed_skills``,
        ``installed_skills``, ``removed_count`` and ``installed_count``.

    Raises:
        SkillsInputError: a requested skill does not exist at the ref.
        SkillsDownloadError: the archive could not be downloaded or used.
    """
    # Snapshot the managed directories that exist now: these are exactly the
    # ones the install step will delete or replace.
    recorded = _read_manifest(install_dir).get("installed_skills")
    managed = [name for name in recorded if isinstance(name, str)] if isinstance(recorded, list) else []
    removed = sorted({name for name in managed if (install_dir / name).is_dir()})

    install_result = install_skills(install_dir, selected_skills=selected_skills, ref=ref)
    return {
        "install_dir": install_result["install_dir"],
        "ref": install_result["ref"],
        "removed_skills": removed,
        "installed_skills": install_result["installed_skills"],
        "removed_count": len(removed),
        "installed_count": install_result["count"],
    }
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "parallel-web-tools"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.3.0rc2"
|
|
8
8
|
description = "Parallel Tools: CLI and Python SDK for AI-powered web intelligence"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -92,12 +92,12 @@ all = [
|
|
|
92
92
|
# Development
|
|
93
93
|
dev = [
|
|
94
94
|
"parallel-web-tools[all,spark]",
|
|
95
|
-
"pytest>=
|
|
96
|
-
"pytest-cov>=
|
|
97
|
-
"pyinstaller>=6.
|
|
98
|
-
"pre-commit>=4.
|
|
99
|
-
"ruff>=0.
|
|
100
|
-
"ty>=0.0.
|
|
95
|
+
"pytest>=9.0.0",
|
|
96
|
+
"pytest-cov>=7.0.0",
|
|
97
|
+
"pyinstaller>=6.20.0",
|
|
98
|
+
"pre-commit>=4.6.0",
|
|
99
|
+
"ruff>=0.15.0",
|
|
100
|
+
"ty>=0.0.33",
|
|
101
101
|
]
|
|
102
102
|
|
|
103
103
|
[tool.hatch.build.targets.wheel]
|
|
@@ -162,6 +162,8 @@ known-first-party = ["parallel_web_tools"]
|
|
|
162
162
|
|
|
163
163
|
[dependency-groups]
|
|
164
164
|
dev = [
|
|
165
|
-
"ipykernel>=7.
|
|
166
|
-
"
|
|
165
|
+
"ipykernel>=7.2.0",
|
|
166
|
+
"pyinstaller>=6.20.0",
|
|
167
|
+
"tach>=0.34.1",
|
|
168
|
+
"ty>=0.0.33",
|
|
167
169
|
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/research.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/sql_utils.py
RENAMED
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/user_agent.py
RENAMED
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/utils.py
RENAMED
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/__init__.py
RENAMED
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/bigquery.py
RENAMED
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/csv.py
RENAMED
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/duckdb.py
RENAMED
|
File without changes
|
{parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/json.py
RENAMED
|
File without changes
|