parallel-web-tools 0.2.0__tar.gz → 0.3.0rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/.gitignore +2 -0
  2. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/PKG-INFO +7 -7
  3. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/__init__.py +1 -1
  4. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/commands.py +208 -70
  5. parallel_web_tools-0.3.0rc2/parallel_web_tools/cli/skills.py +204 -0
  6. parallel_web_tools-0.3.0rc2/parallel_web_tools/core/skills.py +311 -0
  7. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/cloud_function/requirements.txt +1 -1
  8. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/pyproject.toml +11 -9
  9. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/README.md +0 -0
  10. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/__init__.py +0 -0
  11. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/planner.py +0 -0
  12. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/cli/updater.py +0 -0
  13. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/__init__.py +0 -0
  14. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/auth.py +0 -0
  15. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/batch.py +0 -0
  16. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/findall.py +0 -0
  17. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/monitor.py +0 -0
  18. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/polling.py +0 -0
  19. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/research.py +0 -0
  20. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/result.py +0 -0
  21. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/runner.py +0 -0
  22. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/schema.py +0 -0
  23. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/sql_utils.py +0 -0
  24. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/core/user_agent.py +0 -0
  25. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/__init__.py +0 -0
  26. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/__init__.py +0 -0
  27. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/cloud_function/main.py +0 -0
  28. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/deploy.py +0 -0
  29. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/bigquery/sql/create_functions.sql +0 -0
  30. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/duckdb/__init__.py +0 -0
  31. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/duckdb/batch.py +0 -0
  32. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/duckdb/findall.py +0 -0
  33. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/duckdb/udf.py +0 -0
  34. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/polars/__init__.py +0 -0
  35. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/polars/enrich.py +0 -0
  36. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/__init__.py +0 -0
  37. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/deploy.py +0 -0
  38. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/sql/01_setup.sql +0 -0
  39. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/sql/02_create_udf.sql +0 -0
  40. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/snowflake/sql/03_cleanup.sql +0 -0
  41. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/spark/__init__.py +0 -0
  42. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/spark/streaming.py +0 -0
  43. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/spark/udf.py +0 -0
  44. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/integrations/utils.py +0 -0
  45. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/__init__.py +0 -0
  46. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/bigquery.py +0 -0
  47. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/csv.py +0 -0
  48. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/duckdb.py +0 -0
  49. {parallel_web_tools-0.2.0 → parallel_web_tools-0.3.0rc2}/parallel_web_tools/processors/json.py +0 -0
@@ -12,6 +12,8 @@ wheels/
12
12
  # Environment files
13
13
  .env.local
14
14
  .env
15
+ .envrc
16
+ .direnv
15
17
 
16
18
  # Data files
17
19
  data/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parallel-web-tools
3
- Version: 0.2.0
3
+ Version: 0.3.0rc2
4
4
  Summary: Parallel Tools: CLI and Python SDK for AI-powered web intelligence
5
5
  Project-URL: Homepage, https://github.com/parallel-web/parallel-web-tools
6
6
  Project-URL: Documentation, https://docs.parallel.ai
@@ -51,19 +51,19 @@ Requires-Dist: duckdb>=1.0.0; extra == 'dev'
51
51
  Requires-Dist: nest-asyncio>=1.6.0; extra == 'dev'
52
52
  Requires-Dist: pandas>=2.3.0; extra == 'dev'
53
53
  Requires-Dist: polars>=1.37.0; extra == 'dev'
54
- Requires-Dist: pre-commit>=4.0.0; extra == 'dev'
54
+ Requires-Dist: pre-commit>=4.6.0; extra == 'dev'
55
55
  Requires-Dist: pyarrow>=18.0.0; extra == 'dev'
56
- Requires-Dist: pyinstaller>=6.0.0; extra == 'dev'
56
+ Requires-Dist: pyinstaller>=6.20.0; extra == 'dev'
57
57
  Requires-Dist: pyspark>=3.4.0; extra == 'dev'
58
- Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
59
- Requires-Dist: pytest>=8.0.0; extra == 'dev'
58
+ Requires-Dist: pytest-cov>=7.0.0; extra == 'dev'
59
+ Requires-Dist: pytest>=9.0.0; extra == 'dev'
60
60
  Requires-Dist: pyyaml>=6.0.0; extra == 'dev'
61
61
  Requires-Dist: questionary>=2.0.0; extra == 'dev'
62
- Requires-Dist: ruff>=0.14.0; extra == 'dev'
62
+ Requires-Dist: ruff>=0.15.0; extra == 'dev'
63
63
  Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'dev'
64
64
  Requires-Dist: sqlalchemy-bigquery>=1.11.0; extra == 'dev'
65
65
  Requires-Dist: sqlalchemy>=2.0.0; extra == 'dev'
66
- Requires-Dist: ty>=0.0.21; extra == 'dev'
66
+ Requires-Dist: ty>=0.0.33; extra == 'dev'
67
67
  Provides-Extra: duckdb
68
68
  Requires-Dist: duckdb>=1.0.0; extra == 'duckdb'
69
69
  Requires-Dist: nest-asyncio>=1.6.0; extra == 'duckdb'
@@ -29,7 +29,7 @@ from parallel_web_tools.core import (
29
29
  run_tasks,
30
30
  )
31
31
 
32
- __version__ = "0.2.0"
32
+ __version__ = "0.3.0rc2"
33
33
 
34
34
  __all__ = [
35
35
  # Auth
@@ -7,7 +7,7 @@ import os
7
7
  import sys
8
8
  import tempfile
9
9
  import time
10
- from typing import Any
10
+ from typing import Any, NoReturn
11
11
 
12
12
  import click
13
13
  import httpx
@@ -15,6 +15,7 @@ from dotenv import load_dotenv
15
15
  from rich.console import Console
16
16
 
17
17
  from parallel_web_tools import __version__
18
+ from parallel_web_tools.cli.skills import create_skills_group
18
19
  from parallel_web_tools.core import (
19
20
  AVAILABLE_PROCESSORS,
20
21
  FINDALL_GENERATORS,
@@ -170,7 +171,7 @@ def _handle_error(
170
171
  output_json: bool = False,
171
172
  exit_code: int = EXIT_API_ERROR,
172
173
  prefix: str = "Error",
173
- ) -> None:
174
+ ) -> NoReturn:
174
175
  """Handle an error with appropriate output format and exit code.
175
176
 
176
177
  In --json mode, outputs structured JSON to stdout. Otherwise, prints a
@@ -318,12 +319,12 @@ def parse_inline_data(data_json: str) -> tuple[str, list[dict[str, str]]]:
318
319
  raise click.BadParameter("Data must be an array of objects")
319
320
 
320
321
  # Infer columns from the first row
321
- columns = list(data[0].keys())
322
+ columns: list[str] = [str(k) for k in data[0].keys()]
322
323
  if not columns:
323
324
  raise click.BadParameter("Data objects must have at least one field")
324
325
 
325
326
  # Create source_columns with inferred descriptions
326
- source_columns = [{"name": col, "description": f"The {col} field"} for col in columns]
327
+ source_columns: list[dict[str, str]] = [{"name": col, "description": f"The {col} field"} for col in columns]
327
328
 
328
329
  # Write to a temporary CSV file
329
330
  temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False, newline="")
@@ -647,39 +648,116 @@ def config_cmd(key: str | None, value: str | None, output_json: bool):
647
648
  console.print(f"[green]Set {key} = {format_bool(is_auto_update_check_enabled())}[/green]")
648
649
 
649
650
 
651
+ main.add_command(create_skills_group(console, _handle_error, EXIT_BAD_INPUT, EXIT_API_ERROR))
652
+
653
+
650
654
  # =============================================================================
651
655
  # Search Command
652
656
  # =============================================================================
653
657
 
658
+ # Beta -> V1 mode mapping. Beta had three modes; V1 has two. We keep the old
659
+ # values as accepted CLI inputs and translate them so existing scripts work.
660
+ _SEARCH_MODE_MAP = {
661
+ "fast": "basic",
662
+ "one-shot": "basic",
663
+ "agentic": "advanced",
664
+ "basic": "basic",
665
+ "advanced": "advanced",
666
+ }
667
+ _DEPRECATED_SEARCH_MODES = {"fast", "one-shot", "agentic"}
668
+
669
+
670
+ def _emit_deprecation(message: str) -> None:
671
+ """Print a deprecation notice to stderr so it doesn't pollute --json output."""
672
+ click.echo(f"[deprecated] {message}", err=True)
673
+
674
+
675
+ def build_search_v1_kwargs(
676
+ *,
677
+ objective: str | None,
678
+ query: tuple[str, ...] | list[str],
679
+ mode: str | None,
680
+ max_results: int | None,
681
+ source_policy: dict[str, Any] | None,
682
+ excerpt_max_chars_per_result: int | None,
683
+ excerpt_max_chars_total: int | None,
684
+ fetch_policy: dict[str, Any] | None,
685
+ location: str | None = None,
686
+ session_id: str | None = None,
687
+ client_model: str | None = None,
688
+ ) -> dict[str, Any]:
689
+ """Translate Beta-style search params to V1 client.search() kwargs.
690
+
691
+ V1 requires search_queries; if the caller only provided an objective, we
692
+ fall back to using it as the single query so older invocations keep working.
693
+ """
694
+ queries = list(query) if query else []
695
+ if not queries and objective:
696
+ queries = [objective]
697
+
698
+ kwargs: dict[str, Any] = {"search_queries": queries}
699
+ if objective:
700
+ kwargs["objective"] = objective
701
+ if mode:
702
+ kwargs["mode"] = _SEARCH_MODE_MAP.get(mode, mode)
703
+ if excerpt_max_chars_total is not None:
704
+ kwargs["max_chars_total"] = excerpt_max_chars_total
705
+ if session_id:
706
+ kwargs["session_id"] = session_id
707
+ if client_model:
708
+ kwargs["client_model"] = client_model
709
+
710
+ advanced: dict[str, Any] = {}
711
+ if max_results is not None:
712
+ advanced["max_results"] = max_results
713
+ if source_policy:
714
+ advanced["source_policy"] = source_policy
715
+ if fetch_policy:
716
+ advanced["fetch_policy"] = fetch_policy
717
+ if excerpt_max_chars_per_result is not None:
718
+ advanced["excerpt_settings"] = {"max_chars_per_result": excerpt_max_chars_per_result}
719
+ if location:
720
+ advanced["location"] = location
721
+ if advanced:
722
+ kwargs["advanced_settings"] = advanced
723
+
724
+ return kwargs
725
+
654
726
 
655
727
  @main.command()
656
728
  @click.argument("objective", required=False)
657
729
  @click.option("-q", "--query", multiple=True, help="Keyword search query (can be repeated)")
658
730
  @click.option(
659
731
  "--mode",
660
- type=click.Choice(["one-shot", "agentic", "fast"]),
661
- default="fast",
662
- help="Search mode",
732
+ type=click.Choice(list(_SEARCH_MODE_MAP.keys())),
733
+ default="basic",
734
+ help="Search mode (one-shot/fast → basic, agentic → advanced)",
663
735
  show_default=True,
664
736
  )
665
- @click.option("--max-results", type=int, default=10, help="Maximum results", show_default=True)
737
+ @click.option("--max-results", type=int, help="Maximum results (defaults to server-side default of 10)")
666
738
  @click.option("--include-domains", multiple=True, help="Only search these domains (comma-separated or repeated)")
667
739
  @click.option("--exclude-domains", multiple=True, help="Exclude these domains (comma-separated or repeated)")
668
740
  @click.option("--after-date", help="Only results after this date (YYYY-MM-DD)")
669
- @click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts")
741
+ @click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts (min 1000)")
670
742
  @click.option(
671
743
  "--excerpt-max-chars-total", type=int, default=60000, help="Max total characters for excerpts", show_default=True
672
744
  )
673
745
  @click.option("--max-age-seconds", type=int, help="Max age in seconds before fetching live content (min 600)")
674
746
  @click.option("--timeout-seconds", type=float, help="Timeout in seconds for fetching live content")
675
747
  @click.option("--disable-cache-fallback", is_flag=True, help="Return error instead of stale cached content")
748
+ @click.option("--location", help="ISO 3166-1 alpha-2 country code for geo-targeted results (e.g. us, gb, de)")
749
+ @click.option("--session-id", help="Session ID to group related search/extract calls")
750
+ @click.option(
751
+ "--client-model",
752
+ help="The model generating this request and consuming the results (e.g. claude-opus-4-7, gpt-5.4, gemini-3.1-pro)",
753
+ )
676
754
  @click.option("-o", "--output", "output_file", type=click.Path(), help="Save results to file (JSON)")
677
755
  @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
678
756
  def search(
679
757
  objective: str | None,
680
758
  query: tuple[str, ...],
681
759
  mode: str,
682
- max_results: int,
760
+ max_results: int | None,
683
761
  include_domains: tuple[str, ...],
684
762
  exclude_domains: tuple[str, ...],
685
763
  after_date: str | None,
@@ -688,6 +766,9 @@ def search(
688
766
  max_age_seconds: int | None,
689
767
  timeout_seconds: float | None,
690
768
  disable_cache_fallback: bool,
769
+ location: str | None,
770
+ session_id: str | None,
771
+ client_model: str | None,
691
772
  output_file: str | None,
692
773
  output_json: bool,
693
774
  ):
@@ -704,6 +785,24 @@ def search(
704
785
  if not objective and not query:
705
786
  raise click.UsageError("Provide an OBJECTIVE argument or at least one --query option.")
706
787
 
788
+ if mode in _DEPRECATED_SEARCH_MODES:
789
+ new_mode = _SEARCH_MODE_MAP[mode]
790
+ _emit_deprecation(
791
+ f"--mode {mode} is a Beta value and will stop working after the Beta API sunset (June 2026). "
792
+ f"Use --mode {new_mode} instead."
793
+ )
794
+
795
+ source_policy: dict[str, Any] = {}
796
+ if include_domains:
797
+ source_policy["include_domains"] = parse_comma_separated(include_domains)
798
+ if exclude_domains:
799
+ source_policy["exclude_domains"] = parse_comma_separated(exclude_domains)
800
+ domain_total = len(source_policy.get("include_domains", [])) + len(source_policy.get("exclude_domains", []))
801
+ if domain_total > 200:
802
+ raise click.UsageError(f"--include-domains and --exclude-domains combined must be <= 200 (got {domain_total}).")
803
+ if after_date:
804
+ source_policy["after_date"] = after_date
805
+
707
806
  try:
708
807
  from parallel import Parallel
709
808
 
@@ -712,29 +811,6 @@ def search(
712
811
  api_key = get_api_key()
713
812
  client = Parallel(api_key=api_key, default_headers=get_default_headers("cli"))
714
813
 
715
- search_kwargs: dict[str, Any] = {"mode": mode, "max_results": max_results}
716
- if objective:
717
- search_kwargs["objective"] = objective
718
- if query:
719
- search_kwargs["search_queries"] = list(query)
720
-
721
- source_policy: dict[str, Any] = {}
722
- if include_domains:
723
- source_policy["include_domains"] = parse_comma_separated(include_domains)
724
- if exclude_domains:
725
- source_policy["exclude_domains"] = parse_comma_separated(exclude_domains)
726
- if after_date:
727
- source_policy["after_date"] = after_date
728
- if source_policy:
729
- search_kwargs["source_policy"] = source_policy
730
-
731
- # Excerpt settings (max_chars_total has a default, so always set)
732
- excerpts_settings: dict[str, Any] = {"max_chars_total": excerpt_max_chars_total}
733
- if excerpt_max_chars_per_result is not None:
734
- excerpts_settings["max_chars_per_result"] = excerpt_max_chars_per_result
735
- search_kwargs["excerpts"] = excerpts_settings
736
-
737
- # Fetch policy
738
814
  fetch_policy: dict[str, Any] = {}
739
815
  if max_age_seconds is not None:
740
816
  fetch_policy["max_age_seconds"] = max_age_seconds
@@ -742,21 +818,35 @@ def search(
742
818
  fetch_policy["timeout_seconds"] = timeout_seconds
743
819
  if disable_cache_fallback:
744
820
  fetch_policy["disable_cache_fallback"] = True
745
- if fetch_policy:
746
- search_kwargs["fetch_policy"] = fetch_policy
821
+
822
+ search_kwargs = build_search_v1_kwargs(
823
+ objective=objective,
824
+ query=query,
825
+ mode=mode,
826
+ max_results=max_results,
827
+ source_policy=source_policy or None,
828
+ excerpt_max_chars_per_result=excerpt_max_chars_per_result,
829
+ excerpt_max_chars_total=excerpt_max_chars_total,
830
+ fetch_policy=fetch_policy or None,
831
+ location=location,
832
+ session_id=session_id,
833
+ client_model=client_model,
834
+ )
747
835
 
748
836
  if not output_json:
749
837
  console.print("[dim]Searching...[/dim]\n")
750
838
 
751
- result = client.beta.search(**search_kwargs)
839
+ result = client.search(**search_kwargs)
752
840
 
753
841
  output_data = {
754
842
  "search_id": result.search_id,
843
+ "session_id": getattr(result, "session_id", None),
755
844
  "status": "ok",
756
845
  "results": [
757
846
  {"url": r.url, "title": r.title, "publish_date": r.publish_date, "excerpts": r.excerpts}
758
847
  for r in result.results
759
848
  ],
849
+ "usage": [{"name": u.name, "count": u.count} for u in (getattr(result, "usage", None) or [])],
760
850
  "warnings": [
761
851
  {"type": w.type, "message": w.message, "detail": getattr(w, "detail", None)} for w in result.warnings
762
852
  ]
@@ -787,18 +877,69 @@ def search(
787
877
  # =============================================================================
788
878
 
789
879
 
880
+ def build_extract_v1_kwargs(
881
+ *,
882
+ urls: tuple[str, ...] | list[str],
883
+ objective: str | None,
884
+ query: tuple[str, ...] | list[str],
885
+ full_content: bool,
886
+ full_content_max_chars: int | None,
887
+ excerpt_max_chars_per_result: int | None,
888
+ excerpt_max_chars_total: int | None,
889
+ fetch_policy: dict[str, Any] | None,
890
+ session_id: str | None = None,
891
+ client_model: str | None = None,
892
+ ) -> dict[str, Any]:
893
+ """Translate Beta-style extract params to V1 client.extract() kwargs.
894
+
895
+ Note: V1 always returns excerpts; the old `--no-excerpts` flag can no longer
896
+ disable them server-side. The CLI handles that flag by filtering excerpts out
897
+ of the output, not by passing it to the SDK.
898
+ """
899
+ kwargs: dict[str, Any] = {"urls": list(urls)}
900
+ if objective:
901
+ kwargs["objective"] = objective
902
+ if query:
903
+ kwargs["search_queries"] = list(query)
904
+ if excerpt_max_chars_total is not None:
905
+ kwargs["max_chars_total"] = excerpt_max_chars_total
906
+ if session_id:
907
+ kwargs["session_id"] = session_id
908
+ if client_model:
909
+ kwargs["client_model"] = client_model
910
+
911
+ advanced: dict[str, Any] = {}
912
+ if excerpt_max_chars_per_result is not None:
913
+ advanced["excerpt_settings"] = {"max_chars_per_result": excerpt_max_chars_per_result}
914
+ if full_content_max_chars is not None:
915
+ advanced["full_content"] = {"max_chars_per_result": full_content_max_chars}
916
+ elif full_content:
917
+ advanced["full_content"] = True
918
+ if fetch_policy:
919
+ advanced["fetch_policy"] = fetch_policy
920
+ if advanced:
921
+ kwargs["advanced_settings"] = advanced
922
+
923
+ return kwargs
924
+
925
+
790
926
  @main.command()
791
927
  @click.argument("urls", nargs=-1, required=True)
792
928
  @click.option("--objective", help="Focus extraction on a specific goal")
793
929
  @click.option("-q", "--query", multiple=True, help="Keywords to prioritize (can be repeated)")
794
930
  @click.option("--full-content", is_flag=True, help="Include complete page content")
795
931
  @click.option("--full-content-max-chars", type=int, help="Max characters per result for full content")
796
- @click.option("--no-excerpts", is_flag=True, help="Exclude excerpts from output")
932
+ @click.option("--no-excerpts", is_flag=True, help="Strip excerpts from output (V1 always returns them server-side)")
797
933
  @click.option("--excerpt-max-chars-per-result", type=int, help="Max characters per result for excerpts (min 1000)")
798
934
  @click.option("--excerpt-max-chars-total", type=int, help="Max total characters for excerpts across all URLs")
799
935
  @click.option("--max-age-seconds", type=int, help="Max age in seconds before fetching live content (min 600)")
800
936
  @click.option("--timeout-seconds", type=float, help="Timeout in seconds for fetching live content")
801
937
  @click.option("--disable-cache-fallback", is_flag=True, help="Return error instead of stale cached content")
938
+ @click.option("--session-id", help="Session ID to group related search/extract calls")
939
+ @click.option(
940
+ "--client-model",
941
+ help="The model generating this request and consuming the results (e.g. claude-opus-4-7, gpt-5.4, gemini-3.1-pro)",
942
+ )
802
943
  @click.option("-o", "--output", "output_file", type=click.Path(), help="Save results to file (JSON)")
803
944
  @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
804
945
  def extract(
@@ -813,10 +954,23 @@ def extract(
813
954
  max_age_seconds: int | None,
814
955
  timeout_seconds: float | None,
815
956
  disable_cache_fallback: bool,
957
+ session_id: str | None,
958
+ client_model: str | None,
816
959
  output_file: str | None,
817
960
  output_json: bool,
818
961
  ):
819
962
  """Extract content from URLs as clean markdown."""
963
+ if no_excerpts:
964
+ _emit_deprecation(
965
+ "--no-excerpts no longer disables excerpts server-side (V1 always returns them); "
966
+ "the flag now just strips them from the CLI output."
967
+ )
968
+
969
+ if len(urls) > 20:
970
+ raise click.UsageError(f"V1 extract accepts at most 20 URLs per request (got {len(urls)}).")
971
+ if objective is not None and len(objective) > 5000:
972
+ raise click.UsageError(f"--objective must be 5000 characters or fewer (got {len(objective)}).")
973
+
820
974
  try:
821
975
  from parallel import Parallel
822
976
 
@@ -825,30 +979,6 @@ def extract(
825
979
  api_key = get_api_key()
826
980
  client = Parallel(api_key=api_key, default_headers=get_default_headers("cli"))
827
981
 
828
- extract_kwargs: dict[str, Any] = {
829
- "urls": list(urls),
830
- }
831
-
832
- # Excerpt settings - can be bool or object with settings
833
- if no_excerpts:
834
- extract_kwargs["excerpts"] = False
835
- elif excerpt_max_chars_per_result is not None or excerpt_max_chars_total is not None:
836
- excerpts_settings: dict[str, Any] = {}
837
- if excerpt_max_chars_per_result is not None:
838
- excerpts_settings["max_chars_per_result"] = excerpt_max_chars_per_result
839
- if excerpt_max_chars_total is not None:
840
- excerpts_settings["max_chars_total"] = excerpt_max_chars_total
841
- extract_kwargs["excerpts"] = excerpts_settings
842
- else:
843
- extract_kwargs["excerpts"] = True
844
-
845
- # Full content settings - can be bool or object with settings
846
- if full_content_max_chars is not None:
847
- extract_kwargs["full_content"] = {"max_chars_per_result": full_content_max_chars}
848
- else:
849
- extract_kwargs["full_content"] = full_content
850
-
851
- # Fetch policy
852
982
  fetch_policy: dict[str, Any] = {}
853
983
  if max_age_seconds is not None:
854
984
  fetch_policy["max_age_seconds"] = max_age_seconds
@@ -856,23 +986,29 @@ def extract(
856
986
  fetch_policy["timeout_seconds"] = timeout_seconds
857
987
  if disable_cache_fallback:
858
988
  fetch_policy["disable_cache_fallback"] = True
859
- if fetch_policy:
860
- extract_kwargs["fetch_policy"] = fetch_policy
861
989
 
862
- if objective:
863
- extract_kwargs["objective"] = objective
864
- if query:
865
- extract_kwargs["search_queries"] = list(query)
990
+ extract_kwargs = build_extract_v1_kwargs(
991
+ urls=urls,
992
+ objective=objective,
993
+ query=query,
994
+ full_content=full_content,
995
+ full_content_max_chars=full_content_max_chars,
996
+ excerpt_max_chars_per_result=excerpt_max_chars_per_result,
997
+ excerpt_max_chars_total=excerpt_max_chars_total,
998
+ fetch_policy=fetch_policy or None,
999
+ session_id=session_id,
1000
+ client_model=client_model,
1001
+ )
866
1002
 
867
1003
  if not output_json:
868
1004
  console.print(f"[dim]Extracting content from {len(urls)} URL(s)...[/dim]\n")
869
1005
 
870
- result = client.beta.extract(**extract_kwargs)
1006
+ result = client.extract(**extract_kwargs)
871
1007
 
872
1008
  results_list = []
873
1009
  for r in result.results:
874
1010
  result_dict: dict[str, Any] = {"url": r.url, "title": r.title, "publish_date": r.publish_date}
875
- if hasattr(r, "excerpts") and r.excerpts:
1011
+ if not no_excerpts and hasattr(r, "excerpts") and r.excerpts:
876
1012
  result_dict["excerpts"] = r.excerpts
877
1013
  if hasattr(r, "full_content") and r.full_content:
878
1014
  result_dict["full_content"] = r.full_content
@@ -892,9 +1028,11 @@ def extract(
892
1028
 
893
1029
  output_data = {
894
1030
  "extract_id": result.extract_id,
1031
+ "session_id": getattr(result, "session_id", None),
895
1032
  "status": "ok",
896
1033
  "results": results_list,
897
1034
  "errors": errors_list,
1035
+ "usage": [{"name": u.name, "count": u.count} for u in (getattr(result, "usage", None) or [])],
898
1036
  "warnings": [
899
1037
  {"type": w.type, "message": w.message, "detail": getattr(w, "detail", None)} for w in result.warnings
900
1038
  ]
@@ -914,7 +1052,7 @@ def extract(
914
1052
  console.print(f"[bold cyan]{r.title}[/bold cyan]")
915
1053
  console.print(f"[link={r.url}]{r.url}[/link]\n")
916
1054
 
917
- if hasattr(r, "excerpts") and r.excerpts:
1055
+ if not no_excerpts and hasattr(r, "excerpts") and r.excerpts:
918
1056
  console.print("[dim]Excerpts:[/dim]")
919
1057
  for excerpt in r.excerpts[:3]:
920
1058
  text = excerpt[:300] + "..." if len(excerpt) > 300 else excerpt
@@ -0,0 +1,204 @@
1
+ """Skills CLI commands for parallel-cli."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import NoReturn, Protocol
7
+
8
+ import click
9
+ from rich.console import Console
10
+
11
+
12
+ class HandleError(Protocol):
13
+ def __call__(
14
+ self,
15
+ error: Exception,
16
+ output_json: bool = False,
17
+ exit_code: int = 0,
18
+ prefix: str = "Error",
19
+ ) -> NoReturn: ...
20
+
21
+
22
+ def create_skills_group(
23
+ console: Console,
24
+ handle_error: HandleError,
25
+ exit_bad_input: int,
26
+ exit_api_error: int,
27
+ ) -> click.Group:
28
+ """Create the skills command group.
29
+
30
+ Keeps feature-specific command wiring out of ``commands.py`` while retaining
31
+ lazy imports of the underlying skills implementation.
32
+ """
33
+
34
+ @click.group(name="skills")
35
+ def skills() -> None:
36
+ """Install and manage Parallel agent skills.
37
+
38
+ Set GH_TOKEN for higher GitHub API rate limits when fetching skills.
39
+ """
40
+ pass
41
+
42
+ @skills.command(name="list")
43
+ @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
44
+ def skills_list(output_json: bool) -> None:
45
+ """List available Parallel skills from GitHub."""
46
+ from parallel_web_tools.core.skills import SkillsError, get_skills_repo_ref, list_remote_skills
47
+
48
+ try:
49
+ ref = get_skills_repo_ref()
50
+ skill_names = list_remote_skills(ref=ref)
51
+ except SkillsError as e:
52
+ handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills list failed")
53
+ except Exception as e:
54
+ handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills list failed")
55
+
56
+ if output_json:
57
+ print(json.dumps({"ref": ref, "skills": skill_names, "count": len(skill_names)}, indent=2))
58
+ return
59
+
60
+ console.print("[bold]Available skills[/bold]")
61
+ console.print(f"Ref: [cyan]{ref}[/cyan]")
62
+ for skill_name in skill_names:
63
+ console.print(f"- [cyan]{skill_name}[/cyan]")
64
+
65
+ @skills.command(name="install")
66
+ @click.option(
67
+ "--project",
68
+ is_flag=True,
69
+ help="Install to .agents/skills in detected project root (default is global install).",
70
+ )
71
+ @click.option(
72
+ "--skill",
73
+ "skill_names",
74
+ multiple=True,
75
+ help="Skill name to install (repeatable). Defaults to all. Skills not listed will be removed.",
76
+ )
77
+ @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
78
+ def skills_install(project: bool, skill_names: tuple[str, ...], output_json: bool) -> None:
79
+ """Install Parallel skills from GitHub.
80
+
81
+ When --skill is provided, the managed install set is replaced with exactly
82
+ the listed skills.
83
+ """
84
+ from parallel_web_tools.core.skills import (
85
+ SkillsError,
86
+ SkillsInputError,
87
+ SkillsInstallLocationError,
88
+ get_skills_repo_ref,
89
+ install_skills,
90
+ resolve_install_dir,
91
+ )
92
+
93
+ try:
94
+ install_dir = resolve_install_dir(project=project)
95
+ result = install_skills(
96
+ install_dir=install_dir,
97
+ selected_skills=list(skill_names) or None,
98
+ ref=get_skills_repo_ref(),
99
+ )
100
+ except SkillsInstallLocationError as e:
101
+ handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills install failed")
102
+ except SkillsInputError as e:
103
+ handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills install failed")
104
+ except SkillsError as e:
105
+ handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills install failed")
106
+ except Exception as e:
107
+ handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills install failed")
108
+
109
+ if output_json:
110
+ print(json.dumps(result, indent=2))
111
+ return
112
+
113
+ console.print("[bold green]Skills installed[/bold green]")
114
+ console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
115
+ console.print(f"Ref: [cyan]{result['ref']}[/cyan]")
116
+ console.print(f"Installed ({result['count']}): [cyan]{', '.join(result['installed_skills'])}[/cyan]")
117
+
118
+ @skills.command(name="uninstall")
119
+ @click.option(
120
+ "--project",
121
+ is_flag=True,
122
+ help="Uninstall from .agents/skills in detected project root (default is global install).",
123
+ )
124
+ @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
125
+ def skills_uninstall(project: bool, output_json: bool) -> None:
126
+ """Uninstall skills previously installed by parallel-cli."""
127
+ from parallel_web_tools.core.skills import SkillsInstallLocationError, resolve_install_dir, uninstall_skills
128
+
129
+ try:
130
+ install_dir = resolve_install_dir(project=project)
131
+ result = uninstall_skills(install_dir=install_dir)
132
+ except SkillsInstallLocationError as e:
133
+ handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills uninstall failed")
134
+ except Exception as e:
135
+ handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills uninstall failed")
136
+
137
+ if output_json:
138
+ print(json.dumps(result, indent=2))
139
+ return
140
+
141
+ if result["count"] == 0:
142
+ console.print("[yellow]No managed skills found to uninstall[/yellow]")
143
+ console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
144
+ return
145
+
146
+ console.print("[bold green]Skills uninstalled[/bold green]")
147
+ console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
148
+ console.print(f"Removed ({result['count']}): [cyan]{', '.join(result['removed_skills'])}[/cyan]")
149
+
150
+ @skills.command(name="reinstall")
151
+ @click.option(
152
+ "--project",
153
+ is_flag=True,
154
+ help="Reinstall in .agents/skills in detected project root (default is global install).",
155
+ )
156
+ @click.option(
157
+ "--skill",
158
+ "skill_names",
159
+ multiple=True,
160
+ help="Skill name to reinstall (repeatable). Defaults to all. Skills not listed will be removed.",
161
+ )
162
+ @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
163
+ def skills_reinstall(project: bool, skill_names: tuple[str, ...], output_json: bool) -> None:
164
+ """Reinstall Parallel skills (uninstall managed set then install fresh).
165
+
166
+ When --skill is provided, the managed install set is replaced with exactly
167
+ the listed skills.
168
+ """
169
+ from parallel_web_tools.core.skills import (
170
+ SkillsError,
171
+ SkillsInputError,
172
+ SkillsInstallLocationError,
173
+ get_skills_repo_ref,
174
+ reinstall_skills,
175
+ resolve_install_dir,
176
+ )
177
+
178
+ try:
179
+ install_dir = resolve_install_dir(project=project)
180
+ result = reinstall_skills(
181
+ install_dir=install_dir,
182
+ selected_skills=list(skill_names) or None,
183
+ ref=get_skills_repo_ref(),
184
+ )
185
+ except SkillsInstallLocationError as e:
186
+ handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills reinstall failed")
187
+ except SkillsInputError as e:
188
+ handle_error(e, output_json=output_json, exit_code=exit_bad_input, prefix="Skills reinstall failed")
189
+ except SkillsError as e:
190
+ handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills reinstall failed")
191
+ except Exception as e:
192
+ handle_error(e, output_json=output_json, exit_code=exit_api_error, prefix="Skills reinstall failed")
193
+
194
+ if output_json:
195
+ print(json.dumps(result, indent=2))
196
+ return
197
+
198
+ console.print("[bold green]Skills reinstalled[/bold green]")
199
+ console.print(f"Location: [cyan]{result['install_dir']}[/cyan]")
200
+ console.print(f"Ref: [cyan]{result['ref']}[/cyan]")
201
+ console.print(f"Removed ({result['removed_count']}): [cyan]{', '.join(result['removed_skills'])}[/cyan]")
202
+ console.print(f"Installed ({result['installed_count']}): [cyan]{', '.join(result['installed_skills'])}[/cyan]")
203
+
204
+ return skills
@@ -0,0 +1,311 @@
1
+ """Skill installation helpers for parallel-cli."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import json
7
+ import os
8
+ import shutil
9
+ import tempfile
10
+ import time
11
+ import zipfile
12
+ from collections.abc import Iterator
13
+ from contextlib import contextmanager
14
+ from pathlib import Path
15
+ from urllib.parse import quote
16
+
17
+ import httpx
18
+
19
# GitHub repository (owner and name) the skills archive is downloaded from.
SKILLS_REPO_OWNER = "parallel-web"
SKILLS_REPO_NAME = "parallel-agent-skills"
# Directory inside the repository whose sub-directories are the individual skills.
SKILLS_REPO_SKILLS_PATH = "skills"
# Ref returned by get_skills_repo_ref() when SKILLS_REPO_REF_ENV is unset or blank.
DEFAULT_SKILLS_REPO_REF = "main"
# Environment variable that overrides the repository ref.
SKILLS_REPO_REF_ENV = "PARALLEL_SKILLS_REPO_REF"
# Environment variable holding an optional token sent as a Bearer credential.
GITHUB_TOKEN_ENV = "GH_TOKEN"
# Environment variable that overrides the global install directory.
GLOBAL_SKILLS_DIR_ENV = "PARALLEL_SKILLS_GLOBAL_DIR"

# Entries whose presence marks a directory as a project root.
PROJECT_ROOT_MARKERS = (".git", "pyproject.toml", "package.json")
# Name of the manifest file kept inside the install directory.
MANIFEST_FILE_NAME = ".parallel-cli-skills-manifest.json"
29
+
30
+
31
class SkillsError(Exception):
    """Root of the exception hierarchy for every skills operation."""


class SkillsInstallLocationError(SkillsError):
    """Signals that no project-local install directory could be resolved."""


class SkillsDownloadError(SkillsError):
    """Signals that remote skills content could not be fetched or unpacked."""


class SkillsInputError(SkillsError):
    """Signals that the skill arguments supplied by the caller are invalid."""
45
+
46
+
47
def get_skills_repo_ref() -> str:
    """Resolve the repository ref that skill downloads use.

    The environment override wins when it holds something other than
    whitespace (surrounding whitespace is trimmed); otherwise the built-in
    default ref is returned.
    """
    override = (os.environ.get(SKILLS_REPO_REF_ENV) or "").strip()
    return override or DEFAULT_SKILLS_REPO_REF
53
+
54
+
55
def get_global_skills_dir() -> Path:
    """Return the global skills directory path.

    Returns:
        The directory named by the ``PARALLEL_SKILLS_GLOBAL_DIR`` environment
        variable (surrounding whitespace trimmed, ``~`` expanded) when it is
        set to a non-blank value; otherwise ``~/.agents/skills``.
    """
    configured = os.environ.get(GLOBAL_SKILLS_DIR_ENV)
    # A blank override is treated as unset (same rule as the repo-ref override);
    # otherwise it would resolve to a directory literally named with spaces.
    if configured and configured.strip():
        return Path(configured.strip()).expanduser()
    return Path.home() / ".agents" / "skills"
61
+
62
+
63
def find_project_root(start: Path | None = None) -> Path | None:
    """Locate the nearest enclosing project root.

    Starting at ``start`` (the current working directory when omitted) and
    moving up through each parent, the first directory that contains any of
    the known root markers is returned. ``None`` means no ancestor matched.
    """
    origin = (start or Path.cwd()).resolve()
    for directory in [origin, *origin.parents]:
        if any((directory / marker).exists() for marker in PROJECT_ROOT_MARKERS):
            return directory
    return None
71
+
72
+
73
def resolve_install_dir(project: bool, start: Path | None = None) -> Path:
    """Pick the directory skills are installed into.

    With ``project`` false the global skills directory is used. With
    ``project`` true the result is ``<project root>/.agents/skills``, where the
    project root is searched for from ``start``; a
    ``SkillsInstallLocationError`` is raised when no root can be found.
    """
    if project:
        project_root = find_project_root(start=start)
        if project_root is not None:
            return project_root / ".agents" / "skills"
        raise SkillsInstallLocationError(
            "Could not determine project root from current directory. "
            "Run this inside a project containing one of: .git, pyproject.toml, package.json."
        )
    return get_global_skills_dir()
85
+
86
+
87
def _github_archive_url(ref: str) -> str:
    """Build the GitHub API zipball URL for ``ref``.

    The ref is percent-encoded with no safe characters, so a ``/`` inside it
    is escaped as well.
    """
    repo_slug = f"{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}"
    return f"https://api.github.com/repos/{repo_slug}/zipball/{quote(ref, safe='')}"
90
+
91
+
92
def _github_headers() -> dict[str, str]:
    """Assemble the request headers used for skills archive downloads.

    An ``Authorization: Bearer`` header is included only when the token
    environment variable holds a non-blank value.
    """
    token = (os.environ.get(GITHUB_TOKEN_ENV) or "").strip()
    auth = {"Authorization": f"Bearer {token}"} if token else {}
    return {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        **auth,
    }
102
+
103
+
104
def _download_repo_archive(client: httpx.Client, ref: str) -> bytes:
    """Download the repository zipball for ``ref`` and return its raw bytes.

    Args:
        client: HTTP client used to issue the request.
        ref: Repository ref to download.

    Returns:
        The response body.

    Raises:
        SkillsDownloadError: If the request fails at the transport level
            or the server answers with a status code of 400 or above.
    """
    # TODO: add retry/backoff for transient GitHub API failures (429/5xx).
    try:
        response = client.get(_github_archive_url(ref))
    except httpx.HTTPError as e:
        # Timeouts, DNS and connection failures are reported through the
        # module's own hierarchy so callers only need to handle SkillsError.
        raise SkillsDownloadError(
            f"Failed to download skills archive at ref '{ref}' from "
            f"{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}: {type(e).__name__}: {e}"
        ) from e
    if response.status_code >= 400:
        raise SkillsDownloadError(
            f"Failed to download skills archive at ref '{ref}' from "
            f"{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}: HTTP {response.status_code}"
        )
    return response.content
113
+
114
+
115
def _extract_repo_archive(archive_bytes: bytes, dest_dir: Path) -> Path:
    """Extract a GitHub zipball into dest_dir and return the archive root.

    Every member is checked before it is written: entries with an empty or
    absolute first component, entries containing ``..``, and entries whose
    resolved target falls outside ``dest_dir`` are rejected.

    Args:
        archive_bytes: Raw bytes of the zip archive.
        dest_dir: Directory to extract into; created if missing.

    Returns:
        ``dest_dir`` joined with the first path component of the first
        archive member.

    Raises:
        SkillsDownloadError: If the archive is unreadable, empty, contains an
            unsafe entry, or has no root directory after extraction.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    # Resolved once up front: the destination does not change during extraction.
    dest_resolved = dest_dir.resolve()
    root_name: str | None = None

    try:
        with zipfile.ZipFile(io.BytesIO(archive_bytes)) as zf:
            for member in zf.infolist():
                member_path = Path(member.filename)
                parts = member_path.parts
                if not parts:
                    continue
                if parts[0] in ("", "/"):
                    raise SkillsDownloadError("Invalid archive entry path")
                if ".." in parts:
                    raise SkillsDownloadError("Archive contains unsafe path traversal entry")
                if root_name is None:
                    # The first component of the first entry is taken as the archive root.
                    root_name = parts[0]

                target = dest_dir / member_path
                # Final guard on the resolved path, independent of the checks above.
                if not target.resolve().is_relative_to(dest_resolved):
                    raise SkillsDownloadError("Archive extraction would escape destination directory")

                if member.is_dir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue

                target.parent.mkdir(parents=True, exist_ok=True)
                with zf.open(member) as src, target.open("wb") as dst:
                    shutil.copyfileobj(src, dst)
    except zipfile.BadZipFile as e:
        raise SkillsDownloadError("Failed to read downloaded skills archive") from e

    if not root_name:
        raise SkillsDownloadError("Downloaded skills archive was empty")

    root = dest_dir / root_name
    if not root.is_dir():
        raise SkillsDownloadError("Downloaded skills archive had no repository root directory")
    return root
158
+
159
+
160
@contextmanager
def _downloaded_repo_root(ref: str) -> Iterator[Path]:
    """Yield the root directory of a freshly downloaded and extracted archive.

    The archive for ``ref`` is fetched first and the HTTP client is closed
    before extraction starts. Extraction happens in a temporary directory
    that is removed when the context exits.
    """
    client = httpx.Client(timeout=30, follow_redirects=True, headers=_github_headers())
    with client:
        payload = _download_repo_archive(client, ref)

    with tempfile.TemporaryDirectory(prefix="parallel-skills-") as scratch:
        yield _extract_repo_archive(payload, Path(scratch))
168
+
169
+
170
def _skills_root(repo_root: Path) -> Path:
    """Return the skills directory inside an extracted repository.

    Raises ``SkillsDownloadError`` when that directory is absent or is not a
    directory.
    """
    candidate = repo_root / SKILLS_REPO_SKILLS_PATH
    if candidate.exists() and candidate.is_dir():
        return candidate
    raise SkillsDownloadError(
        f"Downloaded repository does not contain a '{SKILLS_REPO_SKILLS_PATH}/' directory at the requested ref"
    )
177
+
178
+
179
def _list_skills_from_repo_root(repo_root: Path) -> list[str]:
    """List, in sorted order, the names of the directories under the skills root."""
    names = [entry.name for entry in _skills_root(repo_root).iterdir() if entry.is_dir()]
    names.sort()
    return names
182
+
183
+
184
def list_remote_skills(ref: str | None = None) -> list[str]:
    """List the skill directory names available in the remote repository.

    ``ref`` selects the repository ref; when omitted or empty, the configured
    default ref is used.
    """
    target_ref = ref if ref else get_skills_repo_ref()
    with _downloaded_repo_root(target_ref) as checkout:
        skill_names = _list_skills_from_repo_root(checkout)
    return skill_names
189
+
190
+
191
def _manifest_path(install_dir: Path) -> Path:
    """Return the location of the manifest file inside ``install_dir``."""
    return install_dir.joinpath(MANIFEST_FILE_NAME)
193
+
194
+
195
def _write_manifest(install_dir: Path, ref: str, installed_skills: list[str]) -> None:
    """Record the managed skill set in the manifest file of ``install_dir``.

    The manifest stores the source repository, the skills path, the ref, the
    sorted skill names and the current time as whole seconds. The install
    directory is created when it does not exist yet.
    """
    skill_names = sorted(installed_skills)
    timestamp = int(time.time())
    record = {
        "repo": f"{SKILLS_REPO_OWNER}/{SKILLS_REPO_NAME}",
        "skills_path": SKILLS_REPO_SKILLS_PATH,
        "ref": ref,
        "installed_skills": skill_names,
        "installed_at": timestamp,
        "managed_by": "parallel-cli",
    }
    install_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(record, indent=2)
    _manifest_path(install_dir).write_text(serialized)
206
+
207
+
208
def _read_manifest(install_dir: Path) -> dict:
    """Load the manifest stored in ``install_dir`` on a best-effort basis.

    Args:
        install_dir: Directory expected to contain the manifest file.

    Returns:
        The parsed manifest when the file exists, is readable and contains a
        JSON object; an empty dict in every other case.
    """
    try:
        # Read directly instead of checking exists() first: a single attempt
        # avoids the window in which the file can vanish between the two calls.
        data = json.loads(_manifest_path(install_dir).read_text())
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError subclass it).
        return {}
    return data if isinstance(data, dict) else {}
217
+
218
+
219
def install_skills(
    install_dir: Path,
    selected_skills: list[str] | None = None,
    ref: str | None = None,
) -> dict:
    """Install selected (or all) skills into install_dir.

    Only skills previously managed by parallel-cli are reconciled: managed
    skills that are no longer requested are removed, every requested skill is
    replaced by a fresh copy, and the manifest is rewritten. Directories that
    are neither requested nor listed in the manifest are left untouched.

    Args:
        install_dir: Directory the skills are copied into; created if missing.
        selected_skills: Skill names to install. ``None`` or an empty list
            installs every available skill.
        ref: Repository ref to download. Falls back to the configured ref.

    Returns:
        A dict with ``install_dir`` (str), ``ref``, ``installed_skills``
        (sorted names) and ``count``.

    Raises:
        SkillsInputError: If a requested skill does not exist at the ref.
        SkillsDownloadError: Propagated from the download/extract helpers.
    """
    resolved_ref = ref or get_skills_repo_ref()

    with _downloaded_repo_root(resolved_ref) as repo_root:
        skills_root = _skills_root(repo_root)
        available = _list_skills_from_repo_root(repo_root)
        available_names = set(available)
        requested = sorted(set(selected_skills or available))
        # `requested` is already sorted, so `missing` is sorted as well.
        missing = [name for name in requested if name not in available_names]
        if missing:
            raise SkillsInputError(
                f"Unknown skills requested: {', '.join(missing)}. Available skills: {', '.join(available)}"
            )

        manifest = _read_manifest(install_dir)
        managed_raw = manifest.get("installed_skills")
        # The manifest is an on-disk file and may have been edited or shipped
        # with a cloned project. Only plain single-component names are honored,
        # so an entry such as "", ".." or "../x" can never make rmtree touch
        # install_dir itself or anything outside it.
        previously_managed: list[str] = [
            name
            for name in (managed_raw if isinstance(managed_raw, list) else [])
            if isinstance(name, str) and name not in ("", ".", "..") and Path(name).name == name
        ]

        install_dir.mkdir(parents=True, exist_ok=True)

        requested_names = set(requested)
        for skill_name in previously_managed:
            if skill_name in requested_names:
                continue
            skill_dir = install_dir / skill_name
            if skill_dir.is_dir():
                shutil.rmtree(skill_dir)

        for skill_name in requested:
            skill_dir = install_dir / skill_name
            if skill_dir.exists():
                shutil.rmtree(skill_dir)
            shutil.copytree(skills_root / skill_name, skill_dir)

    _write_manifest(install_dir, resolved_ref, requested)
    return {
        "install_dir": str(install_dir),
        "ref": resolved_ref,
        "installed_skills": requested,
        "count": len(requested),
    }
268
+
269
+
270
def uninstall_skills(install_dir: Path) -> dict:
    """Uninstall only manifest-managed skills from install_dir.

    Each skill listed in the manifest is removed when it exists as a
    directory, then the manifest file itself is deleted.

    Args:
        install_dir: Directory holding the installed skills and the manifest.

    Returns:
        A dict with ``install_dir`` (str), ``removed_skills`` (sorted names of
        the directories that were actually deleted) and ``count``.
    """
    manifest = _read_manifest(install_dir)
    managed_raw = manifest.get("installed_skills")
    # The manifest is an on-disk file and may have been edited or shipped with
    # a cloned project. Only plain single-component names are honored, so an
    # entry such as "", ".." or "../x" can never make rmtree touch install_dir
    # itself or anything outside it.
    managed: list[str] = [
        name
        for name in (managed_raw if isinstance(managed_raw, list) else [])
        if isinstance(name, str) and name not in ("", ".", "..") and Path(name).name == name
    ]
    removed: list[str] = []

    for skill_name in managed:
        skill_path = install_dir / skill_name
        if skill_path.is_dir():
            shutil.rmtree(skill_path)
            removed.append(skill_name)

    _manifest_path(install_dir).unlink(missing_ok=True)

    return {
        "install_dir": str(install_dir),
        "removed_skills": sorted(removed),
        "count": len(removed),
    }
294
+
295
+
296
def reinstall_skills(
    install_dir: Path,
    selected_skills: list[str] | None = None,
    ref: str | None = None,
) -> dict:
    """Reinstall skills, replacing the managed set with a fresh install.

    The existing install is left in place until the new archive has been
    downloaded and the requested names validated, so a network failure or an
    unknown skill name does not wipe the current skills. ``install_skills``
    removes managed skills that are no longer requested and replaces the
    requested ones, so on success the result matches an uninstall followed by
    an install.

    Args:
        install_dir: Directory holding the installed skills and the manifest.
        selected_skills: Skill names to install. ``None`` or an empty list
            installs every available skill.
        ref: Repository ref to download. Falls back to the configured ref.

    Returns:
        A dict with ``install_dir``, ``ref``, ``removed_skills`` (sorted names
        of managed skill directories that existed before the reinstall),
        ``installed_skills``, ``removed_count`` and ``installed_count``.

    Raises:
        SkillsInputError: If a requested skill does not exist at the ref.
        SkillsDownloadError: Propagated from the download/extract helpers.
    """
    manifest = _read_manifest(install_dir)
    managed_raw = manifest.get("installed_skills")
    # Only plain single-component string names are considered; anything else
    # in the manifest is not a skill directory inside install_dir.
    managed = {
        name
        for name in (managed_raw if isinstance(managed_raw, list) else [])
        if isinstance(name, str) and name not in ("", ".", "..") and Path(name).name == name
    }
    # Captured before installing: these directories are removed or replaced
    # by the install step below.
    replaced = sorted(name for name in managed if (install_dir / name).is_dir())

    install_result = install_skills(install_dir, selected_skills=selected_skills, ref=ref)
    return {
        "install_dir": install_result["install_dir"],
        "ref": install_result["ref"],
        "removed_skills": replaced,
        "installed_skills": install_result["installed_skills"],
        "removed_count": len(replaced),
        "installed_count": install_result["count"],
    }
@@ -1,5 +1,5 @@
1
1
  # Cloud Function dependencies for BigQuery Remote Function
2
2
  functions-framework>=3.0.0
3
3
  flask>=3.0.0
4
- parallel-web-tools>=0.2.0
4
+ parallel-web-tools>=0.3.0rc2
5
5
  google-cloud-secret-manager>=2.20.0
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "parallel-web-tools"
7
- version = "0.2.0"
7
+ version = "0.3.0rc2"
8
8
  description = "Parallel Tools: CLI and Python SDK for AI-powered web intelligence"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -92,12 +92,12 @@ all = [
92
92
  # Development
93
93
  dev = [
94
94
  "parallel-web-tools[all,spark]",
95
- "pytest>=8.0.0",
96
- "pytest-cov>=4.0.0",
97
- "pyinstaller>=6.0.0",
98
- "pre-commit>=4.0.0",
99
- "ruff>=0.14.0",
100
- "ty>=0.0.21",
95
+ "pytest>=9.0.0",
96
+ "pytest-cov>=7.0.0",
97
+ "pyinstaller>=6.20.0",
98
+ "pre-commit>=4.6.0",
99
+ "ruff>=0.15.0",
100
+ "ty>=0.0.33",
101
101
  ]
102
102
 
103
103
  [tool.hatch.build.targets.wheel]
@@ -162,6 +162,8 @@ known-first-party = ["parallel_web_tools"]
162
162
 
163
163
  [dependency-groups]
164
164
  dev = [
165
- "ipykernel>=7.1.0",
166
- "ty>=0.0.21",
165
+ "ipykernel>=7.2.0",
166
+ "pyinstaller>=6.20.0",
167
+ "tach>=0.34.1",
168
+ "ty>=0.0.33",
167
169
  ]