parallel-web-tools 0.1.3__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/PKG-INFO +31 -2
  2. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/README.md +30 -1
  3. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/__init__.py +1 -1
  4. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/commands.py +32 -3
  5. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/batch.py +26 -3
  6. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/research.py +39 -13
  7. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/runner.py +17 -9
  8. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/cloud_function/requirements.txt +1 -1
  9. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/bigquery.py +9 -3
  10. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/csv.py +9 -3
  11. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/duckdb.py +9 -3
  12. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/json.py +9 -3
  13. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/pyproject.toml +1 -1
  14. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/.gitignore +0 -0
  15. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/__init__.py +0 -0
  16. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/planner.py +0 -0
  17. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/updater.py +0 -0
  18. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/__init__.py +0 -0
  19. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/auth.py +0 -0
  20. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/findall.py +0 -0
  21. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/monitor.py +0 -0
  22. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/polling.py +0 -0
  23. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/result.py +0 -0
  24. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/schema.py +0 -0
  25. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/sql_utils.py +0 -0
  26. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/user_agent.py +0 -0
  27. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/__init__.py +0 -0
  28. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/__init__.py +0 -0
  29. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/cloud_function/main.py +0 -0
  30. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/deploy.py +0 -0
  31. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/sql/create_functions.sql +0 -0
  32. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/__init__.py +0 -0
  33. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/batch.py +0 -0
  34. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/findall.py +0 -0
  35. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/udf.py +0 -0
  36. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/polars/__init__.py +0 -0
  37. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/polars/enrich.py +0 -0
  38. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/__init__.py +0 -0
  39. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/deploy.py +0 -0
  40. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/01_setup.sql +0 -0
  41. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/02_create_udf.sql +0 -0
  42. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/03_cleanup.sql +0 -0
  43. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/__init__.py +0 -0
  44. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/streaming.py +0 -0
  45. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/udf.py +0 -0
  46. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/utils.py +0 -0
  47. {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/__init__.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: parallel-web-tools
- Version: 0.1.3
+ Version: 0.2.0
  Summary: Parallel Tools: CLI and Python SDK for AI-powered web intelligence
  Project-URL: Homepage, https://github.com/parallel-web/parallel-web-tools
  Project-URL: Documentation, https://docs.parallel.ai
@@ -98,6 +98,7 @@ CLI and data enrichment utilities for the [Parallel API](https://docs.parallel.a
  - **Web Search** - AI-powered search with domain filtering and date ranges
  - **Content Extraction** - Extract clean markdown from any URL
  - **Data Enrichment** - Enrich CSV, JSON, DuckDB, and BigQuery data with AI
+ - **Follow-up Context** - Chain research and enrichment tasks using `--previous-interaction-id`
  - **AI-Assisted Planning** - Use natural language to define what data you want
  - **Multiple Integrations** - Polars, DuckDB, Snowflake, BigQuery, Spark

@@ -293,13 +294,41 @@ echo "What is the latest funding for Anthropic?" | parallel-cli search - --json
  echo "Research question" | parallel-cli research run - --json

  # Async: launch then poll separately
- parallel-cli research run "question" --no-wait --json   # returns run_id
+ parallel-cli research run "question" --no-wait --json   # returns run_id + interaction_id
  parallel-cli research status trun_xxx --json             # check status
  parallel-cli research poll trun_xxx --json               # wait and get result

+ # Follow-up: reuse context from a previous task
+ parallel-cli research run "follow-up question" --previous-interaction-id trun_xxx --json
+ parallel-cli enrich run --data '[...]' --previous-interaction-id trun_xxx --json
+
  # Exit codes: 0=ok, 2=bad input, 3=auth error, 4=api error, 5=timeout
  ```

+ ### Follow-up research with context reuse
+
+ Tasks return an `interaction_id` that can be passed as `--previous-interaction-id` on a subsequent research or enrichment run. The new task inherits the context from the prior one, so follow-up questions can reference earlier results without repeating them.
+
+ ```bash
+ # Step 1: Run initial research (interaction_id is in the JSON output)
+ parallel-cli research run "What are the top 3 AI companies?" --json --processor lite-fast
+ # → { "run_id": "trun_abc", "interaction_id": "trun_abc", ... }
+
+ # Step 2: Follow-up research referencing the first task's context
+ parallel-cli research run "What products does the #1 company make?" \
+   --previous-interaction-id trun_abc --json
+
+ # Step 3: Use research context for enrichment
+ parallel-cli enrich run \
+   --data '[{"company": "Anthropic"}, {"company": "OpenAI"}]' \
+   --target enriched.csv \
+   --source-columns '[{"name": "company", "description": "Company name"}]' \
+   --enriched-columns '[{"name": "products", "description": "Main products"}]' \
+   --previous-interaction-id trun_abc --json
+ ```
+
+ The `interaction_id` is shown in both human-readable and `--json` output for `research run`, `research status`, and `research poll`.
+
  ### More examples

  ```bash
README.md

@@ -13,6 +13,7 @@ CLI and data enrichment utilities for the [Parallel API](https://docs.parallel.a
  - **Web Search** - AI-powered search with domain filtering and date ranges
  - **Content Extraction** - Extract clean markdown from any URL
  - **Data Enrichment** - Enrich CSV, JSON, DuckDB, and BigQuery data with AI
+ - **Follow-up Context** - Chain research and enrichment tasks using `--previous-interaction-id`
  - **AI-Assisted Planning** - Use natural language to define what data you want
  - **Multiple Integrations** - Polars, DuckDB, Snowflake, BigQuery, Spark

@@ -208,13 +209,41 @@ echo "What is the latest funding for Anthropic?" | parallel-cli search - --json
  echo "Research question" | parallel-cli research run - --json

  # Async: launch then poll separately
- parallel-cli research run "question" --no-wait --json   # returns run_id
+ parallel-cli research run "question" --no-wait --json   # returns run_id + interaction_id
  parallel-cli research status trun_xxx --json             # check status
  parallel-cli research poll trun_xxx --json               # wait and get result

+ # Follow-up: reuse context from a previous task
+ parallel-cli research run "follow-up question" --previous-interaction-id trun_xxx --json
+ parallel-cli enrich run --data '[...]' --previous-interaction-id trun_xxx --json
+
  # Exit codes: 0=ok, 2=bad input, 3=auth error, 4=api error, 5=timeout
  ```

+ ### Follow-up research with context reuse
+
+ Tasks return an `interaction_id` that can be passed as `--previous-interaction-id` on a subsequent research or enrichment run. The new task inherits the context from the prior one, so follow-up questions can reference earlier results without repeating them.
+
+ ```bash
+ # Step 1: Run initial research (interaction_id is in the JSON output)
+ parallel-cli research run "What are the top 3 AI companies?" --json --processor lite-fast
+ # → { "run_id": "trun_abc", "interaction_id": "trun_abc", ... }
+
+ # Step 2: Follow-up research referencing the first task's context
+ parallel-cli research run "What products does the #1 company make?" \
+   --previous-interaction-id trun_abc --json
+
+ # Step 3: Use research context for enrichment
+ parallel-cli enrich run \
+   --data '[{"company": "Anthropic"}, {"company": "OpenAI"}]' \
+   --target enriched.csv \
+   --source-columns '[{"name": "company", "description": "Company name"}]' \
+   --enriched-columns '[{"name": "products", "description": "Main products"}]' \
+   --previous-interaction-id trun_abc --json
+ ```
+
+ The `interaction_id` is shown in both human-readable and `--json` output for `research run`, `research status`, and `research poll`.
+
  ### More examples

  ```bash
parallel_web_tools/__init__.py

@@ -29,7 +29,7 @@ from parallel_web_tools.core import (
      run_tasks,
  )

- __version__ = "0.1.3"
+ __version__ = "0.2.0"

  __all__ = [
      # Auth
parallel_web_tools/cli/commands.py

@@ -960,6 +960,10 @@ def enrich():
  @click.option("--dry-run", is_flag=True, help="Show what would be executed without making API calls")
  @click.option("--json", "output_json", is_flag=True, help="Output results as JSON to stdout")
  @click.option("-o", "--output", "output_file", type=click.Path(), help="Save results to JSON file")
+ @click.option(
+     "--previous-interaction-id",
+     help="Interaction ID from a previous task to reuse as context",
+ )
  def enrich_run(
      config_file: str | None,
      source_type: str | None,
@@ -974,6 +978,7 @@ def enrich_run(
      dry_run: bool,
      output_json: bool,
      output_file: str | None,
+     previous_interaction_id: str | None,
  ):
      """Run data enrichment from YAML config or CLI arguments.

@@ -1043,7 +1048,7 @@ def enrich_run(

          if not output_json:
              console.print(f"[bold cyan]Running enrichment from {config_file}...[/bold cyan]\n")
-         result = run_enrichment(config_file, no_wait=no_wait)
+         result = run_enrichment(config_file, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
      else:
          # After validation, these are guaranteed non-None
          assert source_type is not None
@@ -1134,7 +1139,7 @@ def enrich_run(

          if not output_json:
              console.print(f"[bold cyan]Running enrichment: {source} -> {target}[/bold cyan]\n")
-         result = run_enrichment_from_dict(config, no_wait=no_wait)
+         result = run_enrichment_from_dict(config, no_wait=no_wait, previous_interaction_id=previous_interaction_id)

      if no_wait and result:
          if output_json:
@@ -1572,6 +1577,10 @@ def research():
      "-o", "--output", "output_file", type=click.Path(), help="Save results (creates {name}.json and {name}.md)"
  )
  @click.option("--json", "output_json", is_flag=True, help="Output JSON to stdout")
+ @click.option(
+     "--previous-interaction-id",
+     help="Interaction ID from a previous task to reuse as context",
+ )
  def research_run(
      query: str | None,
      input_file: str | None,
@@ -1582,12 +1591,16 @@ def research_run(
      dry_run: bool,
      output_file: str | None,
      output_json: bool,
+     previous_interaction_id: str | None,
  ):
      """Run deep research on a question or topic.

      QUERY is the research question (max 15,000 chars). Alternatively, use --input-file
      or pass "-" as QUERY to read from stdin.

+     Use --previous-interaction-id to continue research from a prior task's context.
+     The interaction ID is shown in the output of every research run.
+
      Examples:

          parallel-cli research run "What are the latest developments in quantum computing?"
@@ -1595,6 +1608,9 @@ def research_run(
          parallel-cli research run -f question.txt --processor ultra -o report

          echo "My research question" | parallel-cli research run - --json
+
+         # Follow-up research using context from a previous task:
+         parallel-cli research run "What are the implications?" --previous-interaction-id trun_abc123
      """
      # Read from stdin if "-" is passed
      if query == "-":
@@ -1634,13 +1650,18 @@ def research_run(
          # Create task and return immediately
          if not output_json:
              console.print(f"[dim]Creating research task with processor: {processor}...[/dim]")
-         result = create_research_task(query, processor=processor, source="cli")
+         result = create_research_task(
+             query, processor=processor, source="cli", previous_interaction_id=previous_interaction_id
+         )

          if not output_json:
              console.print(f"\n[bold green]Task created: {result['run_id']}[/bold green]")
+             if result.get("interaction_id"):
+                 console.print(f"Interaction ID: {result['interaction_id']}")
              console.print(f"Track progress: {result['result_url']}")
              console.print("\n[dim]Use 'parallel-cli research status <run_id>' to check status[/dim]")
              console.print("[dim]Use 'parallel-cli research poll <run_id>' to wait for results[/dim]")
+             console.print("[dim]Use '--previous-interaction-id' on a new run to continue this research[/dim]")

          if output_json:
              print(json.dumps(result, indent=2))
@@ -1673,6 +1694,7 @@ def research_run(
          poll_interval=poll_interval,
          on_status=on_status,
          source="cli",
+         previous_interaction_id=previous_interaction_id,
      )

      _output_research_result(result, output_file, output_json)
@@ -1715,11 +1737,13 @@ def research_status(run_id: str, output_json: bool):
          }.get(status, "white")

          console.print(f"[bold]Task:[/bold] {run_id}")
+         console.print(f"[bold]Interaction ID:[/bold] {result.get('interaction_id', run_id)}")
          console.print(f"[bold]Status:[/bold] [{status_color}]{status}[/{status_color}]")
          console.print(f"[bold]URL:[/bold] {result['result_url']}")

          if status == "completed":
              console.print("\n[dim]Use 'parallel-cli research poll <run_id>' to retrieve results[/dim]")
+             console.print("[dim]Use '--previous-interaction-id' on a new run to continue this research[/dim]")

      except Exception as e:
          _handle_error(e, output_json=output_json)
@@ -1915,6 +1939,7 @@ def _output_research_result(
      output = result.get("output", {})
      output_data = {
          "run_id": result.get("run_id"),
+         "interaction_id": result.get("interaction_id"),
          "result_url": result.get("result_url"),
          "status": result.get("status"),
          "output": output.copy() if isinstance(output, dict) else output,
@@ -1957,6 +1982,7 @@ def _output_research_result(
      else:
          console.print("\n[bold green]Research Complete![/bold green]")
          console.print(f"[dim]Task: {result.get('run_id')}[/dim]")
+         console.print(f"[dim]Interaction ID: {result.get('interaction_id')}[/dim]")
          console.print(f"[dim]URL: {result.get('result_url')}[/dim]\n")

          # Show executive summary if available
@@ -1973,6 +1999,9 @@ def _output_research_result(

      if not output_file:
          console.print("[dim]Use --output to save full results to a file, or --json to print to stdout[/dim]")
+         interaction_id = result.get("interaction_id")
+         if interaction_id:
+             console.print(f"[dim]Use '--previous-interaction-id {interaction_id}' to continue this research[/dim]")


  # =============================================================================
parallel_web_tools/core/batch.py

@@ -99,6 +99,7 @@ def enrich_batch(
      poll_interval: int = 5,
      include_basis: bool = True,
      source: ClientSource = "python",
+     previous_interaction_id: str | None = None,
  ) -> list[dict[str, Any]]:
      """Enrich multiple inputs using the Parallel Task Group API.

@@ -111,6 +112,7 @@ def enrich_batch(
          poll_interval: Seconds between status polls
          include_basis: Whether to include citations
          source: Client source identifier for User-Agent (default: python)
+         previous_interaction_id: Interaction ID from a previous task to reuse as context.

      Returns:
          List of result dictionaries in same order as inputs.
@@ -131,7 +133,13 @@ def enrich_batch(
      taskgroup_id = task_group.task_group_id

      # Add runs - use SDK type for proper typing
-     run_inputs: list[BetaRunInputParam] = [{"input": inp, "processor": processor} for inp in inputs]
+     def _make_run_input(inp: dict[str, Any]) -> BetaRunInputParam:
+         entry: BetaRunInputParam = {"input": inp, "processor": processor}
+         if previous_interaction_id:
+             entry["previous_interaction_id"] = previous_interaction_id
+         return entry
+
+     run_inputs: list[BetaRunInputParam] = [_make_run_input(inp) for inp in inputs]
      response = client.beta.task_group.add_runs(
          taskgroup_id,
          default_task_spec=task_spec,
@@ -187,6 +195,7 @@ def enrich_single(
      timeout: int = 300,
      include_basis: bool = True,
      source: ClientSource = "python",
+     previous_interaction_id: str | None = None,
  ) -> dict[str, Any]:
      """Enrich a single input using the Parallel API."""
      results = enrich_batch(
@@ -197,6 +206,7 @@ def enrich_single(
          timeout=timeout,
          include_basis=include_basis,
          source=source,
+         previous_interaction_id=previous_interaction_id,
      )
      return results[0] if results else {"error": "No result"}

@@ -207,6 +217,7 @@ def create_task_group(
      OutputModel,
      processor: str = "core-fast",
      source: ClientSource = "python",
+     previous_interaction_id: str | None = None,
  ) -> dict[str, Any]:
      """Create a task group and add runs without waiting for completion.

@@ -216,6 +227,7 @@ def create_task_group(
          OutputModel: Pydantic model for output schema.
          processor: Parallel processor (default: core-fast).
          source: Client source identifier for User-Agent.
+         previous_interaction_id: Interaction ID from a previous task to reuse as context.

      Returns:
          Dict with taskgroup_id, url, and num_runs.
@@ -238,12 +250,19 @@ def create_task_group(
      taskgroup_id = task_group.task_group_id
      logger.info(f"Created taskgroup id {taskgroup_id}")

+     # Build run input helper
+     def _make_run_input(row: dict[str, Any]) -> BetaRunInputParam:
+         entry: BetaRunInputParam = {"input": row, "processor": processor}
+         if previous_interaction_id:
+             entry["previous_interaction_id"] = previous_interaction_id
+         return entry
+
      # Add runs in batches
      batch_size = 100
      total_created = 0
      for i in range(0, len(input_data), batch_size):
          batch = input_data[i : i + batch_size]
-         run_inputs: list[BetaRunInputParam] = [{"input": row, "processor": processor} for row in batch]
+         run_inputs: list[BetaRunInputParam] = [_make_run_input(row) for row in batch]
          response = client.beta.task_group.add_runs(
              taskgroup_id,
              default_task_spec=task_spec,
@@ -361,6 +380,7 @@ def run_tasks(
      processor: str = "core-fast",
      source: ClientSource = "python",
      timeout: int = 3600,
+     previous_interaction_id: str | None = None,
  ) -> list[Any]:
      """Run batch tasks using Pydantic models for schema.

@@ -368,6 +388,7 @@ def run_tasks(

      Args:
          timeout: Max seconds to wait for completion (default: 3600 = 1 hour).
+         previous_interaction_id: Interaction ID from a previous task to reuse as context.
      """
      logger = logging.getLogger(__name__)

@@ -375,7 +396,9 @@ def run_tasks(
      logger.info(f"Generated batch_id: {batch_id}")

      # Create task group and add runs
-     tg_info = create_task_group(input_data, InputModel, OutputModel, processor, source)
+     tg_info = create_task_group(
+         input_data, InputModel, OutputModel, processor, source, previous_interaction_id=previous_interaction_id
+     )
      taskgroup_id = tg_info["taskgroup_id"]

      # Wait for completion
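
The same keyword now threads through all the public batch helpers (`enrich_batch`, `enrich_single`, `create_task_group`, `run_tasks`). A minimal sketch of calling the updated `run_tasks` from Python, mirroring the positional call the processors use below; the models, rows, and the `trun_abc` interaction ID are illustrative, not from the package:

```python
# Sketch only: CompanyInput/CompanyOutput and the rows are made-up examples,
# and "trun_abc" is a placeholder interaction ID from an earlier task.
from pydantic import BaseModel

# run_tasks is imported at the package top level per the __init__.py diff above
from parallel_web_tools import run_tasks


class CompanyInput(BaseModel):
    company: str


class CompanyOutput(BaseModel):
    products: str


rows = [{"company": "Anthropic"}, {"company": "OpenAI"}]

# Every run added to the task group carries the same previous_interaction_id,
# matching how the processors call run_tasks in the diffs below.
results = run_tasks(
    rows,
    CompanyInput,
    CompanyOutput,
    "core-fast",
    previous_interaction_id="trun_abc",
)
```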
parallel_web_tools/core/research.py

@@ -74,6 +74,7 @@ def create_research_task(
      processor: str = "pro-fast",
      api_key: str | None = None,
      source: ClientSource = "python",
+     previous_interaction_id: str | None = None,
  ) -> dict[str, Any]:
      """Create a deep research task without waiting for results.

@@ -82,19 +83,25 @@ def create_research_task(
          processor: Processor tier (see RESEARCH_PROCESSORS).
          api_key: Optional API key.
          source: Client source identifier for User-Agent.
+         previous_interaction_id: Interaction ID from a previous task to reuse as context.

      Returns:
-         Dict with run_id, result_url, and other task metadata.
+         Dict with run_id, interaction_id, result_url, and other task metadata.
      """
      client = create_client(api_key, source)

-     task = client.task_run.create(
-         input=query[:15000],
-         processor=processor,
-     )
+     create_kwargs: dict[str, Any] = {
+         "input": query[:15000],
+         "processor": processor,
+     }
+     if previous_interaction_id:
+         create_kwargs["previous_interaction_id"] = previous_interaction_id
+
+     task = client.task_run.create(**create_kwargs)

      return {
          "run_id": task.run_id,
+         "interaction_id": getattr(task, "interaction_id", task.run_id),
          "result_url": f"{PLATFORM_BASE}/play/deep-research/{task.run_id}",
          "processor": processor,
          "status": getattr(task, "status", "pending"),
@@ -114,13 +121,14 @@ def get_research_status(
          source: Client source identifier for User-Agent.

      Returns:
-         Dict with status and other task info.
+         Dict with status, interaction_id, and other task info.
      """
      client = create_client(api_key, source)
      status = client.task_run.retrieve(run_id=run_id)

      return {
          "run_id": run_id,
+         "interaction_id": getattr(status, "interaction_id", run_id),
          "status": status.status,
          "result_url": f"{PLATFORM_BASE}/play/deep-research/{run_id}",
      }
@@ -162,6 +170,7 @@ def _poll_until_complete(
      timeout: int,
      poll_interval: int,
      on_status: Callable[[str, str], None] | None,
+     interaction_id: str | None = None,
  ) -> dict[str, Any]:
      """Poll a research task until completion and return the result.

@@ -172,6 +181,7 @@ def _poll_until_complete(
          timeout: Maximum wait time in seconds.
          poll_interval: Seconds between status checks.
          on_status: Optional callback called with (status, run_id) on each poll.
+         interaction_id: Known interaction ID (updated from poll responses).

      Returns:
          Dict with content and metadata.
@@ -180,9 +190,15 @@ def _poll_until_complete(
          TimeoutError: If the task doesn't complete within timeout.
          RuntimeError: If the task fails or is cancelled.
      """
+     # Track interaction_id from poll responses
+     poll_state = {"interaction_id": interaction_id}

      def retrieve():
-         return client.task_run.retrieve(run_id=run_id)
+         response = client.task_run.retrieve(run_id=run_id)
+         # Capture interaction_id from the latest response
+         if hasattr(response, "interaction_id") and response.interaction_id:
+             poll_state["interaction_id"] = response.interaction_id
+         return response

      def extract_status(response):
          return response.status
@@ -193,6 +209,7 @@ def _poll_until_complete(
          output_data = _serialize_output(output)
          return {
              "run_id": run_id,
+             "interaction_id": poll_state["interaction_id"] or run_id,
              "result_url": result_url,
              "status": "completed",
              "output": output_data,
@@ -226,6 +243,7 @@ def run_research(
      poll_interval: int = 45,
      on_status: Callable[[str, str], None] | None = None,
      source: ClientSource = "python",
+     previous_interaction_id: str | None = None,
  ) -> dict[str, Any]:
      """Run deep research and wait for results.

@@ -240,6 +258,7 @@ def run_research(
          poll_interval: Seconds between status checks (default: 45).
          on_status: Optional callback called with (status, run_id) on each poll.
          source: Client source identifier for User-Agent.
+         previous_interaction_id: Interaction ID from a previous task to reuse as context.

      Returns:
          Dict with content and metadata.
@@ -250,17 +269,24 @@ def run_research(
      """
      client = create_client(api_key, source)

-     task = client.task_run.create(
-         input=query[:15000],
-         processor=processor,
-     )
+     create_kwargs: dict[str, Any] = {
+         "input": query[:15000],
+         "processor": processor,
+     }
+     if previous_interaction_id:
+         create_kwargs["previous_interaction_id"] = previous_interaction_id
+
+     task = client.task_run.create(**create_kwargs)
      run_id = task.run_id
+     interaction_id = getattr(task, "interaction_id", run_id)
      result_url = f"{PLATFORM_BASE}/play/deep-research/{run_id}"

      if on_status:
          on_status("created", run_id)

-     return _poll_until_complete(client, run_id, result_url, timeout, poll_interval, on_status)
+     return _poll_until_complete(
+         client, run_id, result_url, timeout, poll_interval, on_status, interaction_id=interaction_id
+     )


  def poll_research(
@@ -284,7 +310,7 @@ def poll_research(
          source: Client source identifier for User-Agent.

      Returns:
-         Dict with content and metadata.
+         Dict with content and metadata including interaction_id.
      """
      client = create_client(api_key, source)
      result_url = f"{PLATFORM_BASE}/play/deep-research/{run_id}"
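
The same chaining is usable from Python through the functions changed above. A hedged sketch of two linked research runs, assuming the query is the first positional argument (as it is in `create_research_task`); the queries and IDs are placeholders and error handling is omitted:

```python
# Sketch of chaining research tasks via previous_interaction_id; the module
# path matches this file, the queries and IDs are illustrative.
from parallel_web_tools.core.research import run_research

first = run_research("What are the top 3 AI companies?", processor="pro-fast")

# Per the diff above, results now include interaction_id (falling back to
# run_id), which seeds the follow-up task's context.
follow_up = run_research(
    "What products does the #1 company make?",
    previous_interaction_id=first["interaction_id"],
)
print(follow_up["status"], follow_up["result_url"])
```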
parallel_web_tools/core/runner.py

@@ -8,35 +8,40 @@ from parallel_web_tools.core.schema import InputSchema, SourceType, load_schema,
  logger = logging.getLogger(__name__)


- def _run_processor(parsed_schema: InputSchema, no_wait: bool = False) -> dict | None:
+ def _run_processor(
+     parsed_schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+ ) -> dict | None:
      """Run the appropriate processor for the given schema."""
      match parsed_schema.source_type:
          case SourceType.CSV:
              from parallel_web_tools.processors.csv import process_csv

-             return process_csv(parsed_schema, no_wait=no_wait)
+             return process_csv(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
          case SourceType.JSON:
              from parallel_web_tools.processors.json import process_json

-             return process_json(parsed_schema, no_wait=no_wait)
+             return process_json(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
          case SourceType.DUCKDB:
              from parallel_web_tools.processors.duckdb import process_duckdb

-             return process_duckdb(parsed_schema, no_wait=no_wait)
+             return process_duckdb(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
          case SourceType.BIGQUERY:
              from parallel_web_tools.processors.bigquery import process_bigquery

-             return process_bigquery(parsed_schema, no_wait=no_wait)
+             return process_bigquery(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
          case _:
              raise NotImplementedError(f"{parsed_schema.source_type} is not supported")


- def run_enrichment(config_file: str | Path, no_wait: bool = False) -> dict | None:
+ def run_enrichment(
+     config_file: str | Path, no_wait: bool = False, previous_interaction_id: str | None = None
+ ) -> dict | None:
      """Run data enrichment using a YAML config file.

      Args:
          config_file: Path to YAML configuration file
          no_wait: If True, return taskgroup info without waiting for completion.
+         previous_interaction_id: Interaction ID from a previous task to reuse as context.

      Example:
          >>> from parallel_web_tools import run_enrichment
@@ -52,7 +57,7 @@ def run_enrichment(config_file: str | Path, no_wait: bool = False) -> dict | Non
      parsed_schema = parse_schema(schema)

      logger.info(f"Running enrichment: {parsed_schema.source} -> {parsed_schema.target}")
-     result = _run_processor(parsed_schema, no_wait=no_wait)
+     result = _run_processor(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)

      if no_wait:
          return result
@@ -61,12 +66,15 @@ def run_enrichment(config_file: str | Path, no_wait: bool = False) -> dict | Non
      return None


- def run_enrichment_from_dict(config: dict, no_wait: bool = False) -> dict | None:
+ def run_enrichment_from_dict(
+     config: dict, no_wait: bool = False, previous_interaction_id: str | None = None
+ ) -> dict | None:
      """Run data enrichment using a configuration dictionary.

      Args:
          config: Configuration dictionary matching YAML schema
          no_wait: If True, return taskgroup info without waiting for completion.
+         previous_interaction_id: Interaction ID from a previous task to reuse as context.

      Example:
          >>> config = {
@@ -82,7 +90,7 @@ def run_enrichment_from_dict(config: dict, no_wait: bool = False) -> dict | None
      parsed_schema = parse_schema(config)

      logger.info(f"Running enrichment: {parsed_schema.source} -> {parsed_schema.target}")
-     result = _run_processor(parsed_schema, no_wait=no_wait)
+     result = _run_processor(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)

      if no_wait:
          return result
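
For completeness, a sketch of the dict-based entry point with the new keyword. `run_enrichment` is importable from the package top level per its own docstring; this assumes `run_enrichment_from_dict` is exported the same way, and the config keys are inferred from the CLI flags in the README diff, so they may not match the YAML schema exactly:

```python
# Sketch only: the config keys below are guesses based on the CLI flags
# (--source-columns / --enriched-columns); "trun_abc" is a placeholder ID.
from parallel_web_tools import run_enrichment_from_dict

config = {
    "source": "companies.csv",
    "target": "enriched.csv",
    "source_columns": [{"name": "company", "description": "Company name"}],
    "enriched_columns": [{"name": "products", "description": "Main products"}],
}

# previous_interaction_id threads through _run_processor to whichever
# processor (CSV here) handles the source type.
run_enrichment_from_dict(config, previous_interaction_id="trun_abc")
```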
parallel_web_tools/integrations/bigquery/cloud_function/requirements.txt

@@ -1,5 +1,5 @@
  # Cloud Function dependencies for BigQuery Remote Function
  functions-framework>=3.0.0
  flask>=3.0.0
- parallel-web-tools>=0.1.3
+ parallel-web-tools>=0.2.0
  google-cloud-secret-manager>=2.20.0
parallel_web_tools/processors/bigquery.py

@@ -46,7 +46,9 @@ def fetch_all(conn: Connection, table: str) -> list[dict[str, Any]]:
      return [dict(row) for row in rows]


- def process_bigquery(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] | None:
+ def process_bigquery(
+     schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+ ) -> dict[str, Any] | None:
      """Process BigQuery table and enrich data."""
      InputModel, OutputModel = parse_input_and_output_models(schema)

@@ -57,9 +59,13 @@ def process_bigquery(schema: InputSchema, no_wait: bool = False) -> dict[str, An
      data = fetch_all(conn, schema.source)

      if no_wait:
-         return create_task_group(data, InputModel, OutputModel, schema.processor)
+         return create_task_group(
+             data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+         )

-     output_rows = run_tasks(data, InputModel, OutputModel, schema.processor)
+     output_rows = run_tasks(
+         data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+     )
      df = pl.DataFrame(output_rows)

      _project, dataset, table = split_bq_name(schema.target)
parallel_web_tools/processors/csv.py

@@ -10,7 +10,9 @@ from parallel_web_tools.core.batch import create_task_group
  logger = logging.getLogger(__name__)


- def process_csv(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] | None:
+ def process_csv(
+     schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+ ) -> dict[str, Any] | None:
      """Process CSV file and enrich data."""
      logger.info("Processing CSV file: %s", schema.source)

@@ -24,10 +26,14 @@ def process_csv(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] |
          data.append(dict(row))

      if no_wait:
-         return create_task_group(data, InputModel, OutputModel, schema.processor)
+         return create_task_group(
+             data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+         )

      # Process all rows in batch
-     output_rows = run_tasks(data, InputModel, OutputModel, schema.processor)
+     output_rows = run_tasks(
+         data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+     )

      # Write results to target CSV
      with open(schema.target, "w", newline="") as f:
parallel_web_tools/processors/duckdb.py

@@ -11,7 +11,9 @@
  from parallel_web_tools.core.sql_utils import quote_identifier


- def process_duckdb(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] | None:
+ def process_duckdb(
+     schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+ ) -> dict[str, Any] | None:
      """Process DuckDB table and enrich data."""
      InputModel, OutputModel = parse_input_and_output_models(schema)
      duckdb_file = os.getenv("DUCKDB_FILE")
@@ -25,9 +27,13 @@ def process_duckdb(schema: InputSchema, no_wait: bool = False) -> dict[str, Any]
      data = con.sql(f"SELECT * from {source_quoted}").pl().to_dicts()

      if no_wait:
-         return create_task_group(data, InputModel, OutputModel, schema.processor)
+         return create_task_group(
+             data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+         )

-     output_rows = run_tasks(data, InputModel, OutputModel, schema.processor)
+     output_rows = run_tasks(
+         data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+     )

      # Write output_rows to the target table
      df = pl.DataFrame(output_rows)  # noqa: F841
parallel_web_tools/processors/json.py

@@ -10,7 +10,9 @@ from parallel_web_tools.core.batch import create_task_group
  logger = logging.getLogger(__name__)


- def process_json(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] | None:
+ def process_json(
+     schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+ ) -> dict[str, Any] | None:
      """Process JSON file and enrich data."""
      logger.info("Processing JSON file: %s", schema.source)

@@ -21,10 +23,14 @@ def process_json(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] |
          data = json.load(f)

      if no_wait:
-         return create_task_group(data, InputModel, OutputModel, schema.processor)
+         return create_task_group(
+             data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+         )

      # Process all rows in batch
-     output_rows = run_tasks(data, InputModel, OutputModel, schema.processor)
+     output_rows = run_tasks(
+         data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+     )

      # Write results to target JSON
      with open(schema.target, "w") as f:
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "parallel-web-tools"
- version = "0.1.3"
+ version = "0.2.0"
  description = "Parallel Tools: CLI and Python SDK for AI-powered web intelligence"
  readme = "README.md"
  requires-python = ">=3.10"