parallel-web-tools 0.1.3__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/PKG-INFO +31 -2
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/README.md +30 -1
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/__init__.py +1 -1
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/commands.py +32 -3
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/batch.py +26 -3
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/research.py +39 -13
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/runner.py +17 -9
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/cloud_function/requirements.txt +1 -1
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/bigquery.py +9 -3
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/csv.py +9 -3
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/duckdb.py +9 -3
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/json.py +9 -3
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/pyproject.toml +1 -1
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/.gitignore +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/__init__.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/planner.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/updater.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/__init__.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/auth.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/findall.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/monitor.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/polling.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/result.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/schema.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/sql_utils.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/user_agent.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/__init__.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/__init__.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/cloud_function/main.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/deploy.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/sql/create_functions.sql +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/__init__.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/batch.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/findall.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/udf.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/polars/__init__.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/polars/enrich.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/__init__.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/deploy.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/01_setup.sql +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/02_create_udf.sql +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/03_cleanup.sql +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/__init__.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/streaming.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/udf.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/utils.py +0 -0
- {parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/__init__.py +0 -0

{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: parallel-web-tools
-Version: 0.1.3
+Version: 0.2.0
 Summary: Parallel Tools: CLI and Python SDK for AI-powered web intelligence
 Project-URL: Homepage, https://github.com/parallel-web/parallel-web-tools
 Project-URL: Documentation, https://docs.parallel.ai
@@ -98,6 +98,7 @@ CLI and data enrichment utilities for the [Parallel API](https://docs.parallel.a
 - **Web Search** - AI-powered search with domain filtering and date ranges
 - **Content Extraction** - Extract clean markdown from any URL
 - **Data Enrichment** - Enrich CSV, JSON, DuckDB, and BigQuery data with AI
+- **Follow-up Context** - Chain research and enrichment tasks using `--previous-interaction-id`
 - **AI-Assisted Planning** - Use natural language to define what data you want
 - **Multiple Integrations** - Polars, DuckDB, Snowflake, BigQuery, Spark
@@ -293,13 +294,41 @@ echo "What is the latest funding for Anthropic?" | parallel-cli search - --json
 echo "Research question" | parallel-cli research run - --json

 # Async: launch then poll separately
-parallel-cli research run "question" --no-wait --json  # returns run_id
+parallel-cli research run "question" --no-wait --json  # returns run_id + interaction_id
 parallel-cli research status trun_xxx --json  # check status
 parallel-cli research poll trun_xxx --json  # wait and get result

+# Follow-up: reuse context from a previous task
+parallel-cli research run "follow-up question" --previous-interaction-id trun_xxx --json
+parallel-cli enrich run --data '[...]' --previous-interaction-id trun_xxx --json
+
 # Exit codes: 0=ok, 2=bad input, 3=auth error, 4=api error, 5=timeout
 ```

+### Follow-up research with context reuse
+
+Tasks return an `interaction_id` that can be passed as `--previous-interaction-id` on a subsequent research or enrichment run. The new task inherits the context from the prior one, so follow-up questions can reference earlier results without repeating them.
+
+```bash
+# Step 1: Run initial research (interaction_id is in the JSON output)
+parallel-cli research run "What are the top 3 AI companies?" --json --processor lite-fast
+# → { "run_id": "trun_abc", "interaction_id": "trun_abc", ... }
+
+# Step 2: Follow-up research referencing the first task's context
+parallel-cli research run "What products does the #1 company make?" \
+  --previous-interaction-id trun_abc --json
+
+# Step 3: Use research context for enrichment
+parallel-cli enrich run \
+  --data '[{"company": "Anthropic"}, {"company": "OpenAI"}]' \
+  --target enriched.csv \
+  --source-columns '[{"name": "company", "description": "Company name"}]' \
+  --enriched-columns '[{"name": "products", "description": "Main products"}]' \
+  --previous-interaction-id trun_abc --json
+```
+
+The `interaction_id` is shown in both human-readable and `--json` output for `research run`, `research status`, and `research poll`.
+
 ### More examples

 ```bash
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/README.md
RENAMED

@@ -13,6 +13,7 @@ CLI and data enrichment utilities for the [Parallel API](https://docs.parallel.a
 - **Web Search** - AI-powered search with domain filtering and date ranges
 - **Content Extraction** - Extract clean markdown from any URL
 - **Data Enrichment** - Enrich CSV, JSON, DuckDB, and BigQuery data with AI
+- **Follow-up Context** - Chain research and enrichment tasks using `--previous-interaction-id`
 - **AI-Assisted Planning** - Use natural language to define what data you want
 - **Multiple Integrations** - Polars, DuckDB, Snowflake, BigQuery, Spark
@@ -208,13 +209,41 @@ echo "What is the latest funding for Anthropic?" | parallel-cli search - --json
 echo "Research question" | parallel-cli research run - --json

 # Async: launch then poll separately
-parallel-cli research run "question" --no-wait --json  # returns run_id
+parallel-cli research run "question" --no-wait --json  # returns run_id + interaction_id
 parallel-cli research status trun_xxx --json  # check status
 parallel-cli research poll trun_xxx --json  # wait and get result

+# Follow-up: reuse context from a previous task
+parallel-cli research run "follow-up question" --previous-interaction-id trun_xxx --json
+parallel-cli enrich run --data '[...]' --previous-interaction-id trun_xxx --json
+
 # Exit codes: 0=ok, 2=bad input, 3=auth error, 4=api error, 5=timeout
 ```

+### Follow-up research with context reuse
+
+Tasks return an `interaction_id` that can be passed as `--previous-interaction-id` on a subsequent research or enrichment run. The new task inherits the context from the prior one, so follow-up questions can reference earlier results without repeating them.
+
+```bash
+# Step 1: Run initial research (interaction_id is in the JSON output)
+parallel-cli research run "What are the top 3 AI companies?" --json --processor lite-fast
+# → { "run_id": "trun_abc", "interaction_id": "trun_abc", ... }
+
+# Step 2: Follow-up research referencing the first task's context
+parallel-cli research run "What products does the #1 company make?" \
+  --previous-interaction-id trun_abc --json
+
+# Step 3: Use research context for enrichment
+parallel-cli enrich run \
+  --data '[{"company": "Anthropic"}, {"company": "OpenAI"}]' \
+  --target enriched.csv \
+  --source-columns '[{"name": "company", "description": "Company name"}]' \
+  --enriched-columns '[{"name": "products", "description": "Main products"}]' \
+  --previous-interaction-id trun_abc --json
+```
+
+The `interaction_id` is shown in both human-readable and `--json` output for `research run`, `research status`, and `research poll`.
+
 ### More examples

 ```bash
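
The same chaining is available from Python through the research helpers changed below. A minimal sketch, assuming the `parallel_web_tools.core.research` import path from this diff's module layout; the queries and IDs are illustrative:

```python
# Sketch of the CLI flow above via the Python API; import path follows this
# diff's modules, queries are illustrative.
from parallel_web_tools.core.research import create_research_task, poll_research, run_research

# Async variant: create now, poll later (mirrors --no-wait).
task = create_research_task("What are the top 3 AI companies?")
result = poll_research(task["run_id"])

# Follow-up run that inherits the first task's context.
followup = run_research(
    "What products does the #1 company make?",
    previous_interaction_id=result["interaction_id"],
)
```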
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/commands.py
RENAMED

@@ -960,6 +960,10 @@ def enrich():
 @click.option("--dry-run", is_flag=True, help="Show what would be executed without making API calls")
 @click.option("--json", "output_json", is_flag=True, help="Output results as JSON to stdout")
 @click.option("-o", "--output", "output_file", type=click.Path(), help="Save results to JSON file")
+@click.option(
+    "--previous-interaction-id",
+    help="Interaction ID from a previous task to reuse as context",
+)
 def enrich_run(
     config_file: str | None,
     source_type: str | None,
@@ -974,6 +978,7 @@ def enrich_run(
     dry_run: bool,
     output_json: bool,
     output_file: str | None,
+    previous_interaction_id: str | None,
 ):
     """Run data enrichment from YAML config or CLI arguments.

@@ -1043,7 +1048,7 @@ def enrich_run(

         if not output_json:
             console.print(f"[bold cyan]Running enrichment from {config_file}...[/bold cyan]\n")
-        result = run_enrichment(config_file, no_wait=no_wait)
+        result = run_enrichment(config_file, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
     else:
         # After validation, these are guaranteed non-None
         assert source_type is not None
@@ -1134,7 +1139,7 @@ def enrich_run(

         if not output_json:
             console.print(f"[bold cyan]Running enrichment: {source} -> {target}[/bold cyan]\n")
-        result = run_enrichment_from_dict(config, no_wait=no_wait)
+        result = run_enrichment_from_dict(config, no_wait=no_wait, previous_interaction_id=previous_interaction_id)

     if no_wait and result:
         if output_json:
@@ -1572,6 +1577,10 @@ def research():
     "-o", "--output", "output_file", type=click.Path(), help="Save results (creates {name}.json and {name}.md)"
 )
 @click.option("--json", "output_json", is_flag=True, help="Output JSON to stdout")
+@click.option(
+    "--previous-interaction-id",
+    help="Interaction ID from a previous task to reuse as context",
+)
 def research_run(
     query: str | None,
     input_file: str | None,
@@ -1582,12 +1591,16 @@ def research_run(
     dry_run: bool,
     output_file: str | None,
     output_json: bool,
+    previous_interaction_id: str | None,
 ):
     """Run deep research on a question or topic.

     QUERY is the research question (max 15,000 chars). Alternatively, use --input-file
     or pass "-" as QUERY to read from stdin.

+    Use --previous-interaction-id to continue research from a prior task's context.
+    The interaction ID is shown in the output of every research run.
+
     Examples:

         parallel-cli research run "What are the latest developments in quantum computing?"
@@ -1595,6 +1608,9 @@ def research_run(
         parallel-cli research run -f question.txt --processor ultra -o report

         echo "My research question" | parallel-cli research run - --json
+
+        # Follow-up research using context from a previous task:
+        parallel-cli research run "What are the implications?" --previous-interaction-id trun_abc123
     """
     # Read from stdin if "-" is passed
     if query == "-":
@@ -1634,13 +1650,18 @@ def research_run(
         # Create task and return immediately
         if not output_json:
             console.print(f"[dim]Creating research task with processor: {processor}...[/dim]")
-        result = create_research_task(query, processor=processor, source="cli")
+        result = create_research_task(
+            query, processor=processor, source="cli", previous_interaction_id=previous_interaction_id
+        )

         if not output_json:
             console.print(f"\n[bold green]Task created: {result['run_id']}[/bold green]")
+            if result.get("interaction_id"):
+                console.print(f"Interaction ID: {result['interaction_id']}")
             console.print(f"Track progress: {result['result_url']}")
             console.print("\n[dim]Use 'parallel-cli research status <run_id>' to check status[/dim]")
             console.print("[dim]Use 'parallel-cli research poll <run_id>' to wait for results[/dim]")
+            console.print("[dim]Use '--previous-interaction-id' on a new run to continue this research[/dim]")

         if output_json:
             print(json.dumps(result, indent=2))
@@ -1673,6 +1694,7 @@ def research_run(
             poll_interval=poll_interval,
             on_status=on_status,
             source="cli",
+            previous_interaction_id=previous_interaction_id,
         )

         _output_research_result(result, output_file, output_json)
@@ -1715,11 +1737,13 @@ def research_status(run_id: str, output_json: bool):
             }.get(status, "white")

             console.print(f"[bold]Task:[/bold] {run_id}")
+            console.print(f"[bold]Interaction ID:[/bold] {result.get('interaction_id', run_id)}")
             console.print(f"[bold]Status:[/bold] [{status_color}]{status}[/{status_color}]")
             console.print(f"[bold]URL:[/bold] {result['result_url']}")

             if status == "completed":
                 console.print("\n[dim]Use 'parallel-cli research poll <run_id>' to retrieve results[/dim]")
+                console.print("[dim]Use '--previous-interaction-id' on a new run to continue this research[/dim]")

     except Exception as e:
         _handle_error(e, output_json=output_json)
@@ -1915,6 +1939,7 @@ def _output_research_result(
     output = result.get("output", {})
     output_data = {
         "run_id": result.get("run_id"),
+        "interaction_id": result.get("interaction_id"),
         "result_url": result.get("result_url"),
         "status": result.get("status"),
         "output": output.copy() if isinstance(output, dict) else output,
@@ -1957,6 +1982,7 @@ def _output_research_result(
     else:
         console.print("\n[bold green]Research Complete![/bold green]")
         console.print(f"[dim]Task: {result.get('run_id')}[/dim]")
+        console.print(f"[dim]Interaction ID: {result.get('interaction_id')}[/dim]")
         console.print(f"[dim]URL: {result.get('result_url')}[/dim]\n")

         # Show executive summary if available
@@ -1973,6 +1999,9 @@ def _output_research_result(

         if not output_file:
             console.print("[dim]Use --output to save full results to a file, or --json to print to stdout[/dim]")
+            interaction_id = result.get("interaction_id")
+            if interaction_id:
+                console.print(f"[dim]Use '--previous-interaction-id {interaction_id}' to continue this research[/dim]")


 # =============================================================================
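
Both commands wire the flag identically: click turns `--previous-interaction-id` into a `previous_interaction_id` keyword that defaults to `None` and is forwarded straight to the core helpers, so omitting the flag leaves behavior unchanged. A standalone toy reproduction of the pattern; everything here is illustrative and not part of parallel-web-tools:

```python
# Toy reproduction of the option-threading pattern above (illustrative only).
import click


def fake_run(query: str, previous_interaction_id: str | None = None) -> str:
    # Stand-in for create_research_task / run_research.
    suffix = f" (continuing {previous_interaction_id})" if previous_interaction_id else ""
    return f"ran {query!r}{suffix}"


@click.command()
@click.argument("query")
@click.option(
    "--previous-interaction-id",
    help="Interaction ID from a previous task to reuse as context",
)
def run(query: str, previous_interaction_id: str | None):
    # click maps the hyphenated flag to the snake_case parameter.
    click.echo(fake_run(query, previous_interaction_id=previous_interaction_id))


if __name__ == "__main__":
    run()
```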
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/batch.py
RENAMED

@@ -99,6 +99,7 @@ def enrich_batch(
     poll_interval: int = 5,
     include_basis: bool = True,
     source: ClientSource = "python",
+    previous_interaction_id: str | None = None,
 ) -> list[dict[str, Any]]:
     """Enrich multiple inputs using the Parallel Task Group API.

@@ -111,6 +112,7 @@ def enrich_batch(
         poll_interval: Seconds between status polls
         include_basis: Whether to include citations
         source: Client source identifier for User-Agent (default: python)
+        previous_interaction_id: Interaction ID from a previous task to reuse as context.

     Returns:
         List of result dictionaries in same order as inputs.
@@ -131,7 +133,13 @@ def enrich_batch(
     taskgroup_id = task_group.task_group_id

     # Add runs - use SDK type for proper typing
-    run_inputs: list[BetaRunInputParam] = [{"input": inp, "processor": processor} for inp in inputs]
+    def _make_run_input(inp: dict[str, Any]) -> BetaRunInputParam:
+        entry: BetaRunInputParam = {"input": inp, "processor": processor}
+        if previous_interaction_id:
+            entry["previous_interaction_id"] = previous_interaction_id
+        return entry
+
+    run_inputs: list[BetaRunInputParam] = [_make_run_input(inp) for inp in inputs]
     response = client.beta.task_group.add_runs(
         taskgroup_id,
         default_task_spec=task_spec,
@@ -187,6 +195,7 @@ def enrich_single(
     timeout: int = 300,
     include_basis: bool = True,
     source: ClientSource = "python",
+    previous_interaction_id: str | None = None,
 ) -> dict[str, Any]:
     """Enrich a single input using the Parallel API."""
     results = enrich_batch(
@@ -197,6 +206,7 @@ def enrich_single(
         timeout=timeout,
         include_basis=include_basis,
         source=source,
+        previous_interaction_id=previous_interaction_id,
     )
     return results[0] if results else {"error": "No result"}

@@ -207,6 +217,7 @@ def create_task_group(
     OutputModel,
     processor: str = "core-fast",
     source: ClientSource = "python",
+    previous_interaction_id: str | None = None,
 ) -> dict[str, Any]:
     """Create a task group and add runs without waiting for completion.

@@ -216,6 +227,7 @@ def create_task_group(
         OutputModel: Pydantic model for output schema.
         processor: Parallel processor (default: core-fast).
         source: Client source identifier for User-Agent.
+        previous_interaction_id: Interaction ID from a previous task to reuse as context.

     Returns:
         Dict with taskgroup_id, url, and num_runs.
@@ -238,12 +250,19 @@ def create_task_group(
     taskgroup_id = task_group.task_group_id
     logger.info(f"Created taskgroup id {taskgroup_id}")

+    # Build run input helper
+    def _make_run_input(row: dict[str, Any]) -> BetaRunInputParam:
+        entry: BetaRunInputParam = {"input": row, "processor": processor}
+        if previous_interaction_id:
+            entry["previous_interaction_id"] = previous_interaction_id
+        return entry
+
     # Add runs in batches
     batch_size = 100
     total_created = 0
     for i in range(0, len(input_data), batch_size):
         batch = input_data[i : i + batch_size]
-        run_inputs: list[BetaRunInputParam] = [{"input": row, "processor": processor} for row in batch]
+        run_inputs: list[BetaRunInputParam] = [_make_run_input(row) for row in batch]
         response = client.beta.task_group.add_runs(
             taskgroup_id,
             default_task_spec=task_spec,
@@ -361,6 +380,7 @@ def run_tasks(
     processor: str = "core-fast",
     source: ClientSource = "python",
     timeout: int = 3600,
+    previous_interaction_id: str | None = None,
 ) -> list[Any]:
     """Run batch tasks using Pydantic models for schema.

@@ -368,6 +388,7 @@ def run_tasks(

     Args:
         timeout: Max seconds to wait for completion (default: 3600 = 1 hour).
+        previous_interaction_id: Interaction ID from a previous task to reuse as context.
     """
     logger = logging.getLogger(__name__)

@@ -375,7 +396,9 @@ def run_tasks(
     logger.info(f"Generated batch_id: {batch_id}")

     # Create task group and add runs
-    tg_info = create_task_group(input_data, InputModel, OutputModel, processor, source)
+    tg_info = create_task_group(
+        input_data, InputModel, OutputModel, processor, source, previous_interaction_id=previous_interaction_id
+    )
     taskgroup_id = tg_info["taskgroup_id"]

     # Wait for completion
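
Note the shared pattern in `enrich_batch` and `create_task_group`: the ID is attached per run, and only when set, so callers that omit the argument produce exactly the payload 0.1.3 sent. A self-contained sketch of that conditional construction (generic dicts stand in for the SDK's `BetaRunInputParam`):

```python
# Generic sketch of the conditional run-input construction used above.
from typing import Any


def make_run_input(
    inp: dict[str, Any],
    processor: str,
    previous_interaction_id: str | None = None,
) -> dict[str, Any]:
    # Only attach the key when a value is present, so the "no follow-up
    # context" payload is identical to what the previous release sent.
    entry: dict[str, Any] = {"input": inp, "processor": processor}
    if previous_interaction_id:
        entry["previous_interaction_id"] = previous_interaction_id
    return entry


# Example: one chained run and one plain run.
chained = make_run_input({"company": "Anthropic"}, "core-fast", "trun_abc")
plain = make_run_input({"company": "OpenAI"}, "core-fast")
assert "previous_interaction_id" in chained
assert "previous_interaction_id" not in plain
```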
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/research.py
RENAMED

@@ -74,6 +74,7 @@ def create_research_task(
     processor: str = "pro-fast",
     api_key: str | None = None,
     source: ClientSource = "python",
+    previous_interaction_id: str | None = None,
 ) -> dict[str, Any]:
     """Create a deep research task without waiting for results.

@@ -82,19 +83,25 @@ def create_research_task(
         processor: Processor tier (see RESEARCH_PROCESSORS).
         api_key: Optional API key.
         source: Client source identifier for User-Agent.
+        previous_interaction_id: Interaction ID from a previous task to reuse as context.

     Returns:
-        Dict with run_id, result_url, and other task metadata.
+        Dict with run_id, interaction_id, result_url, and other task metadata.
     """
     client = create_client(api_key, source)

-    task = client.task_run.create(
-        input=query[:15000],
-        processor=processor,
-    )
+    create_kwargs: dict[str, Any] = {
+        "input": query[:15000],
+        "processor": processor,
+    }
+    if previous_interaction_id:
+        create_kwargs["previous_interaction_id"] = previous_interaction_id
+
+    task = client.task_run.create(**create_kwargs)

     return {
         "run_id": task.run_id,
+        "interaction_id": getattr(task, "interaction_id", task.run_id),
         "result_url": f"{PLATFORM_BASE}/play/deep-research/{task.run_id}",
         "processor": processor,
         "status": getattr(task, "status", "pending"),
@@ -114,13 +121,14 @@ def get_research_status(
         source: Client source identifier for User-Agent.

     Returns:
-        Dict with status and other task info.
+        Dict with status, interaction_id, and other task info.
     """
     client = create_client(api_key, source)
     status = client.task_run.retrieve(run_id=run_id)

     return {
         "run_id": run_id,
+        "interaction_id": getattr(status, "interaction_id", run_id),
         "status": status.status,
         "result_url": f"{PLATFORM_BASE}/play/deep-research/{run_id}",
     }
@@ -162,6 +170,7 @@ def _poll_until_complete(
     timeout: int,
     poll_interval: int,
     on_status: Callable[[str, str], None] | None,
+    interaction_id: str | None = None,
 ) -> dict[str, Any]:
     """Poll a research task until completion and return the result.

@@ -172,6 +181,7 @@ def _poll_until_complete(
         timeout: Maximum wait time in seconds.
         poll_interval: Seconds between status checks.
         on_status: Optional callback called with (status, run_id) on each poll.
+        interaction_id: Known interaction ID (updated from poll responses).

     Returns:
         Dict with content and metadata.
@@ -180,9 +190,15 @@ def _poll_until_complete(
         TimeoutError: If the task doesn't complete within timeout.
         RuntimeError: If the task fails or is cancelled.
     """
+    # Track interaction_id from poll responses
+    poll_state = {"interaction_id": interaction_id}

     def retrieve():
-        return client.task_run.retrieve(run_id=run_id)
+        response = client.task_run.retrieve(run_id=run_id)
+        # Capture interaction_id from the latest response
+        if hasattr(response, "interaction_id") and response.interaction_id:
+            poll_state["interaction_id"] = response.interaction_id
+        return response

     def extract_status(response):
         return response.status
@@ -193,6 +209,7 @@ def _poll_until_complete(
         output_data = _serialize_output(output)
         return {
             "run_id": run_id,
+            "interaction_id": poll_state["interaction_id"] or run_id,
             "result_url": result_url,
             "status": "completed",
             "output": output_data,
@@ -226,6 +243,7 @@ def run_research(
     poll_interval: int = 45,
     on_status: Callable[[str, str], None] | None = None,
     source: ClientSource = "python",
+    previous_interaction_id: str | None = None,
 ) -> dict[str, Any]:
     """Run deep research and wait for results.

@@ -240,6 +258,7 @@ def run_research(
         poll_interval: Seconds between status checks (default: 45).
         on_status: Optional callback called with (status, run_id) on each poll.
         source: Client source identifier for User-Agent.
+        previous_interaction_id: Interaction ID from a previous task to reuse as context.

     Returns:
         Dict with content and metadata.
@@ -250,17 +269,24 @@ def run_research(
     """
     client = create_client(api_key, source)

-    task = client.task_run.create(
-        input=query[:15000],
-        processor=processor,
-    )
+    create_kwargs: dict[str, Any] = {
+        "input": query[:15000],
+        "processor": processor,
+    }
+    if previous_interaction_id:
+        create_kwargs["previous_interaction_id"] = previous_interaction_id
+
+    task = client.task_run.create(**create_kwargs)
     run_id = task.run_id
+    interaction_id = getattr(task, "interaction_id", run_id)
     result_url = f"{PLATFORM_BASE}/play/deep-research/{run_id}"

     if on_status:
         on_status("created", run_id)

-    return _poll_until_complete(client, run_id, result_url, timeout, poll_interval, on_status)
+    return _poll_until_complete(
+        client, run_id, result_url, timeout, poll_interval, on_status, interaction_id=interaction_id
+    )


 def poll_research(
@@ -284,7 +310,7 @@ def poll_research(
         source: Client source identifier for User-Agent.

     Returns:
-        Dict with content and metadata.
+        Dict with content and metadata including interaction_id.
     """
     client = create_client(api_key, source)
     result_url = f"{PLATFORM_BASE}/play/deep-research/{run_id}"
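
Throughout `research.py` the interaction ID is read defensively: `getattr(..., "interaction_id", run_id)` at creation time and a poll-time `or run_id` guard, so SDK objects that lack the field still yield a usable ID. A toy model of that fallback; the `TaskStub` type is hypothetical, for illustration only:

```python
# Toy model of the interaction_id fallback used above (TaskStub is
# hypothetical; the real objects come from the Parallel SDK).
from dataclasses import dataclass


@dataclass
class TaskStub:
    run_id: str
    interaction_id: str | None = None


def resolve_interaction_id(task: TaskStub) -> str:
    # Mirrors getattr(task, "interaction_id", task.run_id) combined with the
    # "or run_id" guard in _poll_until_complete.
    return getattr(task, "interaction_id", None) or task.run_id


assert resolve_interaction_id(TaskStub("trun_abc", "trun_abc")) == "trun_abc"
assert resolve_interaction_id(TaskStub("trun_xyz")) == "trun_xyz"
```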
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/core/runner.py
RENAMED

@@ -8,35 +8,40 @@ from parallel_web_tools.core.schema import InputSchema, SourceType, load_schema,
 logger = logging.getLogger(__name__)


-def _run_processor(parsed_schema: InputSchema, no_wait: bool = False) -> dict | None:
+def _run_processor(
+    parsed_schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+) -> dict | None:
     """Run the appropriate processor for the given schema."""
     match parsed_schema.source_type:
         case SourceType.CSV:
             from parallel_web_tools.processors.csv import process_csv

-            return process_csv(parsed_schema, no_wait=no_wait)
+            return process_csv(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
         case SourceType.JSON:
             from parallel_web_tools.processors.json import process_json

-            return process_json(parsed_schema, no_wait=no_wait)
+            return process_json(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
         case SourceType.DUCKDB:
             from parallel_web_tools.processors.duckdb import process_duckdb

-            return process_duckdb(parsed_schema, no_wait=no_wait)
+            return process_duckdb(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
         case SourceType.BIGQUERY:
             from parallel_web_tools.processors.bigquery import process_bigquery

-            return process_bigquery(parsed_schema, no_wait=no_wait)
+            return process_bigquery(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)
         case _:
             raise NotImplementedError(f"{parsed_schema.source_type} is not supported")


-def run_enrichment(config_file: str | Path, no_wait: bool = False) -> dict | None:
+def run_enrichment(
+    config_file: str | Path, no_wait: bool = False, previous_interaction_id: str | None = None
+) -> dict | None:
     """Run data enrichment using a YAML config file.

     Args:
         config_file: Path to YAML configuration file
         no_wait: If True, return taskgroup info without waiting for completion.
+        previous_interaction_id: Interaction ID from a previous task to reuse as context.

     Example:
         >>> from parallel_web_tools import run_enrichment
@@ -52,7 +57,7 @@ def run_enrichment(config_file: str | Path, no_wait: bool = False) -> dict | Non
     parsed_schema = parse_schema(schema)

     logger.info(f"Running enrichment: {parsed_schema.source} -> {parsed_schema.target}")
-    result = _run_processor(parsed_schema, no_wait=no_wait)
+    result = _run_processor(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)

     if no_wait:
         return result
@@ -61,12 +66,15 @@ def run_enrichment(config_file: str | Path, no_wait: bool = False) -> dict | Non
         return None


-def run_enrichment_from_dict(config: dict, no_wait: bool = False) -> dict | None:
+def run_enrichment_from_dict(
+    config: dict, no_wait: bool = False, previous_interaction_id: str | None = None
+) -> dict | None:
     """Run data enrichment using a configuration dictionary.

     Args:
         config: Configuration dictionary matching YAML schema
         no_wait: If True, return taskgroup info without waiting for completion.
+        previous_interaction_id: Interaction ID from a previous task to reuse as context.

     Example:
         >>> config = {
@@ -82,7 +90,7 @@ def run_enrichment_from_dict(config: dict, no_wait: bool = False) -> dict | None
     parsed_schema = parse_schema(config)

     logger.info(f"Running enrichment: {parsed_schema.source} -> {parsed_schema.target}")
-    result = _run_processor(parsed_schema, no_wait=no_wait)
+    result = _run_processor(parsed_schema, no_wait=no_wait, previous_interaction_id=previous_interaction_id)

     if no_wait:
         return result
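
Callers of the runner API opt in with a single keyword, which is then routed to whichever processor matches the config's source type. A usage sketch: the top-level import is the one shown in the docstring above, the `core.runner` path follows this diff's layout, and the file name, config keys, and ID are illustrative:

```python
# Enrichment that reuses a prior task's context (sketch; names illustrative).
from parallel_web_tools import run_enrichment
from parallel_web_tools.core.runner import run_enrichment_from_dict

# YAML-file variant, assuming enrich.yaml follows the package's schema.
run_enrichment("enrich.yaml", previous_interaction_id="trun_abc123")

# Dict-based variant; keys abbreviated, matching the YAML schema.
config = {"source": "companies.csv", "target": "enriched.csv"}  # plus column specs
run_enrichment_from_dict(config, previous_interaction_id="trun_abc123")
```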
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/bigquery.py
RENAMED

@@ -46,7 +46,9 @@ def fetch_all(conn: Connection, table: str) -> list[dict[str, Any]]:
     return [dict(row) for row in rows]


-def process_bigquery(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] | None:
+def process_bigquery(
+    schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+) -> dict[str, Any] | None:
     """Process BigQuery table and enrich data."""
     InputModel, OutputModel = parse_input_and_output_models(schema)

@@ -57,9 +59,13 @@ def process_bigquery(schema: InputSchema, no_wait: bool = False) -> dict[str, An
     data = fetch_all(conn, schema.source)

     if no_wait:
-        return create_task_group(data, InputModel, OutputModel, schema.processor)
+        return create_task_group(
+            data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+        )

-    output_rows = run_tasks(data, InputModel, OutputModel, schema.processor)
+    output_rows = run_tasks(
+        data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+    )
     df = pl.DataFrame(output_rows)

     _project, dataset, table = split_bq_name(schema.target)
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/csv.py
RENAMED

@@ -10,7 +10,9 @@ from parallel_web_tools.core.batch import create_task_group
 logger = logging.getLogger(__name__)


-def process_csv(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] | None:
+def process_csv(
+    schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+) -> dict[str, Any] | None:
     """Process CSV file and enrich data."""
     logger.info("Processing CSV file: %s", schema.source)

@@ -24,10 +26,14 @@ def process_csv(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] |
         data.append(dict(row))

     if no_wait:
-        return create_task_group(data, InputModel, OutputModel, schema.processor)
+        return create_task_group(
+            data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+        )

     # Process all rows in batch
-    output_rows = run_tasks(data, InputModel, OutputModel, schema.processor)
+    output_rows = run_tasks(
+        data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+    )

     # Write results to target CSV
     with open(schema.target, "w", newline="") as f:
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/duckdb.py
RENAMED

@@ -11,7 +11,9 @@ from parallel_web_tools.core.batch import create_task_group
 from parallel_web_tools.core.sql_utils import quote_identifier


-def process_duckdb(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] | None:
+def process_duckdb(
+    schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+) -> dict[str, Any] | None:
     """Process DuckDB table and enrich data."""
     InputModel, OutputModel = parse_input_and_output_models(schema)
     duckdb_file = os.getenv("DUCKDB_FILE")
@@ -25,9 +27,13 @@ def process_duckdb(schema: InputSchema, no_wait: bool = False) -> dict[str, Any]
     data = con.sql(f"SELECT * from {source_quoted}").pl().to_dicts()

     if no_wait:
-        return create_task_group(data, InputModel, OutputModel, schema.processor)
+        return create_task_group(
+            data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+        )

-    output_rows = run_tasks(data, InputModel, OutputModel, schema.processor)
+    output_rows = run_tasks(
+        data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+    )

     # Write output_rows to the target table
     df = pl.DataFrame(output_rows)  # noqa: F841
{parallel_web_tools-0.1.3 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/json.py
RENAMED

@@ -10,7 +10,9 @@ from parallel_web_tools.core.batch import create_task_group
 logger = logging.getLogger(__name__)


-def process_json(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] | None:
+def process_json(
+    schema: InputSchema, no_wait: bool = False, previous_interaction_id: str | None = None
+) -> dict[str, Any] | None:
     """Process JSON file and enrich data."""
     logger.info("Processing JSON file: %s", schema.source)

@@ -21,10 +23,14 @@ def process_json(schema: InputSchema, no_wait: bool = False) -> dict[str, Any] |
         data = json.load(f)

     if no_wait:
-        return create_task_group(data, InputModel, OutputModel, schema.processor)
+        return create_task_group(
+            data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+        )

     # Process all rows in batch
-    output_rows = run_tasks(data, InputModel, OutputModel, schema.processor)
+    output_rows = run_tasks(
+        data, InputModel, OutputModel, schema.processor, previous_interaction_id=previous_interaction_id
+    )

     # Write results to target JSON
     with open(schema.target, "w") as f: