parallel-web-tools 0.1.2rc2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/PKG-INFO +107 -8
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/README.md +104 -5
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/__init__.py +1 -1
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/commands.py +218 -12
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/batch.py +26 -3
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/research.py +39 -13
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/runner.py +17 -9
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/cloud_function/requirements.txt +1 -1
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/bigquery.py +9 -3
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/csv.py +9 -3
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/duckdb.py +9 -3
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/json.py +9 -3
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/pyproject.toml +3 -3
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/.gitignore +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/__init__.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/planner.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/cli/updater.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/__init__.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/auth.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/findall.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/monitor.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/polling.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/result.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/schema.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/sql_utils.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/core/user_agent.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/__init__.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/__init__.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/cloud_function/main.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/deploy.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/bigquery/sql/create_functions.sql +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/__init__.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/batch.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/findall.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/duckdb/udf.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/polars/__init__.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/polars/enrich.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/__init__.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/deploy.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/01_setup.sql +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/02_create_udf.sql +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/snowflake/sql/03_cleanup.sql +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/__init__.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/streaming.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/spark/udf.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/integrations/utils.py +0 -0
- {parallel_web_tools-0.1.2rc2 → parallel_web_tools-0.2.0}/parallel_web_tools/processors/__init__.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: parallel-web-tools
|
|
3
|
-
Version: 0.1.2rc2
|
|
4
|
-
Summary: Parallel Tools: CLI and
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Parallel Tools: CLI and Python SDK for AI-powered web intelligence
|
|
5
5
|
Project-URL: Homepage, https://github.com/parallel-web/parallel-web-tools
|
|
6
6
|
Project-URL: Documentation, https://docs.parallel.ai
|
|
7
7
|
Project-URL: Repository, https://github.com/parallel-web/parallel-web-tools
|
|
@@ -24,7 +24,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
24
24
|
Requires-Python: >=3.10
|
|
25
25
|
Requires-Dist: click>=8.1.0
|
|
26
26
|
Requires-Dist: httpx>=0.25.0
|
|
27
|
-
Requires-Dist: parallel-web>=0.4.
|
|
27
|
+
Requires-Dist: parallel-web>=0.4.2
|
|
28
28
|
Requires-Dist: python-dotenv>=1.0.0
|
|
29
29
|
Requires-Dist: rich>=13.0.0
|
|
30
30
|
Provides-Extra: all
|
|
@@ -98,6 +98,7 @@ CLI and data enrichment utilities for the [Parallel API](https://docs.parallel.a
|
|
|
98
98
|
- **Web Search** - AI-powered search with domain filtering and date ranges
|
|
99
99
|
- **Content Extraction** - Extract clean markdown from any URL
|
|
100
100
|
- **Data Enrichment** - Enrich CSV, JSON, DuckDB, and BigQuery data with AI
|
|
101
|
+
- **Follow-up Context** - Chain research and enrichment tasks using `--previous-interaction-id`
|
|
101
102
|
- **AI-Assisted Planning** - Use natural language to define what data you want
|
|
102
103
|
- **Multiple Integrations** - Polars, DuckDB, Snowflake, BigQuery, Spark
|
|
103
104
|
|
|
@@ -110,10 +111,14 @@ Requires **Python 3.10+**.
|
|
|
110
111
|
Install the standalone `parallel-cli` binary for search, extract, enrichment, and deep research (no Python required):
|
|
111
112
|
|
|
112
113
|
```bash
|
|
114
|
+
# macOS / Linux (Homebrew)
|
|
115
|
+
brew install parallel-web/tap/parallel-cli
|
|
116
|
+
|
|
117
|
+
# macOS / Linux (shell script)
|
|
113
118
|
curl -fsSL https://parallel.ai/install.sh | bash
|
|
114
119
|
```
|
|
115
120
|
|
|
116
|
-
|
|
121
|
+
The shell script automatically detects your platform (macOS/Linux, x64/arm64) and installs to `~/.local/bin`.
|
|
117
122
|
|
|
118
123
|
> **Note:** The standalone binary supports `search`, `extract`, `research`, and `enrich run` with CLI arguments, CSV files, and JSON files. For YAML config files, interactive planner, DuckDB/BigQuery sources, or deployment commands, use pip install.
|
|
119
124
|
|
|
@@ -150,7 +155,7 @@ pip install parallel-web-tools[all]
|
|
|
150
155
|
```
|
|
151
156
|
parallel-cli
|
|
152
157
|
├── auth # Check authentication status
|
|
153
|
-
├── login # OAuth login (or use PARALLEL_API_KEY)
|
|
158
|
+
├── login # OAuth login (--device for SSH/containers/CI, or use PARALLEL_API_KEY)
|
|
154
159
|
├── logout # Remove stored credentials
|
|
155
160
|
├── search # Web search
|
|
156
161
|
├── extract / fetch # Extract content from URLs
|
|
@@ -172,6 +177,9 @@ parallel-cli
|
|
|
172
177
|
│ ├── status # Check status of a FindAll run
|
|
173
178
|
│ ├── poll # Poll until completion
|
|
174
179
|
│ ├── result # Fetch results of a completed run
|
|
180
|
+
│ ├── enrich # Enrich existing FindAll results with new columns
|
|
181
|
+
│ ├── extend # Request additional candidates for a run
|
|
182
|
+
│ ├── schema # Get the schema for a FindAll run
|
|
175
183
|
│ └── cancel # Cancel a running FindAll
|
|
176
184
|
└── monitor # Continuous web change tracking
|
|
177
185
|
├── create # Create a new web monitor
|
|
@@ -189,9 +197,12 @@ parallel-cli
|
|
|
189
197
|
### 1. Authenticate
|
|
190
198
|
|
|
191
199
|
```bash
|
|
192
|
-
# Interactive OAuth login
|
|
200
|
+
# Interactive OAuth login (opens browser)
|
|
193
201
|
parallel-cli login
|
|
194
202
|
|
|
203
|
+
# Device authorization flow — for SSH, containers, CI, or headless environments
|
|
204
|
+
parallel-cli login --device
|
|
205
|
+
|
|
195
206
|
# Or set environment variable
|
|
196
207
|
export PARALLEL_API_KEY=your_api_key
|
|
197
208
|
```
|
|
@@ -283,13 +294,41 @@ echo "What is the latest funding for Anthropic?" | parallel-cli search - --json
|
|
|
283
294
|
echo "Research question" | parallel-cli research run - --json
|
|
284
295
|
|
|
285
296
|
# Async: launch then poll separately
|
|
286
|
-
parallel-cli research run "question" --no-wait --json # returns run_id
|
|
297
|
+
parallel-cli research run "question" --no-wait --json # returns run_id + interaction_id
|
|
287
298
|
parallel-cli research status trun_xxx --json # check status
|
|
288
299
|
parallel-cli research poll trun_xxx --json # wait and get result
|
|
289
300
|
|
|
301
|
+
# Follow-up: reuse context from a previous task
|
|
302
|
+
parallel-cli research run "follow-up question" --previous-interaction-id trun_xxx --json
|
|
303
|
+
parallel-cli enrich run --data '[...]' --previous-interaction-id trun_xxx --json
|
|
304
|
+
|
|
290
305
|
# Exit codes: 0=ok, 2=bad input, 3=auth error, 4=api error, 5=timeout
|
|
291
306
|
```
|
|
292
307
|
|
|
308
|
+
### Follow-up research with context reuse
|
|
309
|
+
|
|
310
|
+
Tasks return an `interaction_id` that can be passed as `--previous-interaction-id` on a subsequent research or enrichment run. The new task inherits the context from the prior one, so follow-up questions can reference earlier results without repeating them.
|
|
311
|
+
|
|
312
|
+
```bash
|
|
313
|
+
# Step 1: Run initial research (interaction_id is in the JSON output)
|
|
314
|
+
parallel-cli research run "What are the top 3 AI companies?" --json --processor lite-fast
|
|
315
|
+
# → { "run_id": "trun_abc", "interaction_id": "trun_abc", ... }
|
|
316
|
+
|
|
317
|
+
# Step 2: Follow-up research referencing the first task's context
|
|
318
|
+
parallel-cli research run "What products does the #1 company make?" \
|
|
319
|
+
--previous-interaction-id trun_abc --json
|
|
320
|
+
|
|
321
|
+
# Step 3: Use research context for enrichment
|
|
322
|
+
parallel-cli enrich run \
|
|
323
|
+
--data '[{"company": "Anthropic"}, {"company": "OpenAI"}]' \
|
|
324
|
+
--target enriched.csv \
|
|
325
|
+
--source-columns '[{"name": "company", "description": "Company name"}]' \
|
|
326
|
+
--enriched-columns '[{"name": "products", "description": "Main products"}]' \
|
|
327
|
+
--previous-interaction-id trun_abc --json
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
The `interaction_id` is shown in both human-readable and `--json` output for `research run`, `research status`, and `research poll`.
|
|
331
|
+
|
|
293
332
|
### More examples
|
|
294
333
|
|
|
295
334
|
```bash
|
|
@@ -354,9 +393,11 @@ print(result.result)
|
|
|
354
393
|
**DuckDB:**
|
|
355
394
|
```python
|
|
356
395
|
import duckdb
|
|
357
|
-
from parallel_web_tools.integrations.duckdb import enrich_table
|
|
396
|
+
from parallel_web_tools.integrations.duckdb import enrich_table, findall_table
|
|
358
397
|
|
|
359
398
|
conn = duckdb.connect()
|
|
399
|
+
|
|
400
|
+
# Enrich an existing table
|
|
360
401
|
conn.execute("CREATE TABLE companies AS SELECT 'Google' as name")
|
|
361
402
|
result = enrich_table(
|
|
362
403
|
conn,
|
|
@@ -365,6 +406,14 @@ result = enrich_table(
|
|
|
365
406
|
output_columns=["CEO name", "Founding year"],
|
|
366
407
|
)
|
|
367
408
|
print(result.result.fetchdf())
|
|
409
|
+
|
|
410
|
+
# Discover entities with FindAll
|
|
411
|
+
result = findall_table(
|
|
412
|
+
conn,
|
|
413
|
+
"countries that have won the FIFA World Cup and their capital cities",
|
|
414
|
+
match_limit=10,
|
|
415
|
+
)
|
|
416
|
+
result.result.show()
|
|
368
417
|
```
|
|
369
418
|
|
|
370
419
|
## Programmatic Usage
|
|
@@ -385,6 +434,56 @@ run_enrichment_from_dict({
|
|
|
385
434
|
})
|
|
386
435
|
```
|
|
387
436
|
|
|
437
|
+
### Device Authorization (RFC 8628)
|
|
438
|
+
|
|
439
|
+
For headless environments (SSH, containers, CI), use the device authorization flow:
|
|
440
|
+
|
|
441
|
+
```python
|
|
442
|
+
from parallel_web_tools import request_device_code, poll_device_token
|
|
443
|
+
|
|
444
|
+
# Step 1: Request a device code
|
|
445
|
+
device_info = request_device_code()
|
|
446
|
+
print(f"Go to: {device_info.verification_uri_complete}")
|
|
447
|
+
|
|
448
|
+
# Step 2: Poll until the user authorizes
|
|
449
|
+
token = poll_device_token(device_info.device_code)
|
|
450
|
+
```
|
|
451
|
+
|
|
452
|
+
### FindAll
|
|
453
|
+
|
|
454
|
+
Discover entities from the web using natural language:
|
|
455
|
+
|
|
456
|
+
```python
|
|
457
|
+
from parallel_web_tools import run_findall
|
|
458
|
+
|
|
459
|
+
# Discover entities (auto-enriches by default)
|
|
460
|
+
result = run_findall("AI startups in healthcare", match_limit=20)
|
|
461
|
+
|
|
462
|
+
# Post-run operations
|
|
463
|
+
from parallel_web_tools import enrich_findall, extend_findall, get_findall_schema
|
|
464
|
+
|
|
465
|
+
schema = get_findall_schema(result.run_id)
|
|
466
|
+
enriched = enrich_findall(result.run_id, ["funding amount", "number of employees"])
|
|
467
|
+
extended = extend_findall(result.run_id, additional_matches=10)
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
### Monitor
|
|
471
|
+
|
|
472
|
+
Track web changes programmatically:
|
|
473
|
+
|
|
474
|
+
```python
|
|
475
|
+
from parallel_web_tools import create_monitor, list_monitors, get_monitor
|
|
476
|
+
|
|
477
|
+
# Create a monitor
|
|
478
|
+
monitor = create_monitor(query="Track Tesla SEC filings", cadence="daily")
|
|
479
|
+
|
|
480
|
+
# List all monitors
|
|
481
|
+
monitors = list_monitors()
|
|
482
|
+
|
|
483
|
+
# Get monitor details and events
|
|
484
|
+
details = get_monitor(monitor.monitor_id)
|
|
485
|
+
```
|
|
486
|
+
|
|
388
487
|
## YAML Configuration Format
|
|
389
488
|
|
|
390
489
|
```yaml
|
|
@@ -13,6 +13,7 @@ CLI and data enrichment utilities for the [Parallel API](https://docs.parallel.a
|
|
|
13
13
|
- **Web Search** - AI-powered search with domain filtering and date ranges
|
|
14
14
|
- **Content Extraction** - Extract clean markdown from any URL
|
|
15
15
|
- **Data Enrichment** - Enrich CSV, JSON, DuckDB, and BigQuery data with AI
|
|
16
|
+
- **Follow-up Context** - Chain research and enrichment tasks using `--previous-interaction-id`
|
|
16
17
|
- **AI-Assisted Planning** - Use natural language to define what data you want
|
|
17
18
|
- **Multiple Integrations** - Polars, DuckDB, Snowflake, BigQuery, Spark
|
|
18
19
|
|
|
@@ -25,10 +26,14 @@ Requires **Python 3.10+**.
|
|
|
25
26
|
Install the standalone `parallel-cli` binary for search, extract, enrichment, and deep research (no Python required):
|
|
26
27
|
|
|
27
28
|
```bash
|
|
29
|
+
# macOS / Linux (Homebrew)
|
|
30
|
+
brew install parallel-web/tap/parallel-cli
|
|
31
|
+
|
|
32
|
+
# macOS / Linux (shell script)
|
|
28
33
|
curl -fsSL https://parallel.ai/install.sh | bash
|
|
29
34
|
```
|
|
30
35
|
|
|
31
|
-
|
|
36
|
+
The shell script automatically detects your platform (macOS/Linux, x64/arm64) and installs to `~/.local/bin`.
|
|
32
37
|
|
|
33
38
|
> **Note:** The standalone binary supports `search`, `extract`, `research`, and `enrich run` with CLI arguments, CSV files, and JSON files. For YAML config files, interactive planner, DuckDB/BigQuery sources, or deployment commands, use pip install.
|
|
34
39
|
|
|
@@ -65,7 +70,7 @@ pip install parallel-web-tools[all]
|
|
|
65
70
|
```
|
|
66
71
|
parallel-cli
|
|
67
72
|
├── auth # Check authentication status
|
|
68
|
-
├── login # OAuth login (or use PARALLEL_API_KEY)
|
|
73
|
+
├── login # OAuth login (--device for SSH/containers/CI, or use PARALLEL_API_KEY)
|
|
69
74
|
├── logout # Remove stored credentials
|
|
70
75
|
├── search # Web search
|
|
71
76
|
├── extract / fetch # Extract content from URLs
|
|
@@ -87,6 +92,9 @@ parallel-cli
|
|
|
87
92
|
│ ├── status # Check status of a FindAll run
|
|
88
93
|
│ ├── poll # Poll until completion
|
|
89
94
|
│ ├── result # Fetch results of a completed run
|
|
95
|
+
│ ├── enrich # Enrich existing FindAll results with new columns
|
|
96
|
+
│ ├── extend # Request additional candidates for a run
|
|
97
|
+
│ ├── schema # Get the schema for a FindAll run
|
|
90
98
|
│ └── cancel # Cancel a running FindAll
|
|
91
99
|
└── monitor # Continuous web change tracking
|
|
92
100
|
├── create # Create a new web monitor
|
|
@@ -104,9 +112,12 @@ parallel-cli
|
|
|
104
112
|
### 1. Authenticate
|
|
105
113
|
|
|
106
114
|
```bash
|
|
107
|
-
# Interactive OAuth login
|
|
115
|
+
# Interactive OAuth login (opens browser)
|
|
108
116
|
parallel-cli login
|
|
109
117
|
|
|
118
|
+
# Device authorization flow — for SSH, containers, CI, or headless environments
|
|
119
|
+
parallel-cli login --device
|
|
120
|
+
|
|
110
121
|
# Or set environment variable
|
|
111
122
|
export PARALLEL_API_KEY=your_api_key
|
|
112
123
|
```
|
|
@@ -198,13 +209,41 @@ echo "What is the latest funding for Anthropic?" | parallel-cli search - --json
|
|
|
198
209
|
echo "Research question" | parallel-cli research run - --json
|
|
199
210
|
|
|
200
211
|
# Async: launch then poll separately
|
|
201
|
-
parallel-cli research run "question" --no-wait --json # returns run_id
|
|
212
|
+
parallel-cli research run "question" --no-wait --json # returns run_id + interaction_id
|
|
202
213
|
parallel-cli research status trun_xxx --json # check status
|
|
203
214
|
parallel-cli research poll trun_xxx --json # wait and get result
|
|
204
215
|
|
|
216
|
+
# Follow-up: reuse context from a previous task
|
|
217
|
+
parallel-cli research run "follow-up question" --previous-interaction-id trun_xxx --json
|
|
218
|
+
parallel-cli enrich run --data '[...]' --previous-interaction-id trun_xxx --json
|
|
219
|
+
|
|
205
220
|
# Exit codes: 0=ok, 2=bad input, 3=auth error, 4=api error, 5=timeout
|
|
206
221
|
```
|
|
207
222
|
|
|
223
|
+
### Follow-up research with context reuse
|
|
224
|
+
|
|
225
|
+
Tasks return an `interaction_id` that can be passed as `--previous-interaction-id` on a subsequent research or enrichment run. The new task inherits the context from the prior one, so follow-up questions can reference earlier results without repeating them.
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
# Step 1: Run initial research (interaction_id is in the JSON output)
|
|
229
|
+
parallel-cli research run "What are the top 3 AI companies?" --json --processor lite-fast
|
|
230
|
+
# → { "run_id": "trun_abc", "interaction_id": "trun_abc", ... }
|
|
231
|
+
|
|
232
|
+
# Step 2: Follow-up research referencing the first task's context
|
|
233
|
+
parallel-cli research run "What products does the #1 company make?" \
|
|
234
|
+
--previous-interaction-id trun_abc --json
|
|
235
|
+
|
|
236
|
+
# Step 3: Use research context for enrichment
|
|
237
|
+
parallel-cli enrich run \
|
|
238
|
+
--data '[{"company": "Anthropic"}, {"company": "OpenAI"}]' \
|
|
239
|
+
--target enriched.csv \
|
|
240
|
+
--source-columns '[{"name": "company", "description": "Company name"}]' \
|
|
241
|
+
--enriched-columns '[{"name": "products", "description": "Main products"}]' \
|
|
242
|
+
--previous-interaction-id trun_abc --json
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
The `interaction_id` is shown in both human-readable and `--json` output for `research run`, `research status`, and `research poll`.
|
|
246
|
+
|
|
208
247
|
### More examples
|
|
209
248
|
|
|
210
249
|
```bash
|
|
@@ -269,9 +308,11 @@ print(result.result)
|
|
|
269
308
|
**DuckDB:**
|
|
270
309
|
```python
|
|
271
310
|
import duckdb
|
|
272
|
-
from parallel_web_tools.integrations.duckdb import enrich_table
|
|
311
|
+
from parallel_web_tools.integrations.duckdb import enrich_table, findall_table
|
|
273
312
|
|
|
274
313
|
conn = duckdb.connect()
|
|
314
|
+
|
|
315
|
+
# Enrich an existing table
|
|
275
316
|
conn.execute("CREATE TABLE companies AS SELECT 'Google' as name")
|
|
276
317
|
result = enrich_table(
|
|
277
318
|
conn,
|
|
@@ -280,6 +321,14 @@ result = enrich_table(
|
|
|
280
321
|
output_columns=["CEO name", "Founding year"],
|
|
281
322
|
)
|
|
282
323
|
print(result.result.fetchdf())
|
|
324
|
+
|
|
325
|
+
# Discover entities with FindAll
|
|
326
|
+
result = findall_table(
|
|
327
|
+
conn,
|
|
328
|
+
"countries that have won the FIFA World Cup and their capital cities",
|
|
329
|
+
match_limit=10,
|
|
330
|
+
)
|
|
331
|
+
result.result.show()
|
|
283
332
|
```
|
|
284
333
|
|
|
285
334
|
## Programmatic Usage
|
|
@@ -300,6 +349,56 @@ run_enrichment_from_dict({
|
|
|
300
349
|
})
|
|
301
350
|
```
|
|
302
351
|
|
|
352
|
+
### Device Authorization (RFC 8628)
|
|
353
|
+
|
|
354
|
+
For headless environments (SSH, containers, CI), use the device authorization flow:
|
|
355
|
+
|
|
356
|
+
```python
|
|
357
|
+
from parallel_web_tools import request_device_code, poll_device_token
|
|
358
|
+
|
|
359
|
+
# Step 1: Request a device code
|
|
360
|
+
device_info = request_device_code()
|
|
361
|
+
print(f"Go to: {device_info.verification_uri_complete}")
|
|
362
|
+
|
|
363
|
+
# Step 2: Poll until the user authorizes
|
|
364
|
+
token = poll_device_token(device_info.device_code)
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
### FindAll
|
|
368
|
+
|
|
369
|
+
Discover entities from the web using natural language:
|
|
370
|
+
|
|
371
|
+
```python
|
|
372
|
+
from parallel_web_tools import run_findall
|
|
373
|
+
|
|
374
|
+
# Discover entities (auto-enriches by default)
|
|
375
|
+
result = run_findall("AI startups in healthcare", match_limit=20)
|
|
376
|
+
|
|
377
|
+
# Post-run operations
|
|
378
|
+
from parallel_web_tools import enrich_findall, extend_findall, get_findall_schema
|
|
379
|
+
|
|
380
|
+
schema = get_findall_schema(result.run_id)
|
|
381
|
+
enriched = enrich_findall(result.run_id, ["funding amount", "number of employees"])
|
|
382
|
+
extended = extend_findall(result.run_id, additional_matches=10)
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
### Monitor
|
|
386
|
+
|
|
387
|
+
Track web changes programmatically:
|
|
388
|
+
|
|
389
|
+
```python
|
|
390
|
+
from parallel_web_tools import create_monitor, list_monitors, get_monitor
|
|
391
|
+
|
|
392
|
+
# Create a monitor
|
|
393
|
+
monitor = create_monitor(query="Track Tesla SEC filings", cadence="daily")
|
|
394
|
+
|
|
395
|
+
# List all monitors
|
|
396
|
+
monitors = list_monitors()
|
|
397
|
+
|
|
398
|
+
# Get monitor details and events
|
|
399
|
+
details = get_monitor(monitor.monitor_id)
|
|
400
|
+
```
|
|
401
|
+
|
|
303
402
|
## YAML Configuration Format
|
|
304
403
|
|
|
305
404
|
```yaml
|