parallel-web-tools 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. parallel_web_tools/__init__.py +56 -0
  2. parallel_web_tools/cli/__init__.py +5 -0
  3. parallel_web_tools/cli/commands.py +626 -0
  4. parallel_web_tools/cli/planner.py +438 -0
  5. parallel_web_tools/core/__init__.py +70 -0
  6. parallel_web_tools/core/auth.py +256 -0
  7. parallel_web_tools/core/batch.py +307 -0
  8. parallel_web_tools/core/result.py +29 -0
  9. parallel_web_tools/core/runner.py +75 -0
  10. parallel_web_tools/core/schema.py +169 -0
  11. parallel_web_tools/integrations/__init__.py +17 -0
  12. parallel_web_tools/integrations/bigquery/__init__.py +34 -0
  13. parallel_web_tools/integrations/bigquery/cloud_function/main.py +199 -0
  14. parallel_web_tools/integrations/bigquery/cloud_function/requirements.txt +5 -0
  15. parallel_web_tools/integrations/bigquery/deploy.py +456 -0
  16. parallel_web_tools/integrations/bigquery/sql/create_functions.sql +49 -0
  17. parallel_web_tools/integrations/duckdb/__init__.py +63 -0
  18. parallel_web_tools/integrations/duckdb/batch.py +220 -0
  19. parallel_web_tools/integrations/duckdb/udf.py +159 -0
  20. parallel_web_tools/integrations/polars/__init__.py +37 -0
  21. parallel_web_tools/integrations/polars/enrich.py +218 -0
  22. parallel_web_tools/integrations/snowflake/__init__.py +46 -0
  23. parallel_web_tools/integrations/snowflake/deploy.py +347 -0
  24. parallel_web_tools/integrations/snowflake/sql/01_setup.sql +97 -0
  25. parallel_web_tools/integrations/snowflake/sql/02_create_udf.sql +260 -0
  26. parallel_web_tools/integrations/snowflake/sql/03_cleanup.sql +61 -0
  27. parallel_web_tools/integrations/spark/__init__.py +57 -0
  28. parallel_web_tools/integrations/spark/streaming.py +404 -0
  29. parallel_web_tools/integrations/spark/udf.py +206 -0
  30. parallel_web_tools/integrations/utils.py +32 -0
  31. parallel_web_tools/processors/__init__.py +20 -0
  32. parallel_web_tools/processors/bigquery.py +70 -0
  33. parallel_web_tools/processors/csv.py +32 -0
  34. parallel_web_tools/processors/duckdb.py +25 -0
  35. parallel_web_tools-0.0.1.dist-info/METADATA +346 -0
  36. parallel_web_tools-0.0.1.dist-info/RECORD +38 -0
  37. parallel_web_tools-0.0.1.dist-info/WHEEL +4 -0
  38. parallel_web_tools-0.0.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,56 @@
1
"""Parallel Data Enrichment package."""

# Re-export everything from core for convenience.
# NOTE(review): names are listed alphabetically; the category comments mark
# the first item of each group only approximately (e.g. load_schema /
# parse_schema appear after the "# Auth" marker). `run_tasks` is labeled
# "# Runner" here but "# Batch" in __all__ below — confirm which core
# module actually defines it.
from parallel_web_tools.core import (
    # Schema
    AVAILABLE_PROCESSORS,
    Column,
    InputSchema,
    ParseError,
    ProcessorType,
    SourceType,
    # Batch
    enrich_batch,
    enrich_single,
    # Auth
    get_api_key,
    get_async_client,
    get_auth_status,
    get_client,
    load_schema,
    logout,
    parse_input_and_output_models,
    parse_schema,
    # Runner
    run_enrichment,
    run_enrichment_from_dict,
    run_tasks,
)

# Package version; keep in sync with the distribution metadata.
__version__ = "0.0.1"

# Explicit public API surface of the top-level package.
__all__ = [
    # Auth
    "get_api_key",
    "get_auth_status",
    "get_client",
    "get_async_client",
    "logout",
    # Schema
    "AVAILABLE_PROCESSORS",
    "Column",
    "InputSchema",
    "ParseError",
    "ProcessorType",
    "SourceType",
    "load_schema",
    "parse_schema",
    "parse_input_and_output_models",
    # Batch
    "enrich_batch",
    "enrich_single",
    "run_tasks",
    # Runner
    "run_enrichment",
    "run_enrichment_from_dict",
]
@@ -0,0 +1,5 @@
1
"""CLI for Parallel Data."""

# Re-export the click entry point used by the console-script declaration
# in the distribution's entry_points.txt.
from parallel_web_tools.cli.commands import main

__all__ = ["main"]
@@ -0,0 +1,626 @@
1
"""CLI commands for Parallel."""

import json
import logging
import os
from typing import Any

import click
import httpx
from dotenv import load_dotenv
from rich.console import Console

from parallel_web_tools import __version__
from parallel_web_tools.cli.planner import create_config_interactive, save_config
from parallel_web_tools.core import (
    AVAILABLE_PROCESSORS,
    JSON_SCHEMA_TYPE_MAP,
    get_api_key,
    get_auth_status,
    logout,
    run_enrichment,
    run_enrichment_from_dict,
)

# NOTE(review): basicConfig at import time configures the root logger for
# any process that imports this module, not just the CLI — confirm intended.
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)
# Shared rich console used by all commands for styled terminal output.
console = Console()

# Load local development overrides (e.g. PARALLEL_API_KEY) if present.
load_dotenv(".env.local")
30
+
31
+
32
+ def parse_columns(columns_json: str | None) -> list[dict[str, str]] | None:
33
+ """Parse columns from JSON string."""
34
+ if not columns_json:
35
+ return None
36
+ try:
37
+ columns = json.loads(columns_json)
38
+ if not isinstance(columns, list):
39
+ raise click.BadParameter("Columns must be a JSON array")
40
+ for col in columns:
41
+ if "name" not in col:
42
+ raise click.BadParameter("Each column must have a 'name' field")
43
+ if "description" not in col:
44
+ raise click.BadParameter("Each column must have a 'description' field")
45
+ return columns
46
+ except json.JSONDecodeError as e:
47
+ raise click.BadParameter(f"Invalid JSON: {e}") from e
48
+
49
+
50
def validate_enrich_args(
    source_type: str | None,
    source: str | None,
    target: str | None,
    source_columns: str | None,
    enriched_columns: str | None,
    intent: str | None,
) -> None:
    """Validate enrichment CLI arguments.

    Raises click.Abort with appropriate error messages for invalid combinations.
    """
    # The two ways of specifying outputs are mutually exclusive.
    if enriched_columns and intent:
        console.print("[bold red]Error: Use either --enriched-columns OR --intent, not both.[/bold red]")
        raise click.Abort()

    # Flag-name -> value table for the options that must all be present
    # once the user has opted into CLI-argument mode.
    required = {
        "--source-type": source_type,
        "--source": source,
        "--target": target,
        "--source-columns": source_columns,
    }
    any_base_given = any(value is not None for value in required.values())
    output_spec_given = enriched_columns is not None or intent is not None

    # Only enforce completeness when at least one CLI-mode option appeared;
    # the no-arguments case is the caller's concern.
    if not (any_base_given or output_spec_given):
        return

    if not all(value is not None for value in required.values()):
        missing = [flag for flag, value in required.items() if not value]
        console.print(f"[bold red]Error: Missing required options: {', '.join(missing)}[/bold red]")
        raise click.Abort()
    if not output_spec_given:
        console.print("[bold red]Error: Provide --enriched-columns OR --intent.[/bold red]")
        raise click.Abort()
87
+
88
+
89
def build_config_from_args(
    source_type: str,
    source: str,
    target: str,
    source_columns: list[dict[str, str]],
    enriched_columns: list[dict[str, str]],
    processor: str,
) -> dict[str, Any]:
    """Assemble the enrichment configuration mapping from CLI values."""
    config: dict[str, Any] = {}
    config["source_type"] = source_type
    config["source"] = source
    config["target"] = target
    config["source_columns"] = source_columns
    config["enriched_columns"] = enriched_columns
    config["processor"] = processor
    return config
106
+
107
+
108
def suggest_from_intent(
    intent: str,
    source_columns: list[dict[str, str]] | None = None,
) -> dict[str, Any]:
    """Use Parallel Ingest API to suggest output columns and processor.

    Calls ``/v1beta/tasks/suggest`` to derive an output schema from the
    natural-language intent, then best-effort calls
    ``/v1beta/tasks/suggest-processor`` for a processor recommendation.

    Returns a dict with ``enriched_columns``, ``processor``, ``title`` and
    ``warnings``. Raises ``httpx.HTTPStatusError`` if the first (required)
    call fails; the processor call falls back to ``core-fast`` on any error.
    """
    api_key = get_api_key()
    base_url = "https://api.parallel.ai"
    headers = {"x-api-key": api_key, "Content-Type": "application/json"}

    # Give the model visibility into the available input columns so its
    # suggestions can reference them.
    full_intent = intent
    if source_columns:
        col_descriptions = [f"- {col['name']}: {col.get('description', 'no description')}" for col in source_columns]
        full_intent = f"{intent}\n\nInput columns available:\n" + "\n".join(col_descriptions)

    suggest_body: dict[str, Any] = {"user_intent": full_intent}

    # Fix: reuse a single HTTP client (one connection pool) for both calls
    # instead of opening a second httpx.Client for the processor request.
    with httpx.Client(timeout=60) as client:
        response = client.post(f"{base_url}/v1beta/tasks/suggest", json=suggest_body, headers=headers)
        response.raise_for_status()
        data = response.json()

        output_schema = data.get("output_schema", {})
        properties = output_schema.get("properties", {})

        # Map JSON-schema property types onto the package's column types.
        enriched_columns = []
        for name, prop in properties.items():
            col_type = prop.get("type", "string")
            mapped_type = JSON_SCHEMA_TYPE_MAP.get(col_type, "str")
            enriched_columns.append({"name": name, "description": prop.get("description", ""), "type": mapped_type})

        # Processor suggestion is best-effort: any failure keeps the default.
        processor = "core-fast"
        try:
            input_schema = data.get("input_schema", {"type": "object", "properties": {}})
            task_spec = {"input_schema": input_schema, "output_schema": output_schema}
            processor_response = client.post(
                f"{base_url}/v1beta/tasks/suggest-processor", json={"task_spec": task_spec}, headers=headers
            )
            if processor_response.status_code == 200:
                processor_data = processor_response.json()
                recommended = processor_data.get("recommended_processors", [])
                if recommended:
                    processor = recommended[0]
        except Exception:
            # Previously swallowed silently; log at debug so failures are
            # diagnosable without changing the fallback behavior.
            logger.debug("Processor suggestion failed; using default", exc_info=True)

    return {
        "enriched_columns": enriched_columns,
        "processor": processor,
        "title": data.get("title", ""),
        "warnings": data.get("warnings", []),
    }
161
+
162
+
163
+ # =============================================================================
164
+ # Main CLI Group
165
+ # =============================================================================
166
+
167
+
168
@click.group()
@click.version_option(version=__version__, prog_name="parallel-cli")
def main():
    """Parallel CLI - AI-powered data enrichment and search."""
    # Group entry point only; subcommands register themselves via
    # @main.command() / @main.group() below.
    pass
173
+
174
+
175
+ # =============================================================================
176
+ # Auth Commands
177
+ # =============================================================================
178
+
179
+
180
@main.command()
def auth():
    """Check authentication status."""
    status = get_auth_status()

    # Unauthenticated: print guidance and bail out early.
    if not status["authenticated"]:
        console.print("[yellow]Not authenticated[/yellow]")
        console.print("\n[cyan]To authenticate:[/cyan]")
        console.print(" Run: parallel-cli login")
        console.print(" Or set PARALLEL_API_KEY environment variable")
        return

    # Authenticated: report which credential source is in effect.
    if status["method"] == "environment":
        console.print("[green]Authenticated via PARALLEL_API_KEY environment variable[/green]")
    else:
        console.print("[green]Authenticated via OAuth[/green]")
        console.print(f" Credentials: {status['token_file']}")
196
+
197
+
198
@main.command()
def login():
    """Authenticate with Parallel API."""
    console.print("[bold cyan]Authenticating with Parallel...[/bold cyan]\n")

    # force_login bypasses any cached credentials and re-runs the auth flow.
    try:
        get_api_key(force_login=True)
    except Exception as e:
        console.print(f"[bold red]Authentication failed: {e}[/bold red]")
        raise click.Abort() from None
    else:
        console.print("\n[bold green]Authentication successful![/bold green]")
209
+
210
+
211
@main.command(name="logout")
def logout_cmd():
    """Remove stored credentials."""
    # core.logout() reports whether any credentials were actually removed.
    removed = logout()
    if not removed:
        console.print("[yellow]No stored credentials found[/yellow]")
    else:
        console.print("[green]Logged out successfully[/green]")
218
+
219
+
220
+ # =============================================================================
221
+ # Search Command
222
+ # =============================================================================
223
+
224
+
225
@main.command()
@click.argument("objective", required=False)
@click.option("-q", "--query", multiple=True, help="Keyword search query (can be repeated)")
@click.option(
    "--mode", type=click.Choice(["one-shot", "agentic"]), default="one-shot", help="Search mode", show_default=True
)
@click.option("--max-results", type=int, default=10, help="Maximum results", show_default=True)
@click.option("--include-domains", multiple=True, help="Only search these domains")
@click.option("--exclude-domains", multiple=True, help="Exclude these domains")
@click.option("--after-date", help="Only results after this date (YYYY-MM-DD)")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def search(
    objective: str | None,
    query: tuple[str, ...],
    mode: str,
    max_results: int,
    include_domains: tuple[str, ...],
    exclude_domains: tuple[str, ...],
    after_date: str | None,
    output_json: bool,
):
    """Search the web using Parallel's AI-powered search."""
    # At least one of a natural-language objective or a keyword query is required.
    if not objective and not query:
        console.print("[bold red]Error: Provide an objective or at least one --query.[/bold red]")
        raise click.Abort()

    try:
        # Imported lazily so the CLI can load even before the SDK is needed.
        from parallel import Parallel

        api_key = get_api_key()
        client = Parallel(api_key=api_key)

        search_kwargs: dict[str, Any] = {"mode": mode, "max_results": max_results}
        if objective:
            search_kwargs["objective"] = objective
        if query:
            search_kwargs["search_queries"] = list(query)

        # Optional domain/date filters; only sent when at least one is set.
        source_policy: dict[str, Any] = {}
        if include_domains:
            source_policy["include_domains"] = list(include_domains)
        if exclude_domains:
            source_policy["exclude_domains"] = list(exclude_domains)
        if after_date:
            source_policy["after_date"] = after_date
        if source_policy:
            search_kwargs["source_policy"] = source_policy

        if not output_json:
            console.print("[dim]Searching...[/dim]\n")

        result = client.beta.search(**search_kwargs)

        if output_json:
            # Machine-readable mode: plain print (no rich markup) to stdout.
            output = {
                "search_id": result.search_id,
                "results": [
                    {"url": r.url, "title": r.title, "publish_date": r.publish_date, "excerpts": r.excerpts}
                    for r in result.results
                ],
                "warnings": result.warnings if hasattr(result, "warnings") else [],
            }
            print(json.dumps(output, indent=2))
        else:
            console.print(f"[bold green]Found {len(result.results)} results[/bold green]\n")
            for i, r in enumerate(result.results, 1):
                console.print(f"[bold cyan]{i}. {r.title}[/bold cyan]")
                console.print(f" [link={r.url}]{r.url}[/link]")
                if r.publish_date:
                    console.print(f" [dim]Published: {r.publish_date}[/dim]")
                if r.excerpts:
                    # Show only the first excerpt, truncated to keep output readable.
                    excerpt = r.excerpts[0][:200] + "..." if len(r.excerpts[0]) > 200 else r.excerpts[0]
                    console.print(f" [dim]{excerpt}[/dim]")
                console.print()

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/bold red]")
        raise click.Abort() from None
303
+
304
+
305
+ # =============================================================================
306
+ # Extract Command
307
+ # =============================================================================
308
+
309
+
310
@main.command()
@click.argument("urls", nargs=-1, required=True)
@click.option("--objective", help="Focus extraction on a specific goal")
@click.option("-q", "--query", multiple=True, help="Keywords to prioritize (can be repeated)")
@click.option("--full-content", is_flag=True, help="Include complete page content")
@click.option("--no-excerpts", is_flag=True, help="Exclude excerpts from output")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def extract(
    urls: tuple[str, ...],
    objective: str | None,
    query: tuple[str, ...],
    full_content: bool,
    no_excerpts: bool,
    output_json: bool,
):
    """Extract content from URLs as clean markdown."""
    try:
        # Imported lazily so the CLI can load even before the SDK is needed.
        from parallel import Parallel

        api_key = get_api_key()
        client = Parallel(api_key=api_key)

        extract_kwargs: dict[str, Any] = {
            "urls": list(urls),
            # Opt in to the beta search-extract API revision.
            "betas": ["search-extract-2025-10-10"],
            "excerpts": not no_excerpts,
            "full_content": full_content,
        }

        if objective:
            extract_kwargs["objective"] = objective
        if query:
            extract_kwargs["search_queries"] = list(query)

        if not output_json:
            console.print(f"[dim]Extracting content from {len(urls)} URL(s)...[/dim]\n")

        result = client.beta.extract(**extract_kwargs)

        if output_json:
            # Build plain dicts for JSON; optional fields only when present.
            results_list = []
            for r in result.results:
                result_dict: dict[str, Any] = {"url": r.url, "title": r.title}
                if hasattr(r, "excerpts") and r.excerpts:
                    result_dict["excerpts"] = r.excerpts
                if hasattr(r, "full_content") and r.full_content:
                    result_dict["full_content"] = r.full_content
                results_list.append(result_dict)

            # Per-URL failures are reported alongside results, not raised.
            errors_list = []
            if hasattr(result, "errors") and result.errors:
                for e in result.errors:
                    errors_list.append(
                        {
                            "url": getattr(e, "url", None),
                            "error": str(getattr(e, "error", "")),
                            "status_code": getattr(e, "status_code", None),
                        }
                    )

            output = {"extract_id": result.extract_id, "results": results_list, "errors": errors_list}
            print(json.dumps(output, indent=2))
        else:
            if result.errors:
                console.print(f"[yellow]Warning: {len(result.errors)} URL(s) failed[/yellow]\n")

            console.print(f"[bold green]Extracted {len(result.results)} page(s)[/bold green]\n")

            for r in result.results:
                console.print(f"[bold cyan]{r.title}[/bold cyan]")
                console.print(f"[link={r.url}]{r.url}[/link]\n")

                if hasattr(r, "excerpts") and r.excerpts:
                    console.print("[dim]Excerpts:[/dim]")
                    # Show at most three excerpts, each truncated for readability.
                    for excerpt in r.excerpts[:3]:
                        text = excerpt[:300] + "..." if len(excerpt) > 300 else excerpt
                        console.print(f" {text}")
                    console.print()

                if hasattr(r, "full_content") and r.full_content:
                    console.print("[dim]Full content:[/dim]")
                    # Preview only the first 1000 characters of the page body.
                    content = r.full_content[:1000] + "..." if len(r.full_content) > 1000 else r.full_content
                    console.print(content)
                    console.print()

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/bold red]")
        raise click.Abort() from None
398
+
399
+
400
+ # =============================================================================
401
+ # Enrich Command Group
402
+ # =============================================================================
403
+
404
+
405
@main.group()
def enrich():
    """Data enrichment commands."""
    # Container group; subcommands attach via @enrich.command() below.
    pass
409
+
410
+
411
@enrich.command(name="run")
@click.argument("config_file", required=False)
@click.option("--source-type", type=click.Choice(["csv", "duckdb", "bigquery"]), help="Data source type")
@click.option("--source", help="Source file path or table name")
@click.option("--target", help="Target file path or table name")
@click.option("--source-columns", help="Source columns as JSON")
@click.option("--enriched-columns", help="Enriched columns as JSON")
@click.option("--intent", help="Natural language description (AI suggests columns)")
@click.option("--processor", type=click.Choice(AVAILABLE_PROCESSORS), help="Processor to use")
def enrich_run(
    config_file: str | None,
    source_type: str | None,
    source: str | None,
    target: str | None,
    source_columns: str | None,
    enriched_columns: str | None,
    intent: str | None,
    processor: str | None,
):
    """Run data enrichment from YAML config or CLI arguments."""
    # Config-file mode and CLI-argument mode are mutually exclusive;
    # exactly one must be chosen.
    base_args = [source_type, source, target, source_columns]
    has_cli_args = any(arg is not None for arg in base_args) or enriched_columns or intent

    if config_file and has_cli_args:
        console.print("[bold red]Error: Provide either a config file OR CLI arguments, not both.[/bold red]")
        raise click.Abort()

    if not config_file and not has_cli_args:
        console.print("[bold red]Error: Provide a config file or CLI arguments.[/bold red]")
        raise click.Abort()

    if has_cli_args:
        validate_enrich_args(source_type, source, target, source_columns, enriched_columns, intent)

    try:
        if config_file:
            console.print(f"[bold cyan]Running enrichment from {config_file}...[/bold cyan]\n")
            run_enrichment(config_file)
        else:
            src_cols = parse_columns(source_columns)

            if intent:
                # Let the API derive output columns and a processor from
                # the natural-language intent; an explicit --processor wins.
                console.print("[dim]Getting suggestions from Parallel API...[/dim]")
                suggestion = suggest_from_intent(intent, src_cols)
                enr_cols = suggestion["enriched_columns"]
                final_processor = processor or suggestion["processor"]
                console.print(f"[green]AI suggested {len(enr_cols)} columns, processor: {final_processor}[/green]\n")
            else:
                enr_cols = parse_columns(enriched_columns)
                final_processor = processor or "core-fast"

            config = build_config_from_args(
                source_type=source_type,
                source=source,
                target=target,
                source_columns=src_cols,
                enriched_columns=enr_cols,
                processor=final_processor,
            )

            console.print(f"[bold cyan]Running enrichment: {source} -> {target}[/bold cyan]\n")
            run_enrichment_from_dict(config)

        console.print("\n[bold green]Enrichment complete![/bold green]")

    except FileNotFoundError as e:
        console.print(f"[bold red]Error: {e}[/bold red]")
        raise click.Abort() from None
    except Exception as e:
        console.print(f"[bold red]Error during enrichment: {e}[/bold red]")
        # Re-raise so unexpected failures surface with a full traceback.
        raise
482
+
483
+
484
@enrich.command(name="plan")
@click.option("-o", "--output", default="config.yaml", help="Output YAML file path", show_default=True)
@click.option("--source-type", type=click.Choice(["csv", "duckdb", "bigquery"]), help="Data source type")
@click.option("--source", help="Source file path or table name")
@click.option("--target", help="Target file path or table name")
@click.option("--source-columns", help="Source columns as JSON")
@click.option("--enriched-columns", help="Enriched columns as JSON")
@click.option("--intent", help="Natural language description (AI suggests columns)")
@click.option("--processor", type=click.Choice(AVAILABLE_PROCESSORS), help="Processor to use")
def enrich_plan(
    output: str,
    source_type: str | None,
    source: str | None,
    target: str | None,
    source_columns: str | None,
    enriched_columns: str | None,
    intent: str | None,
    processor: str | None,
):
    """Create an enrichment configuration file."""
    # With CLI args, build the config non-interactively; otherwise fall
    # back to the interactive planner.
    base_args = [source_type, source, target, source_columns]
    has_cli_args = any(arg is not None for arg in base_args) or enriched_columns or intent

    if has_cli_args:
        validate_enrich_args(source_type, source, target, source_columns, enriched_columns, intent)
        src_cols = parse_columns(source_columns)

        if intent:
            # AI-derived output columns/processor; explicit --processor wins.
            console.print("[dim]Getting suggestions from Parallel API...[/dim]")
            suggestion = suggest_from_intent(intent, src_cols)
            enr_cols = suggestion["enriched_columns"]
            final_processor = processor or suggestion["processor"]
            console.print(f"[green]AI suggested {len(enr_cols)} columns, processor: {final_processor}[/green]")
        else:
            enr_cols = parse_columns(enriched_columns)
            final_processor = processor or "core-fast"

        config = build_config_from_args(
            source_type=source_type,
            source=source,
            target=target,
            source_columns=src_cols,
            enriched_columns=enr_cols,
            processor=final_processor,
        )

        save_config(config, output)
        console.print(f"[bold green]Configuration saved to {output}[/bold green]")
    else:
        # Interactive mode: Ctrl-C cancels cleanly instead of tracebacking.
        try:
            config = create_config_interactive()
            save_config(config, output)
        except KeyboardInterrupt:
            console.print("\n[yellow]Configuration creation cancelled.[/yellow]")
            raise click.Abort() from None
539
+
540
+
541
@enrich.command(name="suggest")
@click.argument("intent")
@click.option("--source-columns", help="Source columns as JSON")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def enrich_suggest(intent: str, source_columns: str | None, output_json: bool):
    """Use AI to suggest output columns and processor."""
    try:
        src_cols = parse_columns(source_columns) if source_columns else None

        if not output_json:
            console.print("[dim]Getting suggestions from Parallel API...[/dim]\n")

        result = suggest_from_intent(intent, src_cols)

        if output_json:
            # Machine-readable mode: plain print (no rich markup) to stdout.
            print(json.dumps(result, indent=2))
        else:
            if result.get("title"):
                console.print(f"[bold]Task: {result['title']}[/bold]\n")

            console.print(f"[bold green]Recommended Processor:[/bold green] {result['processor']}\n")

            console.print("[bold green]Suggested Output Columns:[/bold green]")
            for col in result["enriched_columns"]:
                console.print(f" [cyan]{col['name']}[/cyan] ({col['type']}): {col['description']}")

            if result.get("warnings"):
                console.print("\n[yellow]Warnings:[/yellow]")
                for warning in result["warnings"]:
                    console.print(f" {warning}")

            # Ready-to-paste value for `enrich run --enriched-columns`.
            console.print("\n[dim]JSON (for --enriched-columns):[/dim]")
            console.print(json.dumps(result["enriched_columns"]))

    except Exception as e:
        console.print(f"[bold red]Error: {e}[/bold red]")
        raise click.Abort() from None
578
+
579
+
580
@enrich.command(name="deploy")
@click.option("--system", type=click.Choice(["bigquery"]), required=True, help="Target system to deploy to")
@click.option("--project", "-p", help="Cloud project ID (required for bigquery)")
@click.option("--region", "-r", default="us-central1", show_default=True, help="Cloud region")
@click.option("--api-key", "-k", help="Parallel API key (or use PARALLEL_API_KEY env var)")
@click.option("--dataset", default="parallel_functions", show_default=True, help="Dataset name (BigQuery)")
def enrich_deploy(system: str, project: str | None, region: str, api_key: str | None, dataset: str):
    """Deploy Parallel enrichment to a cloud system."""
    # Only BigQuery is supported today (enforced by click.Choice as well).
    if system != "bigquery":
        return

    if not project:
        console.print("[bold red]Error: --project is required for BigQuery deployment.[/bold red]")
        raise click.Abort()

    # Imported lazily so google-cloud deps are only needed when deploying.
    from parallel_web_tools.integrations.bigquery import deploy_bigquery_integration

    # Resolve the API key: explicit flag, then environment, then stored login.
    if not api_key:
        api_key = os.environ.get("PARALLEL_API_KEY")
    if not api_key:
        try:
            api_key = get_api_key()
        except Exception:
            pass
    if not api_key:
        console.print("[bold red]Error: Parallel API key required[/bold red]")
        console.print(" Use --api-key, PARALLEL_API_KEY env var, or run 'parallel-cli login'")
        raise click.Abort()

    console.print(f"[bold cyan]Deploying to BigQuery in {project}...[/bold cyan]\n")

    try:
        result = deploy_bigquery_integration(
            project_id=project,
            api_key=api_key,
            region=region,
            dataset_id=dataset,
        )
    except Exception as e:
        console.print(f"[bold red]Deployment failed: {e}[/bold red]")
        raise click.Abort() from None
    else:
        console.print("\n[bold green]Deployment complete![/bold green]")
        console.print(f"\nFunction URL: {result['function_url']}")
        console.print("\n[cyan]Example query:[/cyan]")
        console.print(result["example_query"])
623
+
624
+
625
if __name__ == "__main__":
    # Allow invoking this module directly in addition to the installed
    # console-script entry point.
    main()