acryl-datahub 1.3.0.1rc2__py3-none-any.whl → 1.3.0.1rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (47) hide show
  1. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/METADATA +2563 -2561
  2. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/RECORD +46 -44
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataproduct/dataproduct.py +26 -0
  5. datahub/cli/config_utils.py +18 -10
  6. datahub/cli/docker_check.py +2 -1
  7. datahub/cli/docker_cli.py +4 -2
  8. datahub/cli/graphql_cli.py +1422 -0
  9. datahub/cli/quickstart_versioning.py +2 -2
  10. datahub/cli/specific/dataproduct_cli.py +2 -4
  11. datahub/cli/specific/user_cli.py +172 -1
  12. datahub/configuration/env_vars.py +331 -0
  13. datahub/configuration/kafka.py +6 -4
  14. datahub/emitter/mce_builder.py +2 -4
  15. datahub/emitter/rest_emitter.py +15 -15
  16. datahub/entrypoints.py +2 -0
  17. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  18. datahub/ingestion/api/source.py +5 -0
  19. datahub/ingestion/graph/client.py +197 -0
  20. datahub/ingestion/graph/config.py +2 -2
  21. datahub/ingestion/sink/datahub_rest.py +6 -5
  22. datahub/ingestion/source/aws/aws_common.py +20 -13
  23. datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -4
  24. datahub/ingestion/source/grafana/models.py +5 -0
  25. datahub/ingestion/source/iceberg/iceberg.py +39 -19
  26. datahub/ingestion/source/kafka_connect/source_connectors.py +4 -1
  27. datahub/ingestion/source/mode.py +13 -0
  28. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  29. datahub/ingestion/source/snowflake/snowflake_schema.py +2 -2
  30. datahub/ingestion/source/sql/mssql/source.py +7 -1
  31. datahub/ingestion/source/sql/teradata.py +80 -65
  32. datahub/ingestion/source/unity/config.py +31 -0
  33. datahub/ingestion/source/unity/proxy.py +73 -0
  34. datahub/ingestion/source/unity/source.py +27 -70
  35. datahub/ingestion/source/unity/usage.py +46 -4
  36. datahub/sql_parsing/sql_parsing_aggregator.py +14 -5
  37. datahub/sql_parsing/sqlglot_lineage.py +7 -0
  38. datahub/telemetry/telemetry.py +8 -3
  39. datahub/utilities/file_backed_collections.py +2 -2
  40. datahub/utilities/is_pytest.py +3 -2
  41. datahub/utilities/logging_manager.py +22 -6
  42. datahub/utilities/sample_data.py +5 -4
  43. datahub/emitter/sql_parsing_builder.py +0 -306
  44. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/WHEEL +0 -0
  45. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/entry_points.txt +0 -0
  46. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/licenses/LICENSE +0 -0
  47. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1422 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import click
8
+
9
+ from datahub.ingestion.graph.client import get_default_graph
10
+ from datahub.ingestion.graph.config import ClientMode
11
+ from datahub.upgrade import upgrade
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
# GraphQL introspection documents.
#
# The query-type and mutation-type introspections are deliberately two separate
# documents: they are issued as two separate requests so the server's
# "bad faith" (full-schema) introspection protection is not triggered.
# Each document drills three levels into `ofType` so NON_NULL/LIST wrappers
# around a named argument type can still be resolved.
QUERY_INTROSPECTION = """
query QueryIntrospection {
  __schema {
    queryType {
      name
      fields {
        name
        description
        args {
          name
          type {
            name
            kind
            ofType {
              name
              kind
              ofType {
                name
                kind
              }
            }
          }
        }
      }
    }
  }
}
"""

MUTATION_INTROSPECTION = """
query MutationIntrospection {
  __schema {
    mutationType {
      name
      fields {
        name
        description
        args {
          name
          type {
            name
            kind
            ofType {
              name
              kind
              ofType {
                name
                kind
              }
            }
          }
        }
      }
    }
  }
}
"""
73
+
74
+
75
+ def _is_file_path(value: str) -> bool:
76
+ """Check if a string appears to be a file path and the file exists."""
77
+ if not value or len(value) < 2:
78
+ return False
79
+
80
+ resolved_path = Path(value).resolve()
81
+ return resolved_path.exists()
82
+
83
+
84
def _load_content_or_file(value: str) -> str:
    """Return the file's text when *value* is an existing path, else *value* itself.

    Note: the previous version checked the *resolved* path for "../" / "..\\"
    segments as a path-traversal guard, but ``Path.resolve`` has already
    eliminated ".." components by that point, so the check could never fire;
    it has been removed as dead code.
    """
    if _is_file_path(value):
        resolved_path = Path(value).resolve()
        # Read as UTF-8 explicitly so behavior does not depend on the locale.
        with open(resolved_path, "r", encoding="utf-8") as f:
            return f.read()
    return value
96
+
97
+
98
def _parse_variables(variables_str: Optional[str]) -> Optional[Dict[str, Any]]:
    """Parse GraphQL variables given as an inline JSON string or a JSON file path.

    Returns None when nothing was supplied; raises click.ClickException when
    the content is not valid JSON.
    """
    if not variables_str:
        return None

    raw = _load_content_or_file(variables_str)
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise click.ClickException(f"Invalid JSON in variables: {exc}") from exc
    return parsed
108
+
109
+
110
+ def _get_schema_files_path() -> Path:
111
+ """Get the path to DataHub's GraphQL schema files."""
112
+ # Try to find the schema files relative to the current package
113
+ current_file = Path(__file__)
114
+ repo_root = current_file
115
+
116
+ # Go up directories until we find the repo root (contains datahub-graphql-core)
117
+ for _ in range(10): # Safety limit
118
+ repo_root = repo_root.parent
119
+ graphql_core_path = (
120
+ repo_root / "datahub-graphql-core" / "src" / "main" / "resources"
121
+ )
122
+ if graphql_core_path.exists():
123
+ return graphql_core_path
124
+
125
+ # Fallback: try common relative paths
126
+ possible_paths = [
127
+ Path("../../../datahub-graphql-core/src/main/resources"),
128
+ Path("../../../../datahub-graphql-core/src/main/resources"),
129
+ Path("datahub-graphql-core/src/main/resources"),
130
+ ]
131
+
132
+ for path in possible_paths:
133
+ if path.exists():
134
+ return path.resolve()
135
+
136
+ raise FileNotFoundError("Could not find DataHub GraphQL schema files")
137
+
138
+
139
def _parse_graphql_operations_from_files(
    custom_schema_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Parse operations from DataHub's GraphQL schema files as fallback.

    Scans every ``*.graphql`` file in the schema directory (either
    *custom_schema_path* or the auto-discovered repo location) and extracts
    Query/Mutation field names via regex, returning a structure shaped like
    the introspection result so downstream formatting code can use either.

    Args:
        custom_schema_path: Optional directory to read schema files from.

    Returns:
        Dict with "queryType"/"mutationType" keys; each is ``{"fields": [...]}``
        or None when no operations of that kind were found.

    Raises:
        click.ClickException: When the schema files cannot be found or parsed.
    """
    try:
        if custom_schema_path:
            schema_path = Path(custom_schema_path)
            if not schema_path.exists():
                raise FileNotFoundError(
                    f"Custom schema path does not exist: {custom_schema_path}"
                )
            logger.debug(f"Using custom GraphQL schema path: {schema_path}")
        else:
            schema_path = _get_schema_files_path()
            logger.debug(f"Found GraphQL schema files at: {schema_path}")

        queries = []
        mutations = []

        # Parse all .graphql files in the directory
        for graphql_file in schema_path.glob("*.graphql"):
            content = graphql_file.read_text()

            # Extract queries using regex.
            # NOTE: [^}]+ stops at the first closing brace, so only the flat
            # field list of each `type Query { ... }` block is captured.
            query_matches = re.finditer(
                r"extend\s+type\s+Query\s*\{([^}]+)\}|type\s+Query\s*\{([^}]+)\}",
                content,
                re.DOTALL | re.IGNORECASE,
            )

            for match in query_matches:
                query_content = match.group(1) or match.group(2)
                operations = _parse_operations_from_content(query_content, "Query")
                queries.extend(operations)

            # Extract mutations using regex
            mutation_matches = re.finditer(
                r"extend\s+type\s+Mutation\s*\{([^}]+)\}|type\s+Mutation\s*\{([^}]+)\}",
                content,
                re.DOTALL | re.IGNORECASE,
            )

            for match in mutation_matches:
                mutation_content = match.group(1) or match.group(2)
                operations = _parse_operations_from_content(
                    mutation_content, "Mutation"
                )
                mutations.extend(operations)

        logger.debug(
            f"Parsed {len(queries)} queries and {len(mutations)} mutations from schema files"
        )

        return {
            "queryType": {"fields": queries} if queries else None,
            "mutationType": {"fields": mutations} if mutations else None,
        }

    except Exception as e:
        # Any failure here is fatal for the caller: without a schema we cannot
        # list or describe operations, so surface actionable guidance.
        logger.error(f"Failed to parse GraphQL schema files: {e}")
        logger.error("Cannot proceed without valid schema information.")
        logger.error("Please ensure:")
        logger.error("1. DataHub GMS is accessible for schema introspection")
        logger.error("2. Schema files exist and are valid GraphQL")
        logger.error("3. Network connectivity allows GraphQL requests")
        raise click.ClickException(
            f"Schema loading failed: {e}. Cannot determine available GraphQL operations."
        ) from e
207
+
208
+
209
+ def _parse_operations_from_content(
210
+ content: str, operation_type: str
211
+ ) -> List[Dict[str, Any]]:
212
+ """Parse individual operations from GraphQL content."""
213
+ operations = []
214
+
215
+ # Match field definitions with optional descriptions
216
+ # Pattern matches: fieldName(args): ReturnType or "description" fieldName(args): ReturnType
217
+ field_pattern = (
218
+ r'(?:"""([^"]+)"""\s*|"([^"]+)"\s*)?(\w+)(?:\([^)]*\))?\s*:\s*[^,\n]+'
219
+ )
220
+
221
+ matches = re.finditer(field_pattern, content, re.MULTILINE)
222
+
223
+ for match in matches:
224
+ description1, description2, field_name = match.groups()
225
+ description = description1 or description2 or ""
226
+
227
+ # Skip common GraphQL keywords and types
228
+ if field_name.lower() in [
229
+ "query",
230
+ "mutation",
231
+ "subscription",
232
+ "type",
233
+ "input",
234
+ "enum",
235
+ ]:
236
+ continue
237
+
238
+ operation: Dict[str, Any] = {
239
+ "name": field_name,
240
+ "description": description.strip(),
241
+ "args": [], # We could parse args too, but for now keep it simple
242
+ }
243
+ operations.append(operation)
244
+
245
+ return operations
246
+
247
+
248
+ def _format_operation_list(
249
+ operations: List[Dict[str, Any]], operation_type: str
250
+ ) -> str:
251
+ """Format operations list for display."""
252
+ if not operations:
253
+ return f"No {operation_type.lower()} operations found."
254
+
255
+ lines = [f"{operation_type}:"]
256
+ for op in operations:
257
+ name = op.get("name", "Unknown")
258
+ description = op.get("description", "")
259
+ if description:
260
+ lines.append(f" - {name}: {description}")
261
+ else:
262
+ lines.append(f" - {name}")
263
+
264
+ return "\n".join(lines)
265
+
266
+
267
+ def _find_input_type(
268
+ schema: Dict[str, Any], type_name: str
269
+ ) -> Optional[Dict[str, Any]]:
270
+ """Find an input type definition in the schema."""
271
+ types = schema.get("types", [])
272
+ for type_def in types:
273
+ if type_def.get("name") == type_name and type_def.get("kind") == "INPUT_OBJECT":
274
+ return type_def
275
+ return None
276
+
277
+
278
def _format_operation_details(
    operation: Dict[str, Any],
    operation_type: str,
    schema: Optional[Dict[str, Any]] = None,
) -> str:
    """Format detailed operation information.

    Renders the operation's name, kind, description and argument list. When a
    *schema* dict (with a top-level "types" list) is supplied, each argument
    whose base type is an INPUT_OBJECT found in that schema also gets its
    input fields listed inline.

    Args:
        operation: Introspected operation field (name/description/args).
        operation_type: "Query" or "Mutation" — display only.
        schema: Optional introspected schema used to expand argument types.

    Returns:
        A multi-line, human-readable description of the operation.
    """
    name = operation.get("name", "Unknown")
    description = operation.get("description", "No description available")
    args = operation.get("args", [])

    lines = [
        f"Operation: {name}",
        f"Type: {operation_type}",
        f"Description: {description}",
    ]

    if args:
        lines.append("Arguments:")
        for arg in args:
            arg_name = arg.get("name", "unknown")
            arg_type = _format_graphql_type(arg.get("type", {}))
            lines.append(f" - {arg_name}: {arg_type}")

            # If we have schema info, try to show input type fields
            if schema:
                # Extract the base type name (remove ! and [] wrappers)
                base_type_name = _extract_base_type_name(arg.get("type", {}))
                if base_type_name:
                    input_type = _find_input_type(schema, base_type_name)
                    if input_type:
                        input_fields = input_type.get("inputFields", [])
                        if input_fields:
                            lines.append(f" Fields in {base_type_name}:")
                            for field in input_fields:
                                field_name = field.get("name", "unknown")
                                field_type = _format_graphql_type(field.get("type", {}))
                                field_desc = field.get("description", "")
                                if field_desc:
                                    lines.append(
                                        f" - {field_name}: {field_type} - {field_desc}"
                                    )
                                else:
                                    lines.append(f" - {field_name}: {field_type}")
    else:
        lines.append("Arguments: None")

    return "\n".join(lines)
325
+
326
+
327
def _format_operation_details_recursive(
    operation: Dict[str, Any], operation_type: str, client: Any
) -> str:
    """Format detailed operation information with recursive type exploration.

    Like ``_format_operation_details`` but, instead of a pre-fetched schema,
    uses *client* (a DataHub graph client exposing ``execute_graphql``) to
    introspect each non-scalar argument type on demand, following nested
    input types transitively via ``_fetch_type_recursive``.
    """
    name = operation.get("name", "Unknown")
    description = operation.get("description", "No description available")
    args = operation.get("args", [])

    lines = [
        f"Operation: {name}",
        f"Type: {operation_type}",
        f"Description: {description}",
    ]

    if args:
        lines.append("Arguments:")

        # Collect all input types for recursive exploration
        all_types_to_explore = set()

        for arg in args:
            arg_name = arg.get("name", "unknown")
            arg_type = _format_graphql_type(arg.get("type", {}))
            lines.append(f" - {arg_name}: {arg_type}")

            # Collect base type name for recursive exploration;
            # built-in scalars need no further introspection.
            base_type_name = _extract_base_type_name(arg.get("type", {}))
            if base_type_name and base_type_name not in [
                "String",
                "Int",
                "Float",
                "Boolean",
                "ID",
            ]:
                all_types_to_explore.add(base_type_name)

        # Recursively explore all collected types
        if all_types_to_explore:
            lines.append("")  # Empty line before type details
            lines.append("Input Type Details:")

            all_explored_types = {}
            for type_name in all_types_to_explore:
                logger.debug(f"Recursively exploring input type: {type_name}")
                try:
                    explored_types = _fetch_type_recursive(client, type_name)
                    all_explored_types.update(explored_types)
                except Exception as e:
                    # Best effort: a type that fails to introspect is omitted.
                    logger.debug(f"Failed to explore type {type_name}: {e}")

            # Format all explored types (sorted for stable output)
            if all_explored_types:
                lines.append("")
                for type_name in sorted(all_explored_types.keys()):
                    type_info = all_explored_types[type_name]
                    lines.append(f"{type_name}:")
                    lines.extend(_format_single_type_fields(type_info))
                    lines.append("")  # Empty line between types
    else:
        lines.append("Arguments: None")

    return "\n".join(lines).rstrip()
389
+
390
+
391
def _format_type_details(input_type: Dict[str, Any]) -> str:
    """Format detailed input type information.

    Renders a type's name and kind, followed by either its input fields (for
    INPUT_OBJECT types) or its enum values with deprecation markers (for ENUM
    types). Emits an explicit "None" line when neither is present.
    """
    type_name = input_type.get("name", "Unknown")
    type_kind = input_type.get("kind", "")
    input_fields = input_type.get("inputFields", [])
    enum_values = input_type.get("enumValues", [])

    lines = [
        f"Type: {type_name}",
        f"Kind: {type_kind}",
    ]

    if input_fields:
        lines.append("Fields:")
        for field in input_fields:
            field_name = field.get("name", "unknown")
            field_type = _format_graphql_type(field.get("type", {}))
            field_desc = field.get("description", "")
            if field_desc:
                lines.append(f" - {field_name}: {field_type} - {field_desc}")
            else:
                lines.append(f" - {field_name}: {field_type}")
    elif enum_values:
        lines.append("Enum Values:")
        for enum_value in enum_values:
            value_name = enum_value.get("name", "unknown")
            value_desc = enum_value.get("description", "")
            is_deprecated = enum_value.get("isDeprecated", False)
            deprecation_reason = enum_value.get("deprecationReason", "")

            value_line = f" - {value_name}"
            if value_desc:
                value_line += f" - {value_desc}"
            if is_deprecated:
                if deprecation_reason:
                    value_line += f" (DEPRECATED: {deprecation_reason})"
                else:
                    value_line += " (DEPRECATED)"
            lines.append(value_line)
    else:
        # Distinguish empty enums from empty input objects in the output.
        if type_kind == "ENUM":
            lines.append("Enum Values: None")
        else:
            lines.append("Fields: None")

    return "\n".join(lines)
437
+
438
+
439
def _collect_nested_types(
    type_info: Dict[str, Any], visited: Optional[set] = None
) -> List[str]:
    """Collect the custom (non-scalar) type names referenced by *type_info*'s input fields.

    Args:
        type_info: An introspected type definition (expects an ``inputFields`` list).
        visited: Type names to exclude — typically types the caller already fetched.

    Returns:
        Unique nested type names in first-reference order. Built-in scalars
        (String/Int/Float/Boolean/ID) are skipped.

    Fix: the previous version appended a duplicate entry whenever several
    fields shared the same type, causing redundant recursive fetch attempts.
    """
    if visited is None:
        visited = set()

    builtin_scalars = {"String", "Int", "Float", "Boolean", "ID"}
    nested_types: List[str] = []
    seen: set = set()  # dedupe within this one type definition

    for field in type_info.get("inputFields", []):
        base_type_name = _extract_base_type_name(field.get("type", {}))
        if not base_type_name or base_type_name in visited:
            continue
        if base_type_name in builtin_scalars or base_type_name in seen:
            continue
        # Don't add to `visited` here — _fetch_type_recursive owns that bookkeeping.
        seen.add(base_type_name)
        nested_types.append(base_type_name)

    return nested_types
460
+
461
+
462
def _fetch_type_recursive(
    client: Any, type_name: str, visited: Optional[set] = None
) -> Dict[str, Dict[str, Any]]:
    """Recursively fetch a type and all its nested types.

    Issues one targeted ``__type`` introspection request per type via *client*
    and follows every non-scalar type referenced by the result's input fields.

    Args:
        client: DataHub graph client exposing ``execute_graphql``.
        type_name: Name of the type to start from.
        visited: Names already fetched; breaks cycles. Mutated in place and
            shared across the whole recursion.

    Returns:
        Mapping of type name -> introspected type info for every reachable
        type. Failures are logged at debug level and the type is skipped.
    """
    if visited is None:
        visited = set()

    if type_name in visited:
        return {}  # cycle, or already fetched by an earlier branch

    visited.add(type_name)
    types_map = {}

    # Fetch the current type
    try:
        # NOTE: type_name is interpolated directly into the document; it comes
        # from introspection results, not raw user input.
        targeted_query = f"""
        query DescribeType {{
          __type(name: "{type_name}") {{
            name
            kind
            inputFields {{
              name
              description
              type {{
                name
                kind
                ofType {{
                  name
                  kind
                  ofType {{
                    name
                    kind
                  }}
                }}
              }}
            }}
            enumValues {{
              name
              description
              isDeprecated
              deprecationReason
            }}
          }}
        }}
        """

        type_result = client.execute_graphql(targeted_query)
        type_info = type_result.get("__type")

        if type_info:
            types_map[type_name] = type_info

            # Find nested types
            nested_type_names = _collect_nested_types(type_info, visited)
            logger.debug(f"Type '{type_name}' has nested types: {nested_type_names}")

            # Recursively fetch nested types
            for nested_type_name in nested_type_names:
                logger.debug(f"Recursively fetching nested type: {nested_type_name}")
                nested_types = _fetch_type_recursive(client, nested_type_name, visited)
                types_map.update(nested_types)
                if nested_type_name in nested_types:
                    logger.debug(f"Successfully fetched type: {nested_type_name}")
                else:
                    logger.debug(f"Failed to fetch type: {nested_type_name}")

    except Exception as e:
        # Best effort: an unreachable/unknown type yields an empty result.
        logger.debug(f"Failed to fetch type {type_name}: {e}")

    return types_map
532
+
533
+
534
def _format_single_type_fields(
    type_info: Dict[str, Any], indent: str = " "
) -> List[str]:
    """Render one type's input fields (or enum values) as indented text lines."""
    rendered: List[str] = []
    fields = type_info.get("inputFields", [])
    members = type_info.get("enumValues", [])

    if fields:
        for field in fields:
            label = field.get("name", "unknown")
            type_text = _format_graphql_type(field.get("type", {}))
            about = field.get("description", "")
            if about:
                rendered.append(f"{indent}{label}: {type_text} - {about}")
            else:
                rendered.append(f"{indent}{label}: {type_text}")
    elif members:
        for member in members:
            line = f"{indent}{member.get('name', 'unknown')}"
            about = member.get("description", "")
            if about:
                line += f" - {about}"
            if member.get("isDeprecated", False):
                reason = member.get("deprecationReason", "")
                line += f" (DEPRECATED: {reason})" if reason else " (DEPRECATED)"
            rendered.append(line)
    elif type_info.get("kind", "") == "ENUM":
        rendered.append(f"{indent}(no enum values)")
    else:
        rendered.append(f"{indent}(no fields)")

    return rendered
575
+
576
+
577
def _format_recursive_types(
    types_map: Dict[str, Dict[str, Any]], root_type_name: str
) -> str:
    """Render the root type first, then every other collected type, blank-line separated."""
    rendered: List[str] = []

    root = types_map.get(root_type_name)
    if root is not None:
        rendered.append(f"{root_type_name}:")
        rendered.extend(_format_single_type_fields(root))
        rendered.append("")  # separator after the root type

    for other_name, other_info in types_map.items():
        if other_name == root_type_name:
            continue  # already rendered first
        rendered.append(f"{other_name}:")
        rendered.extend(_format_single_type_fields(other_info))
        rendered.append("")  # separator between types

    return "\n".join(rendered).rstrip()
600
+
601
+
602
+ def _extract_base_type_name(type_info: Dict[str, Any]) -> Optional[str]:
603
+ """Extract the base type name from a GraphQL type (removing NON_NULL and LIST wrappers)."""
604
+ if not type_info:
605
+ return None
606
+
607
+ kind = type_info.get("kind", "")
608
+ name = type_info.get("name")
609
+ of_type = type_info.get("ofType")
610
+
611
+ if kind in ["NON_NULL", "LIST"] and of_type:
612
+ return _extract_base_type_name(of_type)
613
+ elif name:
614
+ return name
615
+ else:
616
+ return None
617
+
618
+
619
+ def _format_graphql_type(type_info: Dict[str, Any]) -> str:
620
+ """Format GraphQL type information for display."""
621
+ kind = type_info.get("kind", "")
622
+ name = type_info.get("name")
623
+ of_type = type_info.get("ofType")
624
+
625
+ if kind == "NON_NULL":
626
+ inner_type = _format_graphql_type(of_type) if of_type else "Unknown"
627
+ return f"{inner_type}!"
628
+ elif kind == "LIST":
629
+ inner_type = _format_graphql_type(of_type) if of_type else "Unknown"
630
+ return f"[{inner_type}]"
631
+ elif name:
632
+ return name
633
+ else:
634
+ return "Unknown"
635
+
636
+
637
+ def _find_operation_by_name(
638
+ schema: Dict[str, Any], operation_name: str
639
+ ) -> Optional[tuple[Dict[str, Any], str]]:
640
+ """Find an operation by name in queries or mutations."""
641
+ # Search in queries
642
+ query_type = schema.get("queryType", {})
643
+ if query_type:
644
+ for field in query_type.get("fields", []):
645
+ if field.get("name") == operation_name:
646
+ return field, "Query"
647
+
648
+ # Search in mutations
649
+ mutation_type = schema.get("mutationType", {})
650
+ if mutation_type:
651
+ for field in mutation_type.get("fields", []):
652
+ if field.get("name") == operation_name:
653
+ return field, "Mutation"
654
+
655
+ return None
656
+
657
+
658
def _find_type_by_name(client: Any, type_name: str) -> Optional[Dict[str, Any]]:
    """Find a type by name using GraphQL introspection.

    Issues a targeted ``__type(name: ...)`` request via *client* and returns
    the raw type info (inputFields and enumValues, with ``ofType`` unwrapped
    three levels deep), or None when the type does not exist or the request
    fails.
    """
    try:
        # NOTE: type_name is interpolated directly into the document; it comes
        # from user-supplied CLI input or introspection results.
        targeted_query = f"""
        query DescribeType {{
          __type(name: "{type_name}") {{
            name
            kind
            inputFields {{
              name
              description
              type {{
                name
                kind
                ofType {{
                  name
                  kind
                  ofType {{
                    name
                    kind
                  }}
                }}
              }}
            }}
            enumValues {{
              name
              description
              isDeprecated
              deprecationReason
            }}
          }}
        }}
        """

        type_result = client.execute_graphql(targeted_query)
        return type_result.get("__type")

    except Exception as e:
        # Missing types and transport errors are both treated as "not found".
        logger.debug(f"Failed to fetch type {type_name}: {e}")
        return None
698
+
699
+
700
def _search_operation_and_type(
    schema: Dict[str, Any], client: Any, name: str
) -> Tuple[Optional[Tuple[Dict[str, Any], str]], Optional[Dict[str, Any]]]:
    """Resolve *name* both as an operation (from the schema) and as a type (via the client)."""
    return _find_operation_by_name(schema, name), _find_type_by_name(client, name)
707
+
708
+
709
+ def _convert_type_to_json(type_info: Dict[str, Any]) -> Dict[str, Any]:
710
+ """Convert GraphQL type info to LLM-friendly JSON format."""
711
+ if not type_info:
712
+ return {}
713
+
714
+ kind = type_info.get("kind", "")
715
+ name = type_info.get("name")
716
+ of_type = type_info.get("ofType")
717
+
718
+ result = {"kind": kind}
719
+
720
+ if name:
721
+ result["name"] = name
722
+
723
+ if kind in ["NON_NULL", "LIST"] and of_type:
724
+ result["ofType"] = _convert_type_to_json(of_type)
725
+ elif kind == "NON_NULL":
726
+ result["nonNull"] = True
727
+ elif kind == "LIST":
728
+ result["list"] = True
729
+
730
+ return result
731
+
732
+
733
def _convert_operation_to_json(
    operation: Dict[str, Any], operation_type: str
) -> Dict[str, Any]:
    """Convert an operation field into LLM-friendly JSON, flagging required arguments."""
    converted: Dict[str, Any] = {
        "name": operation.get("name", ""),
        "type": operation_type,
        "description": operation.get("description", ""),
        "arguments": [],
    }

    for argument in operation.get("args", []):
        argument_type = argument.get("type", {})
        converted["arguments"].append(
            {
                "name": argument.get("name", ""),
                "type": _convert_type_to_json(argument_type),
                "description": argument.get("description", ""),
                # A top-level NON_NULL wrapper marks the argument required.
                "required": argument_type.get("kind") == "NON_NULL",
            }
        )

    return converted
758
+
759
+
760
def _convert_type_details_to_json(type_info: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a full type definition (input fields or enum values) to LLM-friendly JSON."""
    converted: Dict[str, Any] = {
        "name": type_info.get("name", ""),
        "kind": type_info.get("kind", ""),
        "description": type_info.get("description", ""),
    }

    # INPUT_OBJECT types carry inputFields.
    fields = type_info.get("inputFields", [])
    if fields:
        converted["fields"] = [
            {
                "name": field.get("name", ""),
                "type": _convert_type_to_json(field.get("type", {})),
                "description": field.get("description", ""),
            }
            for field in fields
        ]

    # ENUM types carry enumValues.
    members = type_info.get("enumValues", [])
    if members:
        converted["values"] = []
        for member in members:
            entry = {
                "name": member.get("name", ""),
                "description": member.get("description", ""),
                "deprecated": member.get("isDeprecated", False),
            }
            if member.get("deprecationReason"):
                entry["deprecationReason"] = member.get("deprecationReason")
            converted["values"].append(entry)

    return converted
795
+
796
+
797
def _convert_operations_list_to_json(schema: Dict[str, Any]) -> Dict[str, Any]:
    """Convert the full operations listing into LLM-friendly JSON."""
    listing: Dict[str, Any] = {"schema": {"queries": [], "mutations": []}}

    for section, bucket, label in (
        ("queryType", "queries", "Query"),
        ("mutationType", "mutations", "Mutation"),
    ):
        container = schema.get(section, {})
        if not container:
            continue
        listing["schema"][bucket] = [
            _convert_operation_to_json(field, label)
            for field in container.get("fields", [])
        ]

    return listing
818
+
819
+
820
def _convert_describe_to_json(
    operation_info: Optional[tuple[Dict[str, Any], str]],
    type_info: Optional[Dict[str, Any]],
    types_map: Optional[Dict[str, Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Assemble the JSON payload for a describe command (operation, type, related types)."""
    payload = {}

    if operation_info:
        details, kind_label = operation_info
        payload["operation"] = _convert_operation_to_json(details, kind_label)

    if type_info:
        payload["type"] = _convert_type_details_to_json(type_info)

    if types_map:
        payload["relatedTypes"] = {
            related_name: _convert_type_details_to_json(related)
            for related_name, related in types_map.items()
        }

    return payload
843
+
844
+
845
+ def _dict_to_graphql_input(obj: Dict[str, Any]) -> str:
846
+ """Convert a Python dict to GraphQL input syntax."""
847
+ if not isinstance(obj, dict):
848
+ return str(obj)
849
+
850
+ items = []
851
+ for key, value in obj.items():
852
+ if isinstance(value, str):
853
+ items.append(f'{key}: "{value}"')
854
+ elif isinstance(value, dict):
855
+ items.append(f"{key}: {_dict_to_graphql_input(value)}")
856
+ elif isinstance(value, list):
857
+ list_items = []
858
+ for item in value:
859
+ if isinstance(item, str):
860
+ list_items.append(f'"{item}"')
861
+ elif isinstance(item, dict):
862
+ list_items.append(_dict_to_graphql_input(item))
863
+ else:
864
+ list_items.append(str(item))
865
+ items.append(f"{key}: [{', '.join(list_items)}]")
866
+ elif isinstance(value, bool):
867
+ items.append(f"{key}: {str(value).lower()}")
868
+ else:
869
+ items.append(f"{key}: {value}")
870
+
871
+ return "{" + ", ".join(items) + "}"
872
+
873
+
874
def _generate_operation_query(
    operation_field: Dict[str, Any],
    operation_type: str,
    variables: Optional[Dict[str, Any]] = None,
) -> str:
    """Generate a GraphQL query string from an operation field definition.

    Builds ``query { op(args) { selection } }`` (or ``mutation { ... }``) with
    argument values inlined from *variables* and a heuristic field selection
    chosen from the operation name.

    Args:
        operation_field: Introspected field definition (name + args).
        operation_type: "Query" or "Mutation"; lower-cased as the keyword.
        variables: Values for the operation's arguments, inlined as literals.

    Returns:
        The GraphQL document as a single-line string.

    Raises:
        click.ClickException: When a NON_NULL argument is missing from
            *variables*.
    """
    operation_name = operation_field.get("name", "unknown")
    args = operation_field.get("args", [])

    # Build arguments string
    args_string = ""
    if args:
        # Check for required arguments (top-level NON_NULL wrapper)
        required_args = []
        optional_args = []

        for arg in args:
            arg_name = arg.get("name")
            arg_type = arg.get("type", {})
            if arg_type.get("kind") == "NON_NULL":
                required_args.append(arg_name)
            else:
                optional_args.append(arg_name)

        if variables:
            # Build arguments from provided variables
            valid_args = []
            for arg in args:
                arg_name = arg.get("name")
                if arg_name and arg_name in variables:
                    # Use inline value instead of variable syntax for simplicity
                    value = variables[arg_name]
                    if isinstance(value, str):
                        # Handle string values with quotes
                        formatted_value = f'"{value}"'
                    elif isinstance(value, dict):
                        # Handle object/input types - convert to GraphQL syntax
                        formatted_value = _dict_to_graphql_input(value)
                    else:
                        # Handle numbers, booleans, etc.
                        formatted_value = (
                            str(value).lower()
                            if isinstance(value, bool)
                            else str(value)
                        )

                    valid_args.append(f"{arg_name}: {formatted_value}")

            if valid_args:
                args_string = f"({', '.join(valid_args)})"

        # Check if all required arguments are provided
        if required_args:
            missing_required = [
                arg for arg in required_args if not variables or arg not in variables
            ]
            if missing_required:
                raise click.ClickException(
                    f"Operation '{operation_name}' requires arguments: {', '.join(missing_required)}. "
                    f'Provide them using --variables \'{{"{missing_required[0]}": "value", ...}}\''
                )

    # Generate basic field selection based on common patterns
    if operation_name == "me":
        # Special case for 'me' query - we know it returns AuthenticatedUser
        field_selection = "{ corpUser { urn username properties { displayName email firstName lastName title } } }"
    elif operation_name.startswith("list"):
        # List operations typically return paginated results
        entity_name = operation_name.replace("list", "").lower()
        if entity_name == "users":
            field_selection = (
                "{ total users { urn username properties { displayName email } } }"
            )
        else:
            # Generic list response
            field_selection = "{ total }"
    elif operation_name in ["corpUser", "dataset", "dashboard", "chart"]:
        # Entity queries typically return the entity with basic fields
        field_selection = "{ urn }"
    else:
        # Default minimal selection
        field_selection = ""

    # Construct the query
    operation_keyword = operation_type.lower()
    query = f"{operation_keyword} {{ {operation_name}{args_string} {field_selection} }}"

    return query
962
+
963
+
964
def _get_schema_via_introspection(client: Any) -> Dict[str, Any]:
    """Fetch the GraphQL schema via live introspection.

    There is intentionally no fallback here: callers that explicitly ask for
    introspection should fail loudly when the server cannot be reached.

    Args:
        client: A DataHub graph client exposing ``execute_graphql``.

    Returns:
        A merged ``__schema`` dict containing query and mutation type info.

    Raises:
        click.ClickException: If either introspection request fails.
    """
    try:
        # Two separate requests sidestep "bad faith" introspection protection
        # that rejects a single combined query+mutation introspection call.
        partial_results = (
            client.execute_graphql(QUERY_INTROSPECTION),
            client.execute_graphql(MUTATION_INTROSPECTION),
        )

        # Merge the partial __schema payloads into one schema dict.
        merged: Dict[str, Any] = {}
        for result in partial_results:
            if result and "__schema" in result:
                merged.update(result["__schema"])

        logger.debug("Successfully fetched schema via introspection")
        return merged
    except Exception as e:
        logger.error(f"GraphQL introspection failed: {e}")
        logger.error("Cannot perform introspection. Please ensure:")
        logger.error("1. DataHub GMS is running and accessible")
        logger.error("2. Network connectivity allows GraphQL requests")
        logger.error("3. Authentication credentials are valid")
        raise click.ClickException(
            f"Schema introspection failed: {e}. Cannot retrieve live schema information."
        ) from e
989
+
990
+
991
def _handle_list_operations(
    schema: Dict[str, Any],
    format: str,
    pretty: bool,
) -> None:
    """Handle --list-operations and combined --list-queries --list-mutations.

    Args:
        schema: Introspected (or file-parsed) GraphQL schema dict.
        format: Output format, either "json" or "human".
        pretty: When True, indent JSON output.
    """
    if format == "json":
        json_output = _convert_operations_list_to_json(schema)
        click.echo(
            json.dumps(json_output, indent=2 if pretty else None, sort_keys=True)
        )
        return

    # Human-readable path: render queries then mutations, skipping empty sets.
    sections = []
    for type_key, title in (("queryType", "Queries"), ("mutationType", "Mutations")):
        fields = (schema.get(type_key) or {}).get("fields", [])
        if fields:
            sections.append(_format_operation_list(fields, title))

    click.echo("\n\n".join(sections))
1021
+
1022
+
1023
def _handle_list_queries(
    schema: Dict[str, Any],
    format: str,
    pretty: bool,
) -> None:
    """Handle --list-queries only.

    Args:
        schema: Introspected (or file-parsed) GraphQL schema dict.
        format: Output format, either "json" or "human".
        pretty: When True, indent JSON output.
    """
    # Fix: `schema.get("queryType", {})` returns None (not {}) when the key is
    # present with a None value, which crashed the JSON branch on `.get`.
    # Normalize once so both branches are None-safe and consistent.
    query_fields = (schema.get("queryType") or {}).get("fields", []) or []

    if format == "json":
        json_output: Dict[str, Any] = {
            "schema": {
                "queries": [
                    _convert_operation_to_json(field, "Query")
                    for field in query_fields
                ]
            }
        }
        click.echo(
            json.dumps(json_output, indent=2 if pretty else None, sort_keys=True)
        )
    else:
        click.echo(_format_operation_list(query_fields, "Queries"))
1049
+
1050
+
1051
def _handle_list_mutations(
    schema: Dict[str, Any],
    format: str,
    pretty: bool,
) -> None:
    """Handle --list-mutations only.

    Args:
        schema: Introspected (or file-parsed) GraphQL schema dict.
        format: Output format, either "json" or "human".
        pretty: When True, indent JSON output.
    """
    # Fix: `schema.get("mutationType", {})` returns None (not {}) when the key
    # is present with a None value, which crashed the JSON branch on `.get`.
    # Normalize once so both branches are None-safe and consistent.
    mutation_fields = (schema.get("mutationType") or {}).get("fields", []) or []

    if format == "json":
        json_output: Dict[str, Any] = {
            "schema": {
                "mutations": [
                    _convert_operation_to_json(field, "Mutation")
                    for field in mutation_fields
                ]
            }
        }
        click.echo(
            json.dumps(json_output, indent=2 if pretty else None, sort_keys=True)
        )
    else:
        click.echo(_format_operation_list(mutation_fields, "Mutations"))
1077
+
1078
+
1079
def _get_recursive_types_for_describe(
    client: Any,
    operation_info: Optional[Tuple[Dict[str, Any], str]],
    type_info: Optional[Dict[str, Any]],
    describe: str,
) -> Optional[Dict[str, Any]]:
    """Collect the transitive closure of types referenced by a describe target.

    For an operation, walks the non-scalar input types of its arguments; for a
    plain type, walks the type itself. All failures are logged at debug level
    and degrade gracefully (per-type failures are skipped; a total failure
    yields None).

    Args:
        client: Graph client used for follow-up type introspection.
        operation_info: (operation details, operation type) if the target
            resolved to an operation, else None.
        type_info: Type details if the target resolved to a type, else None.
        describe: The name the user asked to describe.

    Returns:
        A mapping of type name -> type details, or None when nothing could
        be fetched.
    """
    builtin_scalars = {"String", "Int", "Float", "Boolean", "ID"}
    types_map = None
    try:
        if operation_info:
            operation_details, _ = operation_info
            # Seed the walk with each argument's non-scalar base type.
            seed_types = set()
            for arg in operation_details.get("args", []):
                base_name = _extract_base_type_name(arg.get("type", {}))
                if base_name and base_name not in builtin_scalars:
                    seed_types.add(base_name)

            # Expand every seed type into its recursive type closure.
            collected: Dict[str, Any] = {}
            for type_name in seed_types:
                try:
                    collected.update(_fetch_type_recursive(client, type_name))
                except Exception as e:
                    logger.debug(
                        f"Failed to fetch recursive types for {type_name}: {e}"
                    )
            types_map = collected
        elif type_info:
            # Start the recursive walk from the described type itself.
            try:
                types_map = _fetch_type_recursive(client, describe)
            except Exception as e:
                logger.debug(f"Recursive type fetching failed: {e}")
                types_map = None
    except Exception as e:
        logger.debug(f"Recursive exploration failed: {e}")
        types_map = None

    return types_map
1127
+
1128
+
1129
def _handle_describe_json_output(
    operation_info: Optional[Tuple[Dict[str, Any], str]],
    type_info: Optional[Dict[str, Any]],
    types_map: Optional[Dict[str, Dict[str, Any]]],
    describe: str,
    recurse: bool,
    pretty: bool,
) -> None:
    """Emit the JSON form of a --describe result.

    Args:
        operation_info: Matched operation details, if any.
        type_info: Matched type details, if any.
        types_map: Recursively collected related types, if any.
        describe: The original describe target (echoed in metadata).
        recurse: Whether recursion was requested (echoed in metadata).
        pretty: When True, indent JSON output.
    """
    payload = _convert_describe_to_json(operation_info, type_info, types_map)

    # Attach request metadata so consumers can tell what was asked for.
    payload["meta"] = {"query": describe, "recursive": recurse}

    indent = 2 if pretty else None
    click.echo(json.dumps(payload, indent=indent, sort_keys=True))
1144
+
1145
+
1146
def _handle_describe_human_output(
    schema: Dict[str, Any],
    client: Any,
    operation_info: Optional[Tuple[Dict[str, Any], str]],
    type_info: Optional[Dict[str, Any]],
    describe: str,
    recurse: bool,
) -> None:
    """Emit the human-readable form of a --describe result.

    Renders operation and/or type details. When --recurse is set, the
    recursive formatters are tried first and any failure falls back to the
    standard (non-recursive) formatting. Callers must ensure at least one of
    operation_info / type_info is set (see _handle_describe), otherwise the
    single-section output path below would raise IndexError.

    Args:
        schema: Introspected (or file-parsed) GraphQL schema dict.
        client: Graph client, used only for recursive type fetching.
        operation_info: (operation details, operation type) if matched.
        type_info: Type details if matched.
        describe: The name the user asked to describe.
        recurse: Whether to recursively expand nested types.
    """
    output_sections = []

    # Show operation details if found
    if operation_info:
        operation_details, operation_type = operation_info

        if recurse:
            try:
                operation_output = _format_operation_details_recursive(
                    operation_details, operation_type, client
                )
            except Exception as e:
                # Recursive formatting is best-effort; degrade to the plain view.
                logger.debug(
                    f"Recursive operation details failed ({e}), falling back to standard format"
                )
                operation_output = _format_operation_details(
                    operation_details, operation_type, schema
                )
        else:
            operation_output = _format_operation_details(
                operation_details, operation_type, schema
            )

        output_sections.append(f"=== OPERATION ===\n{operation_output}")

    # Show type details if found
    if type_info:
        if recurse:
            try:
                types_map = _fetch_type_recursive(client, describe)
                # Only use the recursive rendering if the walk actually
                # resolved the requested type; otherwise show the flat view.
                if types_map and describe in types_map:
                    type_output = _format_recursive_types(types_map, describe)
                else:
                    type_output = _format_type_details(type_info)
            except Exception as e:
                logger.debug(
                    f"Recursive type details failed ({e}), falling back to standard format"
                )
                type_output = _format_type_details(type_info)
        else:
            type_output = _format_type_details(type_info)

        output_sections.append(f"=== TYPE ===\n{type_output}")

    # Output results
    if len(output_sections) > 1:
        # Both operation and type found - show both with separators
        click.echo("\n\n".join(output_sections))
    else:
        # Only one found - show without section header
        output = output_sections[0]
        # Remove the section header
        if output.startswith("=== OPERATION ===\n"):
            output = output[len("=== OPERATION ===\n") :]
        elif output.startswith("=== TYPE ===\n"):
            output = output[len("=== TYPE ===\n") :]
        click.echo(output)
1212
+
1213
+
1214
def _handle_describe(
    schema: Dict[str, Any],
    client: Any,
    describe: str,
    recurse: bool,
    format: str,
    pretty: bool,
) -> None:
    """Handle --describe for an operation or type name.

    Args:
        schema: Introspected (or file-parsed) GraphQL schema dict.
        client: Graph client used for recursive type exploration.
        describe: Operation or type name to look up.
        recurse: Whether to recursively expand nested types.
        format: Output format, either "json" or "human".
        pretty: When True, indent JSON output.

    Raises:
        click.ClickException: If the name matches neither an operation
            nor a type.
    """
    operation_info, type_info = _search_operation_and_type(schema, client, describe)

    # Guard clause: nothing matched at all.
    if not (operation_info or type_info):
        raise click.ClickException(
            f"'{describe}' not found as an operation or type. Use --list-operations to see available operations or try a specific type name."
        )

    if format != "json":
        _handle_describe_human_output(
            schema,
            client,
            operation_info,
            type_info,
            describe,
            recurse,
        )
        return

    # JSON path: recursive type expansion is only computed when requested.
    types_map = (
        _get_recursive_types_for_describe(client, operation_info, type_info, describe)
        if recurse
        else None
    )
    _handle_describe_json_output(
        operation_info,
        type_info,
        types_map,
        describe,
        recurse,
        pretty,
    )
1254
+
1255
+
1256
def _execute_operation(
    client: Any, operation: str, variables: Optional[str], schema_path: Optional[str]
) -> Dict[str, Any]:
    """Execute a named GraphQL operation by generating a query from the schema.

    Args:
        client: Graph client exposing ``execute_graphql``.
        operation: Name of the operation to run.
        variables: JSON string or file path of variables, or None.
        schema_path: Optional directory of local schema files; when absent,
            the schema is fetched via live introspection.

    Returns:
        The GraphQL execution result.

    Raises:
        click.ClickException: If the operation is unknown, or if query
            generation/execution fails.
    """
    # Prefer local schema files when provided; otherwise introspect the server.
    schema = (
        _parse_graphql_operations_from_files(schema_path)
        if schema_path
        else _get_schema_via_introspection(client)
    )

    operation_info = _find_operation_by_name(schema, operation)
    if not operation_info:
        raise click.ClickException(
            f"Operation '{operation}' not found. Use --list-operations to see available operations."
        )

    operation_field, operation_type = operation_info
    variables_dict = _parse_variables(variables)

    # NOTE: generation errors (e.g. missing required args) are wrapped by this
    # handler too, matching the original behavior.
    try:
        generated_query = _generate_operation_query(
            operation_field, operation_type, variables_dict
        )
        logger.debug(f"Generated query for operation '{operation}': {generated_query}")
        return client.execute_graphql(query=generated_query, variables=variables_dict)
    except Exception as e:
        raise click.ClickException(
            f"Failed to execute operation '{operation}': {e}"
        ) from e
1288
+
1289
+
1290
def _execute_query(client: Any, query: str, variables: Optional[str]) -> Dict[str, Any]:
    """Execute a raw GraphQL query string.

    Args:
        client: Graph client exposing ``execute_graphql``.
        query: Inline query text or a path to a .graphql file.
        variables: JSON string or file path of variables, or None.

    Returns:
        The GraphQL execution result.

    Raises:
        click.ClickException: If execution fails for any reason.
    """
    # The query argument may be inline text or a file path; resolve it first.
    query_content = _load_content_or_file(query)
    parsed_variables = _parse_variables(variables)

    try:
        return client.execute_graphql(query=query_content, variables=parsed_variables)
    except Exception as e:
        raise click.ClickException(f"Failed to execute GraphQL query: {e}") from e
1299
+
1300
+
1301
@click.command()
@click.option(
    "--query",
    "-q",
    help="GraphQL query string or path to .graphql file",
)
@click.option(
    "--variables",
    "-v",
    help="GraphQL variables as JSON string or path to .json file",
)
@click.option(
    "--operation",
    "-o",
    help="Execute a named GraphQL operation from the schema",
)
@click.option(
    "--list-operations",
    is_flag=True,
    help="List all available GraphQL operations (queries and mutations)",
)
@click.option(
    "--list-queries",
    is_flag=True,
    help="List available GraphQL queries",
)
@click.option(
    "--list-mutations",
    is_flag=True,
    help="List available GraphQL mutations",
)
@click.option(
    "--describe",
    "-d",
    help="Describe a specific GraphQL operation",
)
@click.option(
    "--recurse",
    is_flag=True,
    help="Recursively describe nested types when using --describe",
)
@click.option(
    "--schema-path",
    help="Path to GraphQL schema files directory (uses local files instead of live introspection)",
)
@click.option(
    "--no-pretty",
    is_flag=True,
    help="Disable pretty-printing of JSON output",
)
@click.option(
    "--format",
    type=click.Choice(["human", "json"]),
    default="human",
    help="Output format: human-readable or JSON for LLM consumption",
)
@upgrade.check_upgrade
def graphql(
    query: Optional[str],
    variables: Optional[str],
    operation: Optional[str],
    list_operations: bool,
    list_queries: bool,
    list_mutations: bool,
    describe: Optional[str],
    recurse: bool,
    schema_path: Optional[str],
    no_pretty: bool,
    format: str,
) -> None:
    """Execute GraphQL queries and mutations against DataHub.

    Dispatch order: schema-discovery flags (--list-operations,
    --list-queries, --list-mutations, --describe) are handled first and
    return immediately; otherwise --operation takes precedence over --query.
    With no flags at all, a usage error is raised.
    """

    pretty = not no_pretty
    client = get_default_graph(ClientMode.CLI)

    # Schema introspection commands
    if list_operations or list_queries or list_mutations or describe:
        # Local schema files take precedence over live introspection.
        if schema_path:
            schema = _parse_graphql_operations_from_files(schema_path)
        else:
            schema = _get_schema_via_introspection(client)

        # Both list flags together behave like --list-operations.
        if list_operations or (list_queries and list_mutations):
            _handle_list_operations(schema, format, pretty)
            return
        elif list_queries:
            _handle_list_queries(schema, format, pretty)
            return
        elif list_mutations:
            _handle_list_mutations(schema, format, pretty)
            return
        elif describe:
            _handle_describe(
                schema,
                client,
                describe,
                recurse,
                format,
                pretty,
            )
            return

    # Execution commands
    if operation:
        result = _execute_operation(client, operation, variables, schema_path)
    elif query:
        result = _execute_query(client, query, variables)
    else:
        raise click.ClickException(
            "Must specify either --query, --operation, or a discovery option "
            "(--list-operations, --list-queries, --list-mutations, --describe)"
        )

    # Output result
    if pretty:
        click.echo(json.dumps(result, indent=2, sort_keys=True))
    else:
        click.echo(json.dumps(result))
1419
+
1420
+
1421
+ if __name__ == "__main__":
1422
+ graphql()