alita-sdk 0.3.465__py3-none-any.whl → 0.3.497__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

Files changed (103) hide show
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +83 -1
  3. alita_sdk/cli/agent_loader.py +22 -4
  4. alita_sdk/cli/agent_ui.py +13 -3
  5. alita_sdk/cli/agents.py +1876 -186
  6. alita_sdk/cli/callbacks.py +96 -25
  7. alita_sdk/cli/cli.py +10 -1
  8. alita_sdk/cli/config.py +151 -9
  9. alita_sdk/cli/context/__init__.py +30 -0
  10. alita_sdk/cli/context/cleanup.py +198 -0
  11. alita_sdk/cli/context/manager.py +731 -0
  12. alita_sdk/cli/context/message.py +285 -0
  13. alita_sdk/cli/context/strategies.py +289 -0
  14. alita_sdk/cli/context/token_estimation.py +127 -0
  15. alita_sdk/cli/input_handler.py +167 -4
  16. alita_sdk/cli/inventory.py +1256 -0
  17. alita_sdk/cli/toolkit.py +14 -17
  18. alita_sdk/cli/toolkit_loader.py +35 -5
  19. alita_sdk/cli/tools/__init__.py +8 -1
  20. alita_sdk/cli/tools/filesystem.py +910 -64
  21. alita_sdk/cli/tools/planning.py +143 -157
  22. alita_sdk/cli/tools/terminal.py +154 -20
  23. alita_sdk/community/__init__.py +64 -8
  24. alita_sdk/community/inventory/__init__.py +224 -0
  25. alita_sdk/community/inventory/config.py +257 -0
  26. alita_sdk/community/inventory/enrichment.py +2137 -0
  27. alita_sdk/community/inventory/extractors.py +1469 -0
  28. alita_sdk/community/inventory/ingestion.py +3172 -0
  29. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  30. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  31. alita_sdk/community/inventory/parsers/base.py +295 -0
  32. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  33. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  34. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  35. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  36. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  37. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  38. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  39. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  40. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  41. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  42. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  43. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  44. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  45. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  46. alita_sdk/community/inventory/patterns/loader.py +348 -0
  47. alita_sdk/community/inventory/patterns/registry.py +198 -0
  48. alita_sdk/community/inventory/presets.py +535 -0
  49. alita_sdk/community/inventory/retrieval.py +1403 -0
  50. alita_sdk/community/inventory/toolkit.py +169 -0
  51. alita_sdk/community/inventory/visualize.py +1370 -0
  52. alita_sdk/configurations/bitbucket.py +0 -3
  53. alita_sdk/runtime/clients/client.py +108 -31
  54. alita_sdk/runtime/langchain/assistant.py +4 -2
  55. alita_sdk/runtime/langchain/constants.py +3 -1
  56. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  57. alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
  58. alita_sdk/runtime/langchain/langraph_agent.py +123 -31
  59. alita_sdk/runtime/llms/preloaded.py +2 -6
  60. alita_sdk/runtime/toolkits/__init__.py +2 -0
  61. alita_sdk/runtime/toolkits/application.py +1 -1
  62. alita_sdk/runtime/toolkits/mcp.py +107 -91
  63. alita_sdk/runtime/toolkits/planning.py +173 -0
  64. alita_sdk/runtime/toolkits/tools.py +59 -7
  65. alita_sdk/runtime/tools/artifact.py +46 -17
  66. alita_sdk/runtime/tools/function.py +2 -1
  67. alita_sdk/runtime/tools/llm.py +320 -32
  68. alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
  69. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  70. alita_sdk/runtime/tools/planning/models.py +246 -0
  71. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  72. alita_sdk/runtime/tools/vectorstore_base.py +44 -9
  73. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  74. alita_sdk/runtime/utils/mcp_client.py +465 -0
  75. alita_sdk/runtime/utils/mcp_oauth.py +80 -0
  76. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  77. alita_sdk/runtime/utils/streamlit.py +6 -10
  78. alita_sdk/runtime/utils/toolkit_utils.py +14 -5
  79. alita_sdk/tools/__init__.py +54 -27
  80. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
  81. alita_sdk/tools/base_indexer_toolkit.py +99 -20
  82. alita_sdk/tools/bitbucket/__init__.py +2 -2
  83. alita_sdk/tools/chunkers/__init__.py +3 -1
  84. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  85. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  86. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  87. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  88. alita_sdk/tools/code_indexer_toolkit.py +55 -22
  89. alita_sdk/tools/confluence/api_wrapper.py +63 -14
  90. alita_sdk/tools/elitea_base.py +86 -21
  91. alita_sdk/tools/jira/__init__.py +1 -1
  92. alita_sdk/tools/jira/api_wrapper.py +91 -40
  93. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  94. alita_sdk/tools/qtest/__init__.py +1 -1
  95. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  96. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  97. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  98. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +2 -1
  99. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +103 -61
  100. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
  101. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
  102. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
  103. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1256 @@
1
+ """
2
+ CLI commands for Inventory Ingestion Pipeline.
3
+
4
+ Provides command-line interface for running knowledge graph ingestion
5
+ from various source toolkits (GitHub, ADO, LocalGit, etc.).
6
+
7
+ Usage:
8
+ # List available presets
9
+ alita inventory presets
10
+
11
+ # Ingest using a preset (recommended!)
12
+ alita inventory ingest --dir ./my-project --graph ./graph.json --preset python
13
+
14
+ # Ingest using a toolkit config file
15
+ alita inventory ingest --toolkit .alita/tools/github.json --graph ./graph.json -w "*.md"
16
+
17
+ # Ingest from a local git repository
18
+ alita inventory ingest --source localgit --path /path/to/repo --graph ./graph.json
19
+
20
+ # Use a config file for LLM/embedding/guardrails settings
21
+ alita inventory ingest --toolkit ./github.json -g ./graph.json --config ingestion-config.yml
22
+
23
+ # Check ingestion status (failed files, progress)
24
+ alita inventory status --graph ./graph.json --name my-source
25
+
26
+ # Retry failed files from previous ingestion
27
+ alita inventory retry --dir ./my-project -g ./graph.json --name my-source
28
+ alita inventory retry --dir ./my-project -g ./graph.json --name my-source --force
29
+
30
+ # Generate config template
31
+ alita inventory init-config
32
+
33
+ # Show graph stats
34
+ alita inventory stats --graph ./graph.json
35
+
36
+ # Search the graph
37
+ alita inventory search "PaymentService" --graph ./graph.json
38
+ """
39
+
40
+ import click
41
+ import json
42
+ import logging
43
+ import os
44
+ import re
45
+ import sys
46
+ from pathlib import Path
47
+ from typing import Optional, List, Dict, Any
48
+
49
+ logger = logging.getLogger(__name__)
50
+
51
+
52
@click.group()
def inventory():
    """Inventory knowledge graph commands."""
    # Click group entry point: subcommands attach via @inventory.command(...)
    # below; the group body itself has nothing to do.
56
+
57
+
58
@inventory.command('presets')
def presets():
    """
    List available ingestion presets.

    Presets provide pre-configured whitelist/blacklist patterns for common
    programming languages and project types.

    Example:
        alita inventory presets
    """
    from alita_sdk.community.inventory import list_presets, get_preset

    available = list_presets()

    click.echo(f"\n📋 Available Presets ({len(available)} total):\n")

    # Group by category using first-match semantics:
    #  - each preset appears exactly once (previously 'javascript*' matched
    #    both the JS category and the Java category, since 'java' is a
    #    substring of 'javascript');
    #  - presets matching no matcher land in 'Other' instead of being
    #    silently omitted (previously e.g. go/rust/kotlin presets were
    #    invisible in this listing).
    matchers = [
        ('Python', lambda p: 'python' in p),
        ('JavaScript/TypeScript', lambda p: any(x in p for x in ['javascript', 'typescript', 'react', 'next', 'node'])),
        ('Java', lambda p: any(x in p for x in ['java', 'maven', 'gradle', 'spring'])),
        ('.NET/C#', lambda p: any(x in p for x in ['dotnet', 'csharp', 'aspnet'])),
        ('Multi-Language', lambda p: any(x in p for x in ['fullstack', 'monorepo', 'documentation'])),
    ]
    categories = {category: [] for category, _ in matchers}
    categories['Other'] = []
    for preset_name in available:
        lowered = preset_name.lower()
        for category, match in matchers:
            if match(lowered):
                categories[category].append(preset_name)
                break
        else:
            categories['Other'].append(preset_name)

    for category, preset_names in categories.items():
        if not preset_names:
            continue

        click.echo(f" {category}:")
        for preset_name in sorted(preset_names):
            preset_config = get_preset(preset_name)
            whitelist = preset_config.get('whitelist', [])

            # Format whitelist (show first 3 patterns)
            wl_display = ', '.join(whitelist[:3])
            if len(whitelist) > 3:
                wl_display += f', ... (+{len(whitelist)-3})'

            click.echo(f" • {preset_name:20} - {wl_display}")

        click.echo()

    click.echo("Usage:")
    click.echo(" alita inventory ingest --dir ./my-project -g ./graph.json --preset python")
    click.echo(" alita inventory ingest --dir ./src -g ./graph.json -p typescript -w '*.json'")
    click.echo()
107
+
108
+
109
@inventory.command('init-config')
@click.option('--output', '-o', default='./ingestion-config.yml', type=click.Path(),
              help='Output path for config template')
def init_config(output: str):
    """
    Generate a configuration template file.

    Example:
        alita inventory init-config -o ./my-config.yml
    """
    from alita_sdk.community.inventory import generate_config_template

    # Write the template and report where it landed, then print the
    # configurable sections as a short checklist.
    created_path = generate_config_template(output)
    click.echo(f"✅ Configuration template created: {created_path}")
    click.echo("\nEdit this file to configure:")
    for hint in (
        " - LLM provider and model (openai, azure, anthropic, ollama)",
        " - Embeddings for semantic search",
        " - Guardrails (rate limits, content filtering, thresholds)",
    ):
        click.echo(hint)
127
+
128
+
129
@inventory.command('ingest')
@click.option('--toolkit', '-t', type=click.Path(exists=True),
              help='Path to toolkit config JSON (e.g., .alita/tools/github.json)')
@click.option('--dir', '-d', 'directory', type=click.Path(exists=True, file_okay=False, dir_okay=True),
              help='Local directory to ingest (alternative to --toolkit for local files)')
@click.option('--graph', '-g', required=True, type=click.Path(),
              help='Path to output graph JSON file')
@click.option('--config', '-c', type=click.Path(exists=True),
              help='Path to YAML/JSON config file for LLM, embeddings, guardrails')
@click.option('--preset', '-p', default=None,
              help='Use a preset configuration (e.g., python, typescript, java, dotnet)')
@click.option('--whitelist', '-w', multiple=True,
              help='File patterns to include (e.g., -w "*.py" -w "*.md")')
@click.option('--blacklist', '-x', multiple=True,
              help='File patterns to exclude (e.g., -x "*test*" -x "*vendor*")')
@click.option('--no-relations', is_flag=True,
              help='Skip relation extraction (faster)')
@click.option('--model', '-m', default=None,
              help='LLM model name (overrides config file)')
@click.option('--limit', '-l', type=int, default=None,
              help='Limit number of documents to process (for testing)')
@click.option('--fresh', '-f', is_flag=True,
              help='Start fresh - delete existing graph and create new one')
@click.option('--name', '-n', default=None,
              help='Source name for the graph (default: directory name or toolkit_name)')
@click.option('--recursive/--no-recursive', default=True,
              help='Recursively scan subdirectories (default: recursive)')
@click.pass_context
def ingest(ctx, toolkit: Optional[str], directory: Optional[str], graph: str,
           config: Optional[str], preset: Optional[str], whitelist: tuple, blacklist: tuple,
           no_relations: bool, model: Optional[str], limit: Optional[int],
           fresh: bool, name: Optional[str], recursive: bool):
    """Run ingestion pipeline to build/update a knowledge graph.

    Use --toolkit for configured sources (GitHub, ADO, etc.) or --dir for
    local directories (simpler, no config needed).

    \b
    Examples:
        alita inventory ingest --dir ./src -g graph.json --preset python
        alita inventory ingest --dir ./src -g graph.json -w "*.py" -w "*.md"
        alita inventory ingest --dir ./src -g graph.json -p typescript -w "*.json"
        alita inventory ingest --dir ./docs -g graph.json --name my-docs
        alita inventory ingest -t github.json -g graph.json -w "*.md"
        alita inventory ingest --dir ./src -g graph.json -c config.yml
    """
    # Load preset configuration if specified
    preset_whitelist = []
    preset_blacklist = []

    if preset:
        # Import lazily so the CLI starts fast when no preset is used.
        from alita_sdk.community.inventory import get_preset, list_presets

        try:
            preset_config = get_preset(preset)
            preset_whitelist = preset_config.get('whitelist', [])
            preset_blacklist = preset_config.get('blacklist', [])

            click.echo(f"📋 Using preset: {preset}")
            if preset_whitelist:
                click.echo(f" Whitelist: {', '.join(preset_whitelist)}")
            if preset_blacklist:
                click.echo(f" Blacklist: {', '.join(preset_blacklist)}")
        except ValueError as e:
            # get_preset signals an unknown preset name with ValueError.
            # NOTE(review): `e` is bound but never used in the message.
            available = ', '.join(list_presets())
            raise click.ClickException(f"Unknown preset '{preset}'. Available: {available}")

    # Merge preset patterns with user-provided patterns
    # User patterns are added after preset patterns (more specific)
    final_whitelist = list(preset_whitelist) + list(whitelist)
    final_blacklist = list(preset_blacklist) + list(blacklist)

    # Validate: must have either --toolkit or --dir
    if not toolkit and not directory:
        raise click.ClickException("Must specify either --toolkit or --dir")

    if toolkit and directory:
        raise click.ClickException("Cannot use both --toolkit and --dir. Choose one.")

    # Handle --dir mode (simple local directory ingestion)
    if directory:
        from pathlib import Path
        dir_path = Path(directory).resolve()
        # --name overrides; otherwise the directory's basename identifies
        # this source inside the graph.
        source_name = name or dir_path.name
        source_type = 'filesystem'

        click.echo(f"📂 Ingesting local directory: {dir_path}")
        click.echo(f" Name: {source_name}")
        click.echo(f" Recursive: {recursive}")

        # Create a simple toolkit config for the directory
        toolkit_config = {
            'type': 'filesystem',
            'toolkit_name': source_name,
            'base_directory': str(dir_path),
            'recursive': recursive,
        }
        branch = None  # No branch for filesystem
    else:
        # Load toolkit config
        toolkit_config = _load_toolkit_config(toolkit)
        click.echo(f"📦 Loaded toolkit config: {toolkit}")

        # Get source type from toolkit
        source_type = toolkit_config.get('type')
        if not source_type:
            raise click.ClickException(f"Toolkit config missing 'type' field: {toolkit}")
        click.echo(f" Type: {source_type}")

        # Get toolkit name (used as source identifier in the graph)
        source_name = name or toolkit_config.get('toolkit_name') or source_type
        click.echo(f" Name: {source_name}")

        # Get repo/branch from toolkit config
        repo = toolkit_config.get('repository')
        if repo:
            click.echo(f" Repository: {repo}")

        # Prefer the active branch, then the configured base branch,
        # then fall back to 'main'.
        branch = toolkit_config.get('active_branch') or toolkit_config.get('base_branch') or 'main'
        click.echo(f" Branch: {branch}")

        # Get path for local sources (filesystem or localgit)
        path = (
            toolkit_config.get('base_directory') or  # filesystem toolkit
            toolkit_config.get('git_root_dir') or  # localgit toolkit
            toolkit_config.get('path')  # generic path
        )
        if path:
            click.echo(f" Path: {path}")

        # Validate required fields based on source type
        if source_type in ('github', 'ado') and not repo:
            raise click.ClickException(f"Toolkit config missing 'repository' for source '{source_type}'")

        if source_type == 'filesystem' and not path:
            raise click.ClickException(f"Toolkit config missing 'base_directory' or 'path' for source '{source_type}'")

        if source_type == 'localgit' and not path:
            raise click.ClickException(f"Toolkit config missing 'git_root_dir' or 'path' for source '{source_type}'")

    # Handle --fresh option: delete existing graph
    if fresh and os.path.exists(graph):
        click.echo(f"🗑️ Fresh mode: deleting existing graph at {graph}")
        os.remove(graph)

    click.echo(f"🚀 Starting ingestion from {source_name} ({source_type})...")

    # Progress callback
    # Invoked by the pipeline; signature (message, phase) is what
    # pipeline.progress_callback expects — TODO confirm against
    # IngestionPipeline's definition.
    def progress(message: str, phase: str):
        click.echo(f" [{phase}] {message}")

    try:
        from alita_sdk.community.inventory import IngestionPipeline, IngestionConfig

        # Load configuration
        if config:
            click.echo(f"📋 Loading config from {config}")
            # File extension decides the parser; anything not *.yml/*.yaml
            # is treated as JSON.
            if config.endswith('.yml') or config.endswith('.yaml'):
                ingestion_config = IngestionConfig.from_yaml(config)
            else:
                ingestion_config = IngestionConfig.from_json(config)

            # Override model if specified on command line
            if model:
                ingestion_config.llm_model = model

            # Override graph path
            ingestion_config.graph_path = graph

            # Get LLM using the model name and temperature from config
            llm = _get_llm(ctx, ingestion_config.llm_model, ingestion_config.temperature)

            pipeline = IngestionPipeline(
                llm=llm,
                graph_path=ingestion_config.graph_path,
                guardrails=ingestion_config.guardrails,
            )
        else:
            # Fall back to environment-based config
            click.echo("📋 Loading config from environment")
            llm = _get_llm(ctx, model)
            pipeline = IngestionPipeline(
                llm=llm,
                graph_path=graph,
            )

        # Set progress callback
        pipeline.progress_callback = progress

        # Show existing graph status
        graph_stats = pipeline.get_stats()
        if graph_stats['node_count'] > 0:
            click.echo(f"📊 Existing graph: {graph_stats['node_count']} entities, {graph_stats['edge_count']} relations")
            click.echo(" New entities will be ADDED to existing graph")

        # Get source toolkit from config and register it
        source_toolkit = _get_source_toolkit(toolkit_config)

        # Create a RunnableConfig for CLI context - this allows dispatch_custom_event to work
        # without being inside a LangChain agent run
        import uuid
        cli_runnable_config = {
            'run_id': uuid.uuid4(),
            'tags': ['cli', 'inventory', 'ingest'],
        }

        # Set the runnable config on the toolkit if it supports it
        if hasattr(source_toolkit, 'set_runnable_config'):
            source_toolkit.set_runnable_config(cli_runnable_config)

        pipeline.register_toolkit(source_name, source_toolkit)

        # Run ingestion
        # NOTE(review): `if limit:` skips this message when --limit 0 is
        # given, though 0 is still passed to max_documents below.
        if limit:
            click.echo(f"⚠️ Limiting to {limit} documents (test mode)")

        result = pipeline.run(
            source=source_name,
            branch=branch,
            whitelist=final_whitelist if final_whitelist else None,
            blacklist=final_blacklist if final_blacklist else None,
            extract_relations=not no_relations,
            max_documents=limit,
        )

        # Show result
        if result.success:
            click.echo(f"\n✅ Ingestion complete!")
            click.echo(f" Documents processed: {result.documents_processed}")
            click.echo(f" Entities extracted: {result.entities_added}")
            click.echo(f" Relations extracted: {result.relations_added}")
            click.echo(f" Duration: {result.duration_seconds:.1f}s")
            click.echo(f" Graph saved to: {graph}")

            # Show failed documents info if any
            if result.failed_documents:
                click.echo(f"\n⚠️ {len(result.failed_documents)} documents failed to process")
                click.echo(f" Run 'alita inventory status -g {graph} -n {source_name}' to see details")
                click.echo(f" Run 'alita inventory retry ...' to retry failed files")
        else:
            click.echo(f"\n❌ Ingestion failed!")
            for error in result.errors:
                click.echo(f" Error: {error}")
            sys.exit(1)

    except Exception as e:
        # Log the full traceback, then surface a clean one-line CLI error.
        logger.exception("Ingestion failed")
        raise click.ClickException(str(e))
377
+
378
+
379
@inventory.command('retry')
@click.option('--toolkit', '-t', type=click.Path(exists=True),
              help='Path to toolkit config JSON (e.g., .alita/tools/github.json)')
@click.option('--dir', '-d', 'directory', type=click.Path(exists=True, file_okay=False, dir_okay=True),
              help='Local directory to ingest (alternative to --toolkit for local files)')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--config', '-c', type=click.Path(exists=True),
              help='Path to YAML/JSON config file for LLM, embeddings, guardrails')
@click.option('--no-relations', is_flag=True,
              help='Skip relation extraction (faster)')
@click.option('--model', '-m', default=None,
              help='LLM model name (overrides config file)')
@click.option('--name', '-n', required=True,
              help='Source name (must match the name used during original ingestion)')
@click.option('--force', '-f', is_flag=True,
              help='Retry all failed files regardless of attempt count')
@click.option('--recursive/--no-recursive', default=True,
              help='Recursively scan subdirectories (default: recursive)')
@click.pass_context
def retry(ctx, toolkit: Optional[str], directory: Optional[str], graph: str,
          config: Optional[str], no_relations: bool, model: Optional[str],
          name: str, force: bool, recursive: bool):
    """Retry ingestion for files that failed in a previous run.

    Reads the checkpoint file to find failed files and re-ingests them.
    Use --force to retry all failed files regardless of previous attempt count.

    \b
    Examples:
        alita inventory retry --dir ./src -g graph.json -n my-source
        alita inventory retry --dir ./src -g graph.json -n my-source --force
        alita inventory retry -t github.json -g graph.json -n github-repo
    """
    # Validate: must have either --toolkit or --dir
    if not toolkit and not directory:
        raise click.ClickException("Must specify either --toolkit or --dir")

    if toolkit and directory:
        raise click.ClickException("Cannot use both --toolkit and --dir. Choose one.")

    # Check if checkpoint exists
    # Checkpoint location is derived from the graph path + source name by
    # the shared helper, so retry finds the same file ingest wrote.
    checkpoint_path = _get_checkpoint_path(graph, name)
    if not os.path.exists(checkpoint_path):
        click.echo(f"\n❌ No checkpoint found for source '{name}'")
        click.echo(f" Expected checkpoint: {checkpoint_path}")
        click.echo(f"\n This could mean:")
        click.echo(f" - No previous ingestion was run with --name '{name}'")
        click.echo(f" - The previous ingestion completed successfully (checkpoint cleared)")
        click.echo(f" - The checkpoint was manually deleted")
        sys.exit(1)

    # Load checkpoint to get failed files
    try:
        with open(checkpoint_path, 'r') as f:
            checkpoint_data = json.load(f)
    except Exception as e:
        raise click.ClickException(f"Failed to load checkpoint: {e}")

    # Checkpoint schema used below: 'failed_files' is a list of dicts with
    # 'file_path' and optional 'attempts' — assumed from the accesses here;
    # confirm against the pipeline's checkpoint writer.
    failed_files = checkpoint_data.get('failed_files', [])

    if not failed_files:
        click.echo(f"\n✅ No failed files to retry for source '{name}'")
        click.echo(f" Processed files: {len(checkpoint_data.get('processed_files', []))}")
        # Clear checkpoint since there's nothing to retry
        os.remove(checkpoint_path)
        click.echo(f" Checkpoint cleared.")
        return

    # Get files to retry
    if force:
        # Retry all failed files
        files_to_retry = [f['file_path'] for f in failed_files]
        click.echo(f"\n🔄 Force retrying ALL {len(files_to_retry)} failed files...")
    else:
        # Only retry files under max attempts (default: 3)
        max_attempts = 3
        files_to_retry = [
            f['file_path'] for f in failed_files
            if f.get('attempts', 1) < max_attempts
        ]
        skipped = len(failed_files) - len(files_to_retry)
        if skipped > 0:
            click.echo(f"\n⚠️ Skipping {skipped} files that exceeded {max_attempts} attempts")
            click.echo(f" Use --force to retry all failed files")

    if not files_to_retry:
        click.echo(f"\n❌ No files eligible for retry (all exceeded max attempts)")
        click.echo(f" Use --force to retry anyway")
        sys.exit(1)

    click.echo(f"\n🔄 Retrying {len(files_to_retry)} failed files...")

    # Handle --dir mode (simple local directory ingestion)
    if directory:
        from pathlib import Path
        dir_path = Path(directory).resolve()
        source_type = 'filesystem'

        click.echo(f"📂 Source directory: {dir_path}")

        # Create a simple toolkit config for the directory
        # Unlike ingest, the source name here is the required --name so it
        # matches the checkpoint's source.
        toolkit_config = {
            'type': 'filesystem',
            'toolkit_name': name,
            'base_directory': str(dir_path),
            'recursive': recursive,
        }
    else:
        # Load toolkit config
        toolkit_config = _load_toolkit_config(toolkit)
        source_type = toolkit_config.get('type', 'unknown')
        click.echo(f"📦 Source toolkit: {source_type}")

    # Progress callback
    def progress(message: str, phase: str):
        click.echo(f" [{phase}] {message}")

    try:
        from alita_sdk.community.inventory import IngestionPipeline, IngestionConfig

        # Load configuration
        if config:
            click.echo(f"📋 Loading config from {config}")
            # File extension decides the parser; non-YAML falls back to JSON.
            if config.endswith('.yml') or config.endswith('.yaml'):
                ingestion_config = IngestionConfig.from_yaml(config)
            else:
                ingestion_config = IngestionConfig.from_json(config)

            if model:
                ingestion_config.llm_model = model

            ingestion_config.graph_path = graph
            llm = _get_llm(ctx, ingestion_config.llm_model, ingestion_config.temperature)

            pipeline = IngestionPipeline(
                llm=llm,
                graph_path=ingestion_config.graph_path,
                guardrails=ingestion_config.guardrails,
            )
        else:
            click.echo("📋 Loading config from environment")
            llm = _get_llm(ctx, model)
            pipeline = IngestionPipeline(
                llm=llm,
                graph_path=graph,
            )

        pipeline.progress_callback = progress

        # Get source toolkit and register it
        source_toolkit = _get_source_toolkit(toolkit_config)

        # Synthetic RunnableConfig so dispatch_custom_event works outside a
        # LangChain agent run (same pattern as the ingest command).
        import uuid
        cli_runnable_config = {
            'run_id': uuid.uuid4(),
            'tags': ['cli', 'inventory', 'retry'],
        }

        if hasattr(source_toolkit, 'set_runnable_config'):
            source_toolkit.set_runnable_config(cli_runnable_config)

        pipeline.register_toolkit(name, source_toolkit)

        # Run delta update for failed files
        result = pipeline.delta_update(
            source=name,
            file_paths=files_to_retry,
            extract_relations=not no_relations,
        )

        # Show result
        if result.success:
            click.echo(f"\n✅ Retry complete!")
            click.echo(f" Files retried: {len(files_to_retry)}")
            click.echo(f" Documents processed: {result.documents_processed}")
            click.echo(f" Entities added: {result.entities_added}")
            click.echo(f" Relations added: {result.relations_added}")
            click.echo(f" Duration: {result.duration_seconds:.1f}s")

            # Check if there are still failed files
            if result.failed_documents:
                click.echo(f"\n⚠️ {len(result.failed_documents)} files still failing")
                click.echo(f" Run 'alita inventory status -g {graph} -n {name}' to see details")
            else:
                # All retries succeeded - clear checkpoint
                if os.path.exists(checkpoint_path):
                    os.remove(checkpoint_path)
                    click.echo(f"\n🧹 Checkpoint cleared (all files processed successfully)")
        else:
            click.echo(f"\n❌ Retry failed!")
            for error in result.errors:
                click.echo(f" Error: {error}")
            sys.exit(1)

    except Exception as e:
        # Full traceback to the log; clean one-liner to the terminal.
        logger.exception("Retry failed")
        raise click.ClickException(str(e))
577
+
578
+
579
@inventory.command('status')
@click.option('--graph', '-g', required=True, type=click.Path(),
              help='Path to graph JSON file')
@click.option('--name', '-n', required=True,
              help='Source name to check status for')
def status(graph: str, name: str):
    """
    Show ingestion checkpoint status for a source.

    Displays information about the last ingestion run including:
    - Number of processed files
    - Number of failed files
    - Current phase
    - Timestamps

    Example:
        alita inventory status -g ./graph.json -n my-source
    """
    # Checkpoint location is derived from graph path + source name, matching
    # what the ingest/retry commands use.
    checkpoint_path = _get_checkpoint_path(graph, name)

    if not os.path.exists(checkpoint_path):
        click.echo(f"\n❌ No checkpoint found for source '{name}'")
        click.echo(f" Expected: {checkpoint_path}")
        click.echo(f"\n No active or failed ingestion for this source.")
        sys.exit(1)

    try:
        with open(checkpoint_path, 'r') as f:
            checkpoint = json.load(f)
    except Exception as e:
        raise click.ClickException(f"Failed to load checkpoint: {e}")

    click.echo(f"\n📋 Ingestion Status for '{name}'")
    click.echo(f" Checkpoint: {checkpoint_path}")

    # Run metadata — every field is read defensively with a default, so a
    # partially written checkpoint still renders.
    click.echo(f"\n Run ID: {checkpoint.get('run_id', 'unknown')}")
    click.echo(f" Phase: {checkpoint.get('phase', 'unknown')}")
    click.echo(f" Completed: {'Yes' if checkpoint.get('completed') else 'No'}")

    click.echo(f"\n Started: {checkpoint.get('started_at', 'unknown')}")
    click.echo(f" Updated: {checkpoint.get('updated_at', 'unknown')}")

    processed_files = checkpoint.get('processed_files', [])
    failed_files = checkpoint.get('failed_files', [])

    click.echo(f"\n 📊 Progress:")
    click.echo(f" Documents processed: {checkpoint.get('documents_processed', 0)}")
    click.echo(f" Entities added: {checkpoint.get('entities_added', 0)}")
    click.echo(f" Relations added: {checkpoint.get('relations_added', 0)}")

    click.echo(f"\n 📁 Files:")
    click.echo(f" Processed: {len(processed_files)}")
    click.echo(f" Failed: {len(failed_files)}")

    if failed_files:
        # Count by attempts
        # Histogram of how many times each failed file has been attempted.
        by_attempts = {}
        for f in failed_files:
            attempts = f.get('attempts', 1)
            by_attempts[attempts] = by_attempts.get(attempts, 0) + 1

        click.echo(f"\n ❌ Failed files by attempt count:")
        for attempts, count in sorted(by_attempts.items()):
            click.echo(f" {attempts} attempt(s): {count} files")

        # Show sample errors
        click.echo(f"\n 📝 Sample errors (first 3):")
        for f in failed_files[:3]:
            file_path = f.get('file_path', 'unknown')
            # 'error' vs 'last_error': both keys are probed — the writer's
            # exact key name is assumed; verify against the pipeline.
            error = f.get('error', f.get('last_error', 'unknown error'))
            # Truncate long paths and errors
            # Keep the tail of the path (the informative part) and the head
            # of the error message.
            if len(file_path) > 50:
                file_path = '...' + file_path[-47:]
            if len(error) > 60:
                error = error[:57] + '...'
            click.echo(f" - {file_path}")
            click.echo(f" Error: {error}")

    errors = checkpoint.get('errors', [])
    if errors:
        click.echo(f"\n ⚠️ Run errors:")
        for error in errors[:3]:
            click.echo(f" - {error[:80]}{'...' if len(error) > 80 else ''}")

    if failed_files:
        click.echo(f"\n 💡 To retry failed files:")
        click.echo(f" alita inventory retry --dir <path> -g {graph} -n {name}")
        click.echo(f" alita inventory retry --dir <path> -g {graph} -n {name} --force")

    click.echo()
669
+
670
+
671
@inventory.command('stats')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
def stats(graph: str):
    """
    Show knowledge graph statistics.

    Example:
        alita inventory stats -g ./graph.json
    """
    try:
        from alita_sdk.community.inventory import KnowledgeGraph

        # Load the graph from disk and pull its aggregate counters.
        knowledge_graph = KnowledgeGraph()
        knowledge_graph.load_from_json(graph)
        graph_stats = knowledge_graph.get_stats()

        click.echo(f"\n📊 Knowledge Graph Statistics")
        click.echo(f"   Path: {graph}")
        click.echo(f"\n   Entities: {graph_stats['node_count']}")
        click.echo(f"   Relations: {graph_stats['edge_count']}")

        # Per-type breakdowns, most frequent first (stable for ties).
        if graph_stats['entity_types']:
            click.echo(f"\n   Entity Types:")
            for etype, count in sorted(graph_stats['entity_types'].items(),
                                       key=lambda item: item[1], reverse=True):
                click.echo(f"     - {etype}: {count}")

        if graph_stats['relation_types']:
            click.echo(f"\n   Relation Types:")
            for rtype, count in sorted(graph_stats['relation_types'].items(),
                                       key=lambda item: item[1], reverse=True):
                click.echo(f"     - {rtype}: {count}")

        if graph_stats['source_toolkits']:
            click.echo(f"\n   Sources: {', '.join(graph_stats['source_toolkits'])}")

        if graph_stats['last_saved']:
            click.echo(f"\n   Last updated: {graph_stats['last_saved']}")

        click.echo()

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except Exception as e:
        raise click.ClickException(str(e))
715
+
716
+
717
@inventory.command('search')
@click.argument('query')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--type', '-t', 'entity_type', default=None,
              help='Filter by entity type')
@click.option('--limit', '-n', default=10, type=int,
              help='Maximum results (default: 10)')
def search(query: str, graph: str, entity_type: Optional[str], limit: int):
    """
    Search for entities in the knowledge graph.

    Example:
        alita inventory search "Payment" -g ./graph.json
        alita inventory search "User" -g ./graph.json --type class
    """
    try:
        from alita_sdk.community.inventory import KnowledgeGraph

        knowledge_graph = KnowledgeGraph()
        knowledge_graph.load_from_json(graph)

        matches = knowledge_graph.search(query, top_k=limit, entity_type=entity_type)

        if not matches:
            click.echo(f"No entities found matching '{query}'")
            return

        click.echo(f"\n🔍 Found {len(matches)} entities matching '{query}':\n")

        for i, match in enumerate(matches, 1):
            entity = match['entity']
            citation = entity.get('citation', {})

            click.echo(f"{i}. {entity.get('name')} ({entity.get('type')})")

            if citation:
                file_path = citation.get('file_path', 'unknown')
                # Build an optional ":start-end" suffix; the end line is only
                # appended when a start line is present.
                line_info = ""
                if citation.get('line_start'):
                    line_info = f":{citation['line_start']}"
                    if citation.get('line_end'):
                        line_info += f"-{citation['line_end']}"
                click.echo(f"    📍 {file_path}{line_info}")

            # Show the description if available, truncated to 80 characters.
            description = entity.get('description')
            if description:
                desc = description[:80] + ("..." if len(description) > 80 else "")
                click.echo(f"       {desc}")

        click.echo()

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except Exception as e:
        raise click.ClickException(str(e))
775
+
776
+
777
@inventory.command('entity')
@click.argument('name')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--relations/--no-relations', default=True,
              help='Include relations (default: yes)')
def entity(name: str, graph: str, relations: bool):
    """
    Get detailed information about an entity.

    Example:
        alita inventory entity "PaymentProcessor" -g ./graph.json
    """
    try:
        from alita_sdk.community.inventory import InventoryRetrievalApiWrapper

        # The retrieval wrapper formats entity details as human-readable text.
        wrapper = InventoryRetrievalApiWrapper(graph_path=graph)
        details = wrapper.get_entity(name, include_relations=relations)

        click.echo(f"\n{details}")

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except Exception as e:
        raise click.ClickException(str(e))
802
+
803
+
804
@inventory.command('impact')
@click.argument('name')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--direction', '-d', type=click.Choice(['upstream', 'downstream']),
              default='downstream', help='Analysis direction (default: downstream)')
@click.option('--depth', default=3, type=int,
              help='Maximum traversal depth (default: 3)')
def impact(name: str, graph: str, direction: str, depth: int):
    """
    Analyze impact of changes to an entity.

    Example:
        alita inventory impact "UserService" -g ./graph.json
        alita inventory impact "Database" -g ./graph.json --direction upstream
    """
    try:
        from alita_sdk.community.inventory import InventoryRetrievalApiWrapper

        # Traversal (BFS depth limit and direction) happens inside the wrapper;
        # the CLI just renders whatever text report it returns.
        wrapper = InventoryRetrievalApiWrapper(graph_path=graph)
        report = wrapper.impact_analysis(name, direction=direction, max_depth=depth)

        click.echo(f"\n{report}")

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except Exception as e:
        raise click.ClickException(str(e))
832
+
833
+
834
@inventory.command('visualize')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--output', '-o', default=None, type=click.Path(),
              help='Output HTML file path (default: graph_visualization.html in same dir)')
@click.option('--open/--no-open', 'open_browser', default=True,
              help='Open in browser after generation (default: yes)')
@click.option('--title', '-t', default=None,
              help='Title for the visualization')
def visualize(graph: str, output: Optional[str], open_browser: bool, title: Optional[str]):
    """
    Generate an interactive visualization of the knowledge graph.

    Creates a standalone HTML file with D3.js-powered graph visualization.
    Features include:
    - Force-directed layout
    - Color-coded entity types
    - Node size based on connections
    - Interactive drag, zoom, and pan
    - Search and filter by entity type
    - Click nodes for detailed info

    Example:
        alita inventory visualize -g ./graph.json
        alita inventory visualize -g ./graph.json -o my_graph.html
        alita inventory visualize -g ./graph.json --no-open
    """
    try:
        from alita_sdk.community.inventory.visualize import generate_visualization
        from alita_sdk.community.inventory import KnowledgeGraph
        import webbrowser
        import os

        # The graph file's stem drives both the default output name and title.
        stem = os.path.splitext(os.path.basename(graph))[0]

        if output is None:
            output = os.path.join(os.path.dirname(os.path.abspath(graph)),
                                  f"{stem}_visualization.html")

        if title is None:
            title = stem.replace('_', ' ').title()

        click.echo(f"\n🎨 Generating graph visualization...")
        click.echo(f"   Source: {graph}")

        # Render the standalone HTML file.
        html_path = generate_visualization(graph, output, title)

        click.echo(f"   Output: {html_path}")

        # Load the graph again purely for the summary statistics below.
        kg = KnowledgeGraph()
        kg.load_from_json(graph)
        stats = kg.get_stats()
        click.echo(f"\n   📊 Graph contains:")
        click.echo(f"      - {stats['node_count']} entities")
        click.echo(f"      - {stats['edge_count']} relations")
        if stats['entity_types']:
            click.echo(f"      - {len(stats['entity_types'])} entity types")

        if open_browser:
            click.echo(f"\n   Opening in browser...")
            webbrowser.open(f"file://{os.path.abspath(html_path)}")

        click.echo(f"\n✅ Visualization complete!")
        click.echo()

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except ImportError as e:
        raise click.ClickException(f"Visualization module not available: {e}")
    except Exception as e:
        raise click.ClickException(str(e))
908
+
909
+
910
@inventory.command('enrich')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--output', '-o', default=None, type=click.Path(),
              help='Output graph file (default: overwrite input)')
@click.option('--deduplicate/--no-deduplicate', default=False,
              help='Merge entities with exact same name (DISABLED by default, use with caution)')
@click.option('--cross-source/--no-cross-source', default=True,
              help='Link same-named entities across sources (default: yes)')
@click.option('--semantic/--no-semantic', default=True,
              help='Create semantic cross-links based on shared concepts (default: yes)')
@click.option('--orphans/--no-orphans', default=True,
              help='Connect orphan nodes to related entities (default: yes)')
@click.option('--similarity/--no-similarity', default=False,
              help='Link entities with similar names (default: no)')
@click.option('--dry-run', is_flag=True, default=False,
              help='Show what would be done without saving')
def enrich(graph: str, output: Optional[str], deduplicate: bool, cross_source: bool,
           semantic: bool, orphans: bool, similarity: bool, dry_run: bool):
    """
    Enrich a knowledge graph with cross-linking.

    Post-processes the graph to improve connectivity by creating links:

    1. CROSS-SOURCE LINKING: Link entities across sources
       - SDK class ↔ docs concept, code ↔ documentation
       - Automatically determines relationship type

    2. SEMANTIC LINKING: Link entities sharing concepts
       - Finds entities with overlapping significant words
       - Creates LINKS between related entities
       - Example: "Artifact Toolkit" --[related_to]--> "Configure Artifact Toolkit"

    3. ORPHAN LINKING: Connect isolated nodes
       - Links unconnected nodes to related entities

    4. DEDUPLICATION (optional, disabled by default):
       - Use --deduplicate to merge exact name matches
       - Use with caution - can lose semantic meaning

    Example:
        alita inventory enrich -g ./graph.json
        alita inventory enrich -g ./graph.json -o enriched.json
        alita inventory enrich -g ./graph.json --deduplicate
        alita inventory enrich -g ./graph.json --dry-run
    """
    try:
        from alita_sdk.community.inventory.enrichment import GraphEnricher

        click.echo(f"\n🔗 Enriching knowledge graph...")
        click.echo(f"   Source: {graph}")

        # GraphEnricher loads the graph JSON on construction and exposes
        # nodes_by_id / graph_data / new_links / merged_nodes for reporting.
        enricher = GraphEnricher(graph)

        # Show initial stats (captured before enrichment mutates the graph).
        initial_nodes = len(enricher.nodes_by_id)
        initial_links = len(enricher.graph_data.get("links", []))
        click.echo(f"   Initial: {initial_nodes} nodes, {initial_links} links")

        # Run enrichment; each flag toggles one enrichment pass.
        # Note the parameter-name mapping: semantic -> semantic_links.
        stats = enricher.enrich(
            deduplicate=deduplicate,
            cross_source=cross_source,
            semantic_links=semantic,
            orphans=orphans,
            similarity=similarity,
        )

        click.echo(f"\n   📊 Enrichment results:")

        if deduplicate:
            click.echo(f"      Entities merged: {stats.get('entities_merged', 0)} (exact name matches into {stats.get('merge_groups', 0)} groups)")
            final_nodes = len(enricher.nodes_by_id)
            click.echo(f"      Node reduction: {initial_nodes} → {final_nodes}")

        # NOTE(review): printed unconditionally, even with --no-cross-source
        # (the counter is presumably 0/absent in that case — confirm).
        click.echo(f"      Cross-source links: +{stats.get('cross_source_links', 0)}")

        if semantic:
            click.echo(f"      Semantic links: +{stats.get('semantic_links', 0)}")

        # Also printed unconditionally regardless of the --orphans flag.
        click.echo(f"      Orphan connections: +{stats.get('orphan_links', 0)}")

        if similarity:
            click.echo(f"      Similarity links: +{stats.get('similarity_links', 0)}")

        click.echo(f"      Total new links: +{len(enricher.new_links)}")

        if dry_run:
            # Dry run: report what would change, but never write the graph.
            click.echo(f"\n   🔍 Dry run - no changes saved")

            # Show merge examples (first 5 merge records)
            if deduplicate and enricher.merged_nodes:
                click.echo(f"\n   Sample merged entities:")
                for merge in enricher.merged_nodes[:5]:
                    new_node = merge["new_node"]
                    types = merge.get("merged_types", [])
                    # NOTE(review): ' + '.join(set(types)) has non-deterministic
                    # ordering across runs; cosmetic only.
                    click.echo(f"      '{new_node['name']}' [{' + '.join(set(types))}] → [{new_node['type']}]")

            # Show link examples (first 10 new links, names truncated to 25 chars)
            click.echo(f"\n   Sample new links:")
            for link in enricher.new_links[:10]:
                src = enricher.nodes_by_id.get(link['source'], {})
                tgt = enricher.nodes_by_id.get(link['target'], {})
                click.echo(f"      {src.get('name', '?')[:25]:25} --[{link['relation_type']}]--> {tgt.get('name', '?')[:25]}")
        else:
            # save(None) overwrites the input file; otherwise writes to `output`.
            output_path = enricher.save(output)
            click.echo(f"\n   💾 Saved to: {output_path}")

        click.echo(f"\n✅ Enrichment complete!")
        click.echo()

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except ImportError as e:
        raise click.ClickException(f"Enrichment module not available: {e}")
    except Exception as e:
        raise click.ClickException(str(e))
1027
+
1028
+
1029
+ # ========== Helper Functions ==========
1030
+
1031
+ def _get_checkpoint_path(graph: str, source_name: str) -> str:
1032
+ """
1033
+ Get the checkpoint file path for a source.
1034
+
1035
+ Checkpoint files are stored in the same directory as the graph file,
1036
+ with naming pattern: .ingestion-checkpoint-{source_name}.json
1037
+
1038
+ Args:
1039
+ graph: Path to the graph JSON file
1040
+ source_name: Name of the source toolkit
1041
+
1042
+ Returns:
1043
+ Absolute path to the checkpoint file
1044
+ """
1045
+ graph_path = Path(graph).resolve()
1046
+ graph_dir = graph_path.parent
1047
+ return str(graph_dir / f".ingestion-checkpoint-{source_name}.json")
1048
+
1049
+
1050
+ def _load_toolkit_config(toolkit_path: str) -> Dict[str, Any]:
1051
+ """
1052
+ Load and parse a toolkit config JSON file.
1053
+
1054
+ Supports environment variable substitution for values like ${GITHUB_PAT}.
1055
+ """
1056
+ with open(toolkit_path, 'r') as f:
1057
+ config = json.load(f)
1058
+
1059
+ # Recursively resolve environment variables
1060
+ def resolve_env_vars(obj):
1061
+ if isinstance(obj, str):
1062
+ # Match ${VAR_NAME} pattern
1063
+ pattern = r'\$\{([^}]+)\}'
1064
+ matches = re.findall(pattern, obj)
1065
+ for var_name in matches:
1066
+ env_value = os.environ.get(var_name, '')
1067
+ obj = obj.replace(f'${{{var_name}}}', env_value)
1068
+ return obj
1069
+ elif isinstance(obj, dict):
1070
+ return {k: resolve_env_vars(v) for k, v in obj.items()}
1071
+ elif isinstance(obj, list):
1072
+ return [resolve_env_vars(item) for item in obj]
1073
+ return obj
1074
+
1075
+ return resolve_env_vars(config)
1076
+
1077
+
1078
def _get_llm(ctx, model: Optional[str] = None, temperature: float = 0.0):
    """Get LLM instance from Alita client context."""
    from .cli import get_client

    # get_client raises ClickException if the CLI is not configured yet.
    client = get_client(ctx)

    # Fall back to a default model when none was requested, then delegate
    # to the client's get_llm() factory with a fixed token budget.
    return client.get_llm(
        model_name=model or 'gpt-4o-mini',
        model_config={
            'temperature': temperature,
            'max_tokens': 4096,
        },
    )
1096
+
1097
+
1098
def _get_source_toolkit(toolkit_config: Dict[str, Any]):
    """
    Get configured source toolkit instance from toolkit config.

    Uses the SDK's toolkit factory pattern - all toolkits extend BaseCodeToolApiWrapper
    or BaseVectorStoreToolApiWrapper, which have loader() and chunker() methods.

    Also supports CLI-specific toolkits like 'filesystem' for local document loading.

    Args:
        toolkit_config: Toolkit configuration dict with 'type' and settings

    Returns:
        API wrapper instance with loader() method

    Raises:
        click.ClickException: On missing/unknown toolkit type, or any
            import/instantiation failure (wrapped with a descriptive message).
    """
    source = toolkit_config.get('type')
    if not source:
        raise click.ClickException("Toolkit config missing 'type' field")

    # Handle filesystem type (CLI-specific, not in AVAILABLE_TOOLS)
    if source == 'filesystem':
        from .tools.filesystem import FilesystemApiWrapper

        # Accept several aliases for the root directory; first truthy wins.
        base_directory = (
            toolkit_config.get('base_directory') or
            toolkit_config.get('path') or
            toolkit_config.get('git_root_dir')
        )

        if not base_directory:
            raise click.ClickException(
                "Filesystem toolkit requires 'base_directory' or 'path' field"
            )

        return FilesystemApiWrapper(
            base_directory=base_directory,
            recursive=toolkit_config.get('recursive', True),
            follow_symlinks=toolkit_config.get('follow_symlinks', False),
        )

    # Handle standard SDK toolkits via AVAILABLE_TOOLS registry
    from alita_sdk.tools import AVAILABLE_TOOLS

    # Check if toolkit type is available
    if source not in AVAILABLE_TOOLS:
        raise click.ClickException(
            f"Unknown toolkit type: {source}. "
            f"Available: {', '.join(list(AVAILABLE_TOOLS.keys()) + ['filesystem'])}"
        )

    toolkit_info = AVAILABLE_TOOLS[source]

    # Get the toolkit class
    if 'toolkit_class' not in toolkit_info:
        raise click.ClickException(
            f"Toolkit '{source}' does not have a toolkit_class registered"
        )

    # NOTE(review): toolkit_class is validated and fetched here but never used
    # below — instantiation goes through the api_wrapper module instead.
    toolkit_class = toolkit_info['toolkit_class']

    # Build kwargs from toolkit config - we need to map config to API wrapper params
    kwargs = dict(toolkit_config)

    # Remove fields that aren't needed for the API wrapper
    kwargs.pop('type', None)
    kwargs.pop('toolkit_name', None)
    kwargs.pop('selected_tools', None)
    kwargs.pop('excluded_tools', None)

    # Handle common config patterns - flatten nested configurations
    # (e.g. 'github_configuration': {...} is merged into the top level).
    config_key = f"{source}_configuration"
    if config_key in kwargs:
        nested_config = kwargs.pop(config_key)
        if isinstance(nested_config, dict):
            kwargs.update(nested_config)

    # Handle ADO-specific config pattern
    # (ADO toolkits use 'ado_configuration' regardless of their 'type' value).
    if 'ado_configuration' in kwargs:
        ado_config = kwargs.pop('ado_configuration')
        if isinstance(ado_config, dict):
            kwargs.update(ado_config)

    # Expand environment variables in string values (e.g., ${GITHUB_PAT})
    def expand_env_vars(value):
        """Recursively expand environment variables in values."""
        if isinstance(value, str):
            import re
            # Match ${VAR} or $VAR patterns
            pattern = r'\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)'
            def replace(match):
                # Unset variables are left as-is (the original text survives),
                # unlike _load_toolkit_config which substitutes empty strings.
                var_name = match.group(1) or match.group(2)
                return os.environ.get(var_name, match.group(0))
            return re.sub(pattern, replace, value)
        elif isinstance(value, dict):
            return {k: expand_env_vars(v) for k, v in value.items()}
        elif isinstance(value, list):
            return [expand_env_vars(v) for v in value]
        return value

    kwargs = expand_env_vars(kwargs)

    # Map common field names to API wrapper expected names
    # GitHub: personal_access_token -> github_access_token
    if 'personal_access_token' in kwargs and source == 'github':
        kwargs['github_access_token'] = kwargs.pop('personal_access_token')

    # GitHub: repository -> github_repository
    if 'repository' in kwargs and source == 'github':
        kwargs['github_repository'] = kwargs.pop('repository')

    # Ensure active_branch has a default
    # (falls back to base_branch if present, otherwise 'main').
    if 'active_branch' not in kwargs:
        kwargs['active_branch'] = kwargs.get('base_branch', 'main')

    # Get the API wrapper class from the toolkit
    # Introspect toolkit to find the API wrapper class it uses
    try:
        # Try to get the API wrapper class from the toolkit's module
        import importlib
        module_path = f"alita_sdk.tools.{source}.api_wrapper"
        try:
            wrapper_module = importlib.import_module(module_path)
        except ImportError:
            # Try alternate path for nested modules
            # (e.g. 'foo_bar' -> 'alita_sdk.tools.foo.bar.api_wrapper').
            module_path = f"alita_sdk.tools.{source.replace('_', '.')}.api_wrapper"
            wrapper_module = importlib.import_module(module_path)

        # Find the API wrapper class - look for class containing ApiWrapper/APIWrapper
        # NOTE(review): dir() is alphabetical; if a module defines several
        # wrapper classes, the alphabetically-first match wins.
        api_wrapper_class = None
        for name in dir(wrapper_module):
            obj = getattr(wrapper_module, name)
            if (isinstance(obj, type) and
                    ('ApiWrapper' in name or 'APIWrapper' in name) and
                    name not in ('BaseCodeToolApiWrapper', 'BaseVectorStoreToolApiWrapper', 'BaseToolApiWrapper')):
                api_wrapper_class = obj
                break

        if not api_wrapper_class:
            raise click.ClickException(
                f"Could not find API wrapper class in {module_path}"
            )

        # Instantiate the API wrapper directly
        api_wrapper = api_wrapper_class(**kwargs)

        # Verify it has loader method
        if not hasattr(api_wrapper, 'loader'):
            raise click.ClickException(
                f"API wrapper '{api_wrapper_class.__name__}' has no loader() method"
            )

        return api_wrapper

    except ImportError as e:
        logger.exception(f"Failed to import API wrapper for {source}")
        raise click.ClickException(f"Failed to import {source} API wrapper: {e}")
    except Exception as e:
        # NOTE(review): this also catches the ClickExceptions raised inside the
        # try block above and rewraps them as "Failed to create ..." — consider
        # re-raising click.ClickException untouched.
        logger.exception(f"Failed to instantiate toolkit {source}")
        raise click.ClickException(f"Failed to create {source} toolkit: {e}")