alita-sdk 0.3.257__py3-none-any.whl → 0.3.584__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic. Click here for more details.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3794 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +11 -0
- alita_sdk/configurations/ado.py +148 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +76 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +81 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +323 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +72 -3
- alita_sdk/configurations/report_portal.py +115 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +167 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +101 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +21 -4
- alita_sdk/runtime/clients/client.py +458 -67
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +352 -0
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +183 -43
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +493 -105
- alita_sdk/runtime/langchain/utils.py +118 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +28 -0
- alita_sdk/runtime/toolkits/application.py +14 -4
- alita_sdk/runtime/toolkits/artifact.py +25 -9
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +782 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +314 -70
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +24 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +367 -33
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +218 -0
- alita_sdk/runtime/tools/llm.py +1032 -177
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +69 -65
- alita_sdk/runtime/tools/vectorstore_base.py +163 -90
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +48 -0
- alita_sdk/tools/__init__.py +135 -37
- alita_sdk/tools/ado/__init__.py +2 -2
- alita_sdk/tools/ado/repos/__init__.py +16 -19
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
- alita_sdk/tools/ado/test_plan/__init__.py +27 -8
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
- alita_sdk/tools/ado/wiki/__init__.py +28 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
- alita_sdk/tools/ado/work_item/__init__.py +28 -12
- alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +13 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +15 -11
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +14 -8
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +454 -110
- alita_sdk/tools/bitbucket/__init__.py +28 -19
- alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +41 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +18 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +12 -7
- alita_sdk/tools/cloud/azure/__init__.py +12 -7
- alita_sdk/tools/cloud/gcp/__init__.py +12 -7
- alita_sdk/tools/cloud/k8s/__init__.py +12 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +21 -13
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +22 -14
- alita_sdk/tools/confluence/api_wrapper.py +197 -58
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +546 -64
- alita_sdk/tools/figma/__init__.py +60 -11
- alita_sdk/tools/figma/api_wrapper.py +1400 -167
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +18 -17
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +81 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +19 -13
- alita_sdk/tools/gitlab/api_wrapper.py +256 -80
- alita_sdk/tools/gitlab_org/__init__.py +14 -10
- alita_sdk/tools/google/bigquery/__init__.py +14 -13
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +21 -11
- alita_sdk/tools/jira/__init__.py +22 -11
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +38 -14
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1357 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +40 -45
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/qtest/__init__.py +22 -14
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +13 -10
- alita_sdk/tools/report_portal/__init__.py +23 -16
- alita_sdk/tools/salesforce/__init__.py +22 -16
- alita_sdk/tools/servicenow/__init__.py +21 -16
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +17 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +13 -8
- alita_sdk/tools/sql/__init__.py +22 -19
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +21 -13
- alita_sdk/tools/testrail/__init__.py +13 -11
- alita_sdk/tools/testrail/api_wrapper.py +214 -46
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +241 -55
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +18 -14
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +12 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +16 -9
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
- alita_sdk/tools/zephyr_essential/__init__.py +16 -10
- alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +13 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
- alita_sdk/tools/zephyr_squad/__init__.py +12 -7
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +184 -37
- alita_sdk-0.3.584.dist-info/RECORD +452 -0
- alita_sdk-0.3.584.dist-info/entry_points.txt +2 -0
- alita_sdk/tools/bitbucket/tools.py +0 -304
- alita_sdk-0.3.257.dist-info/RECORD +0 -343
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1073 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI commands for Inventory Ingestion Pipeline.
|
|
3
|
+
|
|
4
|
+
Provides command-line interface for running knowledge graph ingestion
|
|
5
|
+
from various source toolkits (GitHub, ADO, LocalGit, etc.).
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
# List available presets
|
|
9
|
+
alita inventory presets
|
|
10
|
+
|
|
11
|
+
# Ingest using a preset (recommended!)
|
|
12
|
+
alita inventory ingest --dir ./my-project --graph ./graph.json --preset python
|
|
13
|
+
|
|
14
|
+
# Ingest using a toolkit config file
|
|
15
|
+
alita inventory ingest --toolkit .alita/tools/github.json --graph ./graph.json -w "*.md"
|
|
16
|
+
|
|
17
|
+
# Ingest from a local git repository
|
|
18
|
+
alita inventory ingest --source localgit --path /path/to/repo --graph ./graph.json
|
|
19
|
+
|
|
20
|
+
# Use a config file for LLM/embedding/guardrails settings
|
|
21
|
+
alita inventory ingest --toolkit ./github.json -g ./graph.json --config ingestion-config.yml
|
|
22
|
+
|
|
23
|
+
# Check ingestion status (failed files, progress)
|
|
24
|
+
alita inventory status --graph ./graph.json --name my-source
|
|
25
|
+
|
|
26
|
+
# Retry failed files from previous ingestion
|
|
27
|
+
alita inventory retry --dir ./my-project -g ./graph.json --name my-source
|
|
28
|
+
alita inventory retry --dir ./my-project -g ./graph.json --name my-source --force
|
|
29
|
+
|
|
30
|
+
# Generate config template
|
|
31
|
+
alita inventory init-config
|
|
32
|
+
|
|
33
|
+
# Show graph stats
|
|
34
|
+
alita inventory stats --graph ./graph.json
|
|
35
|
+
|
|
36
|
+
# Search the graph
|
|
37
|
+
alita inventory search "PaymentService" --graph ./graph.json
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
import click
|
|
41
|
+
import json
|
|
42
|
+
import logging
|
|
43
|
+
import os
|
|
44
|
+
import re
|
|
45
|
+
import sys
|
|
46
|
+
from pathlib import Path
|
|
47
|
+
from typing import Optional, List, Dict, Any
|
|
48
|
+
|
|
49
|
+
logger = logging.getLogger(__name__)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@click.group()
def inventory():
    """Inventory knowledge graph commands."""
    # Root Click command group: subcommands (e.g. presets, init-config,
    # ingest, retry) register themselves via @inventory.command(...).
    # The group itself carries no logic.
    pass
58
|
+
@inventory.command('presets')
def presets():
    """
    List available ingestion presets.

    Presets provide pre-configured whitelist/blacklist patterns for common
    programming languages and project types.

    Example:
        alita inventory presets
    """
    # Imported lazily so `alita inventory --help` stays fast and does not
    # pull in the whole inventory package.
    from alita_sdk.community.inventory import list_presets, get_preset

    available = list_presets()

    click.echo(f"\n📋 Available Presets ({len(available)} total):\n")

    # Group presets by rough language/category based on name substrings.
    # A preset may appear in more than one category (e.g. 'java' matches
    # both 'Java' and 'javascript'); that is acceptable for display.
    categories = {
        'Python': [p for p in available if 'python' in p.lower()],
        'JavaScript/TypeScript': [p for p in available if any(x in p.lower() for x in ['javascript', 'typescript', 'react', 'next', 'node'])],
        'Java': [p for p in available if 'java' in p.lower() or 'maven' in p.lower() or 'gradle' in p.lower() or 'spring' in p.lower()],
        '.NET/C#': [p for p in available if 'dotnet' in p.lower() or 'csharp' in p.lower() or 'aspnet' in p.lower()],
        'Multi-Language': [p for p in available if any(x in p.lower() for x in ['fullstack', 'monorepo', 'documentation'])],
    }

    for category, preset_names in categories.items():
        if not preset_names:
            continue

        click.echo(f" {category}:")
        for preset_name in sorted(preset_names):
            preset_config = get_preset(preset_name)
            whitelist = preset_config.get('whitelist', [])
            # NOTE: only the whitelist is summarized here; the blacklist is
            # applied at ingestion time but not shown in this listing.

            # Format whitelist (show first 3 patterns, then a "+N" suffix)
            wl_display = ', '.join(whitelist[:3])
            if len(whitelist) > 3:
                wl_display += f', ... (+{len(whitelist)-3})'

            click.echo(f" • {preset_name:20} - {wl_display}")

        click.echo()

    click.echo("Usage:")
    click.echo(" alita inventory ingest --dir ./my-project -g ./graph.json --preset python")
    click.echo(" alita inventory ingest --dir ./src -g ./graph.json -p typescript -w '*.json'")
    click.echo()
|
109
|
+
@inventory.command('init-config')
@click.option('--output', '-o', default='./ingestion-config.yml', type=click.Path(),
              help='Output path for config template')
def init_config(output: str):
    """
    Generate a configuration template file.

    Example:
        alita inventory init-config -o ./my-config.yml
    """
    # Lazy import keeps CLI startup light.
    from alita_sdk.community.inventory import generate_config_template

    template_path = generate_config_template(output)
    click.echo(f"✅ Configuration template created: {template_path}")
    click.echo("\nEdit this file to configure:")
    for hint in (
        " - LLM provider and model (openai, azure, anthropic, ollama)",
        " - Embeddings for semantic search",
        " - Guardrails (rate limits, content filtering, thresholds)",
    ):
        click.echo(hint)
|
129
|
+
@inventory.command('ingest')
@click.option('--toolkit', '-t', type=click.Path(exists=True),
              help='Path to toolkit config JSON (e.g., .alita/tools/github.json)')
@click.option('--dir', '-d', 'directory', type=click.Path(exists=True, file_okay=False, dir_okay=True),
              help='Local directory to ingest (alternative to --toolkit for local files)')
@click.option('--graph', '-g', required=True, type=click.Path(),
              help='Path to output graph JSON file')
@click.option('--config', '-c', type=click.Path(exists=True),
              help='Path to YAML/JSON config file for LLM, embeddings, guardrails')
@click.option('--preset', '-p', default=None,
              help='Use a preset configuration (e.g., python, typescript, java, dotnet)')
@click.option('--whitelist', '-w', multiple=True,
              help='File patterns to include (e.g., -w "*.py" -w "*.md")')
@click.option('--blacklist', '-x', multiple=True,
              help='File patterns to exclude (e.g., -x "*test*" -x "*vendor*")')
@click.option('--no-relations', is_flag=True,
              help='Skip relation extraction (faster)')
@click.option('--model', '-m', default=None,
              help='LLM model name (overrides config file)')
@click.option('--limit', '-l', type=int, default=None,
              help='Limit number of documents to process (for testing)')
@click.option('--fresh', '-f', is_flag=True,
              help='Start fresh - delete existing graph and create new one')
@click.option('--name', '-n', default=None,
              help='Source name for the graph (default: directory name or toolkit_name)')
@click.option('--recursive/--no-recursive', default=True,
              help='Recursively scan subdirectories (default: recursive)')
@click.pass_context
def ingest(ctx, toolkit: Optional[str], directory: Optional[str], graph: str,
           config: Optional[str], preset: Optional[str], whitelist: tuple, blacklist: tuple,
           no_relations: bool, model: Optional[str], limit: Optional[int],
           fresh: bool, name: Optional[str], recursive: bool):
    """Run ingestion pipeline to build/update a knowledge graph.

    Use --toolkit for configured sources (GitHub, ADO, etc.) or --dir for
    local directories (simpler, no config needed).

    \b
    Examples:
        alita inventory ingest --dir ./src -g graph.json --preset python
        alita inventory ingest --dir ./src -g graph.json -w "*.py" -w "*.md"
        alita inventory ingest --dir ./src -g graph.json -p typescript -w "*.json"
        alita inventory ingest --dir ./docs -g graph.json --name my-docs
        alita inventory ingest -t github.json -g graph.json -w "*.md"
        alita inventory ingest --dir ./src -g graph.json -c config.yml
    """
    # --- Preset resolution -------------------------------------------------
    # Presets contribute base whitelist/blacklist patterns; user-supplied
    # patterns are appended after them (more specific overrides/extends).
    preset_whitelist = []
    preset_blacklist = []

    if preset:
        from alita_sdk.community.inventory import get_preset, list_presets

        try:
            preset_config = get_preset(preset)
            preset_whitelist = preset_config.get('whitelist', [])
            preset_blacklist = preset_config.get('blacklist', [])

            click.echo(f"📋 Using preset: {preset}")
            if preset_whitelist:
                click.echo(f" Whitelist: {', '.join(preset_whitelist)}")
            if preset_blacklist:
                click.echo(f" Blacklist: {', '.join(preset_blacklist)}")
        except ValueError as e:
            # Unknown preset name: surface the valid choices, keeping the
            # original ValueError in the exception chain for debugging.
            available = ', '.join(list_presets())
            raise click.ClickException(f"Unknown preset '{preset}'. Available: {available}") from e

    # Merge preset patterns with user-provided patterns
    final_whitelist = list(preset_whitelist) + list(whitelist)
    final_blacklist = list(preset_blacklist) + list(blacklist)

    # --- Source validation -------------------------------------------------
    # Exactly one of --toolkit / --dir must be given.
    if not toolkit and not directory:
        raise click.ClickException("Must specify either --toolkit or --dir")

    if toolkit and directory:
        raise click.ClickException("Cannot use both --toolkit and --dir. Choose one.")

    # Handle --dir mode (simple local directory ingestion)
    if directory:
        # Path is imported at module level; no local import needed.
        dir_path = Path(directory).resolve()
        source_name = name or dir_path.name
        source_type = 'filesystem'

        click.echo(f"📂 Ingesting local directory: {dir_path}")
        click.echo(f" Name: {source_name}")
        click.echo(f" Recursive: {recursive}")

        # Synthesize a minimal filesystem toolkit config for the directory.
        toolkit_config = {
            'type': 'filesystem',
            'toolkit_name': source_name,
            'base_directory': str(dir_path),
            'recursive': recursive,
        }
        branch = None  # No branch for filesystem
    else:
        # --- Toolkit-config mode ------------------------------------------
        toolkit_config = _load_toolkit_config(toolkit)
        click.echo(f"📦 Loaded toolkit config: {toolkit}")

        # 'type' identifies the source kind (github, ado, localgit, ...).
        source_type = toolkit_config.get('type')
        if not source_type:
            raise click.ClickException(f"Toolkit config missing 'type' field: {toolkit}")
        click.echo(f" Type: {source_type}")

        # Toolkit name is used as the source identifier in the graph.
        source_name = name or toolkit_config.get('toolkit_name') or source_type
        click.echo(f" Name: {source_name}")

        # Repo/branch (remote sources); branch falls back to 'main'.
        repo = toolkit_config.get('repository')
        if repo:
            click.echo(f" Repository: {repo}")

        branch = toolkit_config.get('active_branch') or toolkit_config.get('base_branch') or 'main'
        click.echo(f" Branch: {branch}")

        # Local path for filesystem/localgit toolkits (first key that is set).
        path = (
            toolkit_config.get('base_directory') or  # filesystem toolkit
            toolkit_config.get('git_root_dir') or    # localgit toolkit
            toolkit_config.get('path')               # generic path
        )
        if path:
            click.echo(f" Path: {path}")

        # Validate required fields based on source type
        if source_type in ('github', 'ado') and not repo:
            raise click.ClickException(f"Toolkit config missing 'repository' for source '{source_type}'")

        if source_type == 'filesystem' and not path:
            raise click.ClickException(f"Toolkit config missing 'base_directory' or 'path' for source '{source_type}'")

        if source_type == 'localgit' and not path:
            raise click.ClickException(f"Toolkit config missing 'git_root_dir' or 'path' for source '{source_type}'")

    # --- Graph lifecycle ---------------------------------------------------
    # --fresh discards any existing graph file before ingesting.
    if fresh and os.path.exists(graph):
        click.echo(f"🗑️ Fresh mode: deleting existing graph at {graph}")
        os.remove(graph)

    click.echo(f"🚀 Starting ingestion from {source_name} ({source_type})...")

    # Progress callback forwarded to the pipeline for phase-tagged output.
    def progress(message: str, phase: str):
        click.echo(f" [{phase}] {message}")

    try:
        from alita_sdk.community.inventory import IngestionPipeline, IngestionConfig

        # --- Pipeline construction ----------------------------------------
        if config:
            click.echo(f"📋 Loading config from {config}")
            # File extension decides the parser (YAML vs JSON).
            if config.endswith('.yml') or config.endswith('.yaml'):
                ingestion_config = IngestionConfig.from_yaml(config)
            else:
                ingestion_config = IngestionConfig.from_json(config)

            # CLI --model overrides the config file's model.
            if model:
                ingestion_config.llm_model = model

            # CLI --graph always wins over any graph path in the config.
            ingestion_config.graph_path = graph

            llm = _get_llm(ctx, ingestion_config.llm_model, ingestion_config.temperature)

            pipeline = IngestionPipeline(
                llm=llm,
                graph_path=ingestion_config.graph_path,
                guardrails=ingestion_config.guardrails,
            )
        else:
            # Fall back to environment-based config
            click.echo("📋 Loading config from environment")
            llm = _get_llm(ctx, model)
            pipeline = IngestionPipeline(
                llm=llm,
                graph_path=graph,
            )

        pipeline.progress_callback = progress

        # Show existing graph status; new entities are merged, not replaced.
        graph_stats = pipeline.get_stats()
        if graph_stats['node_count'] > 0:
            click.echo(f"📊 Existing graph: {graph_stats['node_count']} entities, {graph_stats['edge_count']} relations")
            click.echo(" New entities will be ADDED to existing graph")

        # Get source toolkit from config and register it
        source_toolkit = _get_source_toolkit(toolkit_config)

        # Create a RunnableConfig for CLI context - this allows
        # dispatch_custom_event to work without being inside a LangChain
        # agent run.
        import uuid
        cli_runnable_config = {
            'run_id': uuid.uuid4(),
            'tags': ['cli', 'inventory', 'ingest'],
        }

        # Set the runnable config on the toolkit if it supports it
        if hasattr(source_toolkit, 'set_runnable_config'):
            source_toolkit.set_runnable_config(cli_runnable_config)

        pipeline.register_toolkit(source_name, source_toolkit)

        # --- Run ingestion -------------------------------------------------
        if limit:
            click.echo(f"⚠️ Limiting to {limit} documents (test mode)")

        result = pipeline.run(
            source=source_name,
            branch=branch,
            whitelist=final_whitelist if final_whitelist else None,
            blacklist=final_blacklist if final_blacklist else None,
            extract_relations=not no_relations,
            max_documents=limit,
        )

        # --- Report result -------------------------------------------------
        if result.success:
            click.echo("\n✅ Ingestion complete!")
            click.echo(f" Documents processed: {result.documents_processed}")
            click.echo(f" Entities extracted: {result.entities_added}")
            click.echo(f" Relations extracted: {result.relations_added}")
            click.echo(f" Duration: {result.duration_seconds:.1f}s")
            click.echo(f" Graph saved to: {graph}")

            # Partial failures are non-fatal; point at the recovery commands.
            if result.failed_documents:
                click.echo(f"\n⚠️ {len(result.failed_documents)} documents failed to process")
                click.echo(f" Run 'alita inventory status -g {graph} -n {source_name}' to see details")
                click.echo(" Run 'alita inventory retry ...' to retry failed files")
        else:
            click.echo("\n❌ Ingestion failed!")
            for error in result.errors:
                click.echo(f" Error: {error}")
            sys.exit(1)

    except Exception as e:
        # Log the full traceback, then surface a clean CLI error (chained).
        logger.exception("Ingestion failed")
        raise click.ClickException(str(e)) from e
|
379
|
+
@inventory.command('retry')
@click.option('--toolkit', '-t', type=click.Path(exists=True),
              help='Path to toolkit config JSON (e.g., .alita/tools/github.json)')
@click.option('--dir', '-d', 'directory', type=click.Path(exists=True, file_okay=False, dir_okay=True),
              help='Local directory to ingest (alternative to --toolkit for local files)')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--config', '-c', type=click.Path(exists=True),
              help='Path to YAML/JSON config file for LLM, embeddings, guardrails')
@click.option('--no-relations', is_flag=True,
              help='Skip relation extraction (faster)')
@click.option('--model', '-m', default=None,
              help='LLM model name (overrides config file)')
@click.option('--name', '-n', required=True,
              help='Source name (must match the name used during original ingestion)')
@click.option('--force', '-f', is_flag=True,
              help='Retry all failed files regardless of attempt count')
@click.option('--recursive/--no-recursive', default=True,
              help='Recursively scan subdirectories (default: recursive)')
@click.pass_context
def retry(ctx, toolkit: Optional[str], directory: Optional[str], graph: str,
          config: Optional[str], no_relations: bool, model: Optional[str],
          name: str, force: bool, recursive: bool):
    """Retry ingestion for files that failed in a previous run.

    Reads the checkpoint file to find failed files and re-ingests them.
    Use --force to retry all failed files regardless of previous attempt count.

    \b
    Examples:
        alita inventory retry --dir ./src -g graph.json -n my-source
        alita inventory retry --dir ./src -g graph.json -n my-source --force
        alita inventory retry -t github.json -g graph.json -n github-repo
    """
    # Validate: must have either --toolkit or --dir
    # (the two source modes are mutually exclusive).
    if not toolkit and not directory:
        raise click.ClickException("Must specify either --toolkit or --dir")

    if toolkit and directory:
        raise click.ClickException("Cannot use both --toolkit and --dir. Choose one.")

    # Check if checkpoint exists.  The checkpoint lives next to the graph
    # file and is keyed by the source name (see _get_checkpoint_path).
    checkpoint_path = _get_checkpoint_path(graph, name)
    if not os.path.exists(checkpoint_path):
        click.echo(f"\n❌ No checkpoint found for source '{name}'")
        click.echo(f" Expected checkpoint: {checkpoint_path}")
        click.echo(f"\n This could mean:")
        click.echo(f" - No previous ingestion was run with --name '{name}'")
        click.echo(f" - The previous ingestion completed successfully (checkpoint cleared)")
        click.echo(f" - The checkpoint was manually deleted")
        sys.exit(1)

    # Load checkpoint to get failed files.
    try:
        with open(checkpoint_path, 'r') as f:
            checkpoint_data = json.load(f)
    except Exception as e:
        raise click.ClickException(f"Failed to load checkpoint: {e}")

    # Each failed-file record is a dict with at least 'file_path' and an
    # optional 'attempts' counter (assumed schema written by the pipeline
    # — TODO confirm against IngestionPipeline's checkpoint writer).
    failed_files = checkpoint_data.get('failed_files', [])

    if not failed_files:
        click.echo(f"\n✅ No failed files to retry for source '{name}'")
        click.echo(f" Processed files: {len(checkpoint_data.get('processed_files', []))}")
        # Clear checkpoint since there's nothing to retry
        os.remove(checkpoint_path)
        click.echo(f" Checkpoint cleared.")
        return

    # Get files to retry
    if force:
        # Retry all failed files
        files_to_retry = [f['file_path'] for f in failed_files]
        click.echo(f"\n🔄 Force retrying ALL {len(files_to_retry)} failed files...")
    else:
        # Only retry files under max attempts (default: 3)
        max_attempts = 3
        files_to_retry = [
            f['file_path'] for f in failed_files
            if f.get('attempts', 1) < max_attempts
        ]
        skipped = len(failed_files) - len(files_to_retry)
        if skipped > 0:
            click.echo(f"\n⚠️ Skipping {skipped} files that exceeded {max_attempts} attempts")
            click.echo(f" Use --force to retry all failed files")

    if not files_to_retry:
        click.echo(f"\n❌ No files eligible for retry (all exceeded max attempts)")
        click.echo(f" Use --force to retry anyway")
        sys.exit(1)

    click.echo(f"\n🔄 Retrying {len(files_to_retry)} failed files...")

    # Handle --dir mode (simple local directory ingestion)
    if directory:
        from pathlib import Path
        dir_path = Path(directory).resolve()
        source_type = 'filesystem'

        click.echo(f"📂 Source directory: {dir_path}")

        # Create a simple toolkit config for the directory
        toolkit_config = {
            'type': 'filesystem',
            'toolkit_name': name,
            'base_directory': str(dir_path),
            'recursive': recursive,
        }
    else:
        # Load toolkit config
        toolkit_config = _load_toolkit_config(toolkit)
        source_type = toolkit_config.get('type', 'unknown')
        click.echo(f"📦 Source toolkit: {source_type}")

    # Progress callback invoked by the pipeline to stream phase updates
    # to the terminal.
    def progress(message: str, phase: str):
        click.echo(f" [{phase}] {message}")

    try:
        from alita_sdk.community.inventory import IngestionPipeline, IngestionConfig

        # Load configuration: file-based config (YAML/JSON) wins, with the
        # --model flag overriding the configured LLM model.
        if config:
            click.echo(f"📋 Loading config from {config}")
            if config.endswith('.yml') or config.endswith('.yaml'):
                ingestion_config = IngestionConfig.from_yaml(config)
            else:
                ingestion_config = IngestionConfig.from_json(config)

            if model:
                ingestion_config.llm_model = model

            ingestion_config.graph_path = graph
            llm = _get_llm(ctx, ingestion_config.llm_model, ingestion_config.temperature)

            pipeline = IngestionPipeline(
                llm=llm,
                graph_path=ingestion_config.graph_path,
                guardrails=ingestion_config.guardrails,
            )
        else:
            click.echo("📋 Loading config from environment")
            llm = _get_llm(ctx, model)
            pipeline = IngestionPipeline(
                llm=llm,
                graph_path=graph,
            )

        pipeline.progress_callback = progress

        # Get source toolkit and register it
        source_toolkit = _get_source_toolkit(toolkit_config)

        # Tag this run so downstream callbacks can identify CLI retries.
        import uuid
        cli_runnable_config = {
            'run_id': uuid.uuid4(),
            'tags': ['cli', 'inventory', 'retry'],
        }

        # Not all toolkits accept a runnable config, so feature-detect.
        if hasattr(source_toolkit, 'set_runnable_config'):
            source_toolkit.set_runnable_config(cli_runnable_config)

        pipeline.register_toolkit(name, source_toolkit)

        # Run delta update for failed files (re-ingests only the listed
        # paths rather than the whole source).
        result = pipeline.delta_update(
            source=name,
            file_paths=files_to_retry,
            extract_relations=not no_relations,
        )

        # Show result
        if result.success:
            click.echo(f"\n✅ Retry complete!")
            click.echo(f" Files retried: {len(files_to_retry)}")
            click.echo(f" Documents processed: {result.documents_processed}")
            click.echo(f" Entities added: {result.entities_added}")
            click.echo(f" Relations added: {result.relations_added}")
            click.echo(f" Duration: {result.duration_seconds:.1f}s")

            # Check if there are still failed files
            if result.failed_documents:
                click.echo(f"\n⚠️ {len(result.failed_documents)} files still failing")
                click.echo(f" Run 'alita inventory status -g {graph} -n {name}' to see details")
            else:
                # All retries succeeded - clear checkpoint
                if os.path.exists(checkpoint_path):
                    os.remove(checkpoint_path)
                    click.echo(f"\n🧹 Checkpoint cleared (all files processed successfully)")
        else:
            click.echo(f"\n❌ Retry failed!")
            for error in result.errors:
                click.echo(f" Error: {error}")
            sys.exit(1)

    except Exception as e:
        # Log the full traceback, surface a concise message to the user.
        logger.exception("Retry failed")
        raise click.ClickException(str(e))
|
579
|
+
@inventory.command('status')
@click.option('--graph', '-g', required=True, type=click.Path(),
              help='Path to graph JSON file')
@click.option('--name', '-n', required=True,
              help='Source name to check status for')
def status(graph: str, name: str):
    """
    Show ingestion checkpoint status for a source.

    Displays information about the last ingestion run including:
    - Number of processed files
    - Number of failed files
    - Current phase
    - Timestamps

    Example:
        alita inventory status -g ./graph.json -n my-source
    """
    # NOTE: --graph deliberately has no exists=True here — only the
    # checkpoint file derived from the graph's path must exist.
    checkpoint_path = _get_checkpoint_path(graph, name)

    if not os.path.exists(checkpoint_path):
        click.echo(f"\n❌ No checkpoint found for source '{name}'")
        click.echo(f" Expected: {checkpoint_path}")
        click.echo(f"\n No active or failed ingestion for this source.")
        sys.exit(1)

    try:
        with open(checkpoint_path, 'r') as f:
            checkpoint = json.load(f)
    except Exception as e:
        raise click.ClickException(f"Failed to load checkpoint: {e}")

    click.echo(f"\n📋 Ingestion Status for '{name}'")
    click.echo(f" Checkpoint: {checkpoint_path}")

    click.echo(f"\n Run ID: {checkpoint.get('run_id', 'unknown')}")
    click.echo(f" Phase: {checkpoint.get('phase', 'unknown')}")
    click.echo(f" Completed: {'Yes' if checkpoint.get('completed') else 'No'}")

    click.echo(f"\n Started: {checkpoint.get('started_at', 'unknown')}")
    click.echo(f" Updated: {checkpoint.get('updated_at', 'unknown')}")

    processed_files = checkpoint.get('processed_files', [])
    failed_files = checkpoint.get('failed_files', [])

    click.echo(f"\n 📊 Progress:")
    click.echo(f" Documents processed: {checkpoint.get('documents_processed', 0)}")
    click.echo(f" Entities added: {checkpoint.get('entities_added', 0)}")
    click.echo(f" Relations added: {checkpoint.get('relations_added', 0)}")

    click.echo(f"\n 📁 Files:")
    click.echo(f" Processed: {len(processed_files)}")
    click.echo(f" Failed: {len(failed_files)}")

    if failed_files:
        # Count by attempts — histogram of how many files are at each
        # retry-attempt count.
        by_attempts = {}
        for f in failed_files:
            attempts = f.get('attempts', 1)
            by_attempts[attempts] = by_attempts.get(attempts, 0) + 1

        click.echo(f"\n ❌ Failed files by attempt count:")
        for attempts, count in sorted(by_attempts.items()):
            click.echo(f" {attempts} attempt(s): {count} files")

        # Show sample errors
        click.echo(f"\n 📝 Sample errors (first 3):")
        for f in failed_files[:3]:
            file_path = f.get('file_path', 'unknown')
            # 'error' is preferred; 'last_error' is a fallback key
            # (presumably written by an older checkpoint format — verify).
            error = f.get('error', f.get('last_error', 'unknown error'))
            # Truncate long paths and errors
            if len(file_path) > 50:
                file_path = '...' + file_path[-47:]
            if len(error) > 60:
                error = error[:57] + '...'
            click.echo(f" - {file_path}")
            click.echo(f" Error: {error}")

    errors = checkpoint.get('errors', [])
    if errors:
        click.echo(f"\n ⚠️ Run errors:")
        for error in errors[:3]:
            click.echo(f" - {error[:80]}{'...' if len(error) > 80 else ''}")

    if failed_files:
        click.echo(f"\n 💡 To retry failed files:")
        click.echo(f" alita inventory retry --dir <path> -g {graph} -n {name}")
        click.echo(f" alita inventory retry --dir <path> -g {graph} -n {name} --force")

    click.echo()
|
671
|
+
@inventory.command('stats')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
def stats(graph: str):
    """
    Show knowledge graph statistics.

    Example:
        alita inventory stats -g ./graph.json
    """
    try:
        from alita_sdk.community.inventory import KnowledgeGraph

        # Load the graph and pull its summary counters.  Local is named
        # `summary` to avoid shadowing this command function.
        knowledge_graph = KnowledgeGraph()
        knowledge_graph.load_from_json(graph)
        summary = knowledge_graph.get_stats()

        click.echo(f"\n📊 Knowledge Graph Statistics")
        click.echo(f" Path: {graph}")
        click.echo(f"\n Entities: {summary['node_count']}")
        click.echo(f" Relations: {summary['edge_count']}")

        # Per-type breakdowns, most frequent first.
        if summary['entity_types']:
            click.echo(f"\n Entity Types:")
            for etype, count in sorted(summary['entity_types'].items(), key=lambda item: -item[1]):
                click.echo(f" - {etype}: {count}")

        if summary['relation_types']:
            click.echo(f"\n Relation Types:")
            for rtype, count in sorted(summary['relation_types'].items(), key=lambda item: -item[1]):
                click.echo(f" - {rtype}: {count}")

        if summary['source_toolkits']:
            click.echo(f"\n Sources: {', '.join(summary['source_toolkits'])}")

        if summary['last_saved']:
            click.echo(f"\n Last updated: {summary['last_saved']}")

        click.echo()

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except Exception as e:
        raise click.ClickException(str(e))
|
717
|
+
@inventory.command('search')
@click.argument('query')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--type', '-t', 'entity_type', default=None,
              help='Filter by entity type')
@click.option('--limit', '-n', default=10, type=int,
              help='Maximum results (default: 10)')
def search(query: str, graph: str, entity_type: Optional[str], limit: int):
    """
    Search for entities in the knowledge graph.

    Example:
        alita inventory search "Payment" -g ./graph.json
        alita inventory search "User" -g ./graph.json --type class
    """
    try:
        from alita_sdk.community.inventory import KnowledgeGraph

        kg = KnowledgeGraph()
        kg.load_from_json(graph)

        # Search results are dicts with an 'entity' payload; entities may
        # carry an optional 'citation' (file path + line range).
        results = kg.search(query, top_k=limit, entity_type=entity_type)

        if not results:
            click.echo(f"No entities found matching '{query}'")
            return

        click.echo(f"\n🔍 Found {len(results)} entities matching '{query}':\n")

        for i, result in enumerate(results, 1):
            entity = result['entity']
            citation = entity.get('citation', {})

            click.echo(f"{i}. {entity.get('name')} ({entity.get('type')})")

            if citation:
                file_path = citation.get('file_path', 'unknown')
                line_info = ""
                # NOTE(review): line_end is only appended when line_start
                # is present — confirm that end-only citations never occur.
                if citation.get('line_start'):
                    line_info = f":{citation['line_start']}"
                    if citation.get('line_end'):
                        line_info += f"-{citation['line_end']}"
                click.echo(f" 📍 {file_path}{line_info}")

            # Show description if available (truncated to 80 chars).
            if entity.get('description'):
                desc = entity['description'][:80]
                if len(entity['description']) > 80:
                    desc += "..."
                click.echo(f" {desc}")

        click.echo()

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except Exception as e:
        raise click.ClickException(str(e))
|
777
|
+
@inventory.command('entity')
@click.argument('name')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--relations/--no-relations', default=True,
              help='Include relations (default: yes)')
def entity(name: str, graph: str, relations: bool):
    """
    Get detailed information about an entity.

    Example:
        alita inventory entity "PaymentProcessor" -g ./graph.json
    """
    try:
        # Lazy import, matching the other inventory subcommands.
        from alita_sdk.community.inventory import InventoryRetrievalApiWrapper

        wrapper = InventoryRetrievalApiWrapper(graph_path=graph)
        details = wrapper.get_entity(name, include_relations=relations)
        click.echo(f"\n{details}")

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except Exception as e:
        raise click.ClickException(str(e))
|
804
|
+
@inventory.command('impact')
@click.argument('name')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--direction', '-d', type=click.Choice(['upstream', 'downstream']),
              default='downstream', help='Analysis direction (default: downstream)')
@click.option('--depth', default=3, type=int,
              help='Maximum traversal depth (default: 3)')
def impact(name: str, graph: str, direction: str, depth: int):
    """
    Analyze impact of changes to an entity.

    Example:
        alita inventory impact "UserService" -g ./graph.json
        alita inventory impact "Database" -g ./graph.json --direction upstream
    """
    try:
        # Lazy import, matching the other inventory subcommands.
        from alita_sdk.community.inventory import InventoryRetrievalApiWrapper

        wrapper = InventoryRetrievalApiWrapper(graph_path=graph)
        analysis = wrapper.impact_analysis(name, direction=direction, max_depth=depth)
        click.echo(f"\n{analysis}")

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except Exception as e:
        raise click.ClickException(str(e))
|
834
|
+
@inventory.command('visualize')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--output', '-o', default=None, type=click.Path(),
              help='Output HTML file path (default: graph_visualization.html in same dir)')
@click.option('--open/--no-open', 'open_browser', default=True,
              help='Open in browser after generation (default: yes)')
@click.option('--title', '-t', default=None,
              help='Title for the visualization')
def visualize(graph: str, output: Optional[str], open_browser: bool, title: Optional[str]):
    """
    Generate an interactive visualization of the knowledge graph.

    Creates a standalone HTML file with D3.js-powered graph visualization.
    Features include:
    - Force-directed layout
    - Color-coded entity types
    - Node size based on connections
    - Interactive drag, zoom, and pan
    - Search and filter by entity type
    - Click nodes for detailed info

    Example:
        alita inventory visualize -g ./graph.json
        alita inventory visualize -g ./graph.json -o my_graph.html
        alita inventory visualize -g ./graph.json --no-open
    """
    try:
        from alita_sdk.community.inventory.visualize import generate_visualization
        from alita_sdk.community.inventory import KnowledgeGraph
        import webbrowser
        # Fix: removed redundant function-local `import os` — `os` is
        # already a module-level import (used by the retry/status commands).

        # Default output path: "<graph basename>_visualization.html"
        # next to the graph file.
        if output is None:
            graph_dir = os.path.dirname(os.path.abspath(graph))
            graph_name = os.path.splitext(os.path.basename(graph))[0]
            output = os.path.join(graph_dir, f"{graph_name}_visualization.html")

        # Default title: graph basename, underscores → spaces, Title Case.
        if title is None:
            title = os.path.splitext(os.path.basename(graph))[0].replace('_', ' ').title()

        click.echo(f"\n🎨 Generating graph visualization...")
        click.echo(f" Source: {graph}")

        # Generate visualization (returns the written HTML path).
        html_path = generate_visualization(graph, output, title)

        click.echo(f" Output: {html_path}")

        # Show graph stats alongside the generated artifact.
        kg = KnowledgeGraph()
        kg.load_from_json(graph)
        stats = kg.get_stats()
        click.echo(f"\n 📊 Graph contains:")
        click.echo(f" - {stats['node_count']} entities")
        click.echo(f" - {stats['edge_count']} relations")
        if stats['entity_types']:
            click.echo(f" - {len(stats['entity_types'])} entity types")

        if open_browser:
            click.echo(f"\n Opening in browser...")
            webbrowser.open(f"file://{os.path.abspath(html_path)}")

        click.echo(f"\n✅ Visualization complete!")
        click.echo()

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except ImportError as e:
        raise click.ClickException(f"Visualization module not available: {e}")
    except Exception as e:
        raise click.ClickException(str(e))
|
910
|
+
@inventory.command('enrich')
@click.option('--graph', '-g', required=True, type=click.Path(exists=True),
              help='Path to graph JSON file')
@click.option('--output', '-o', default=None, type=click.Path(),
              help='Output graph file (default: overwrite input)')
@click.option('--deduplicate/--no-deduplicate', default=False,
              help='Merge entities with exact same name (DISABLED by default, use with caution)')
@click.option('--cross-source/--no-cross-source', default=True,
              help='Link same-named entities across sources (default: yes)')
@click.option('--semantic/--no-semantic', default=True,
              help='Create semantic cross-links based on shared concepts (default: yes)')
@click.option('--orphans/--no-orphans', default=True,
              help='Connect orphan nodes to related entities (default: yes)')
@click.option('--similarity/--no-similarity', default=False,
              help='Link entities with similar names (default: no)')
@click.option('--dry-run', is_flag=True, default=False,
              help='Show what would be done without saving')
def enrich(graph: str, output: Optional[str], deduplicate: bool, cross_source: bool,
           semantic: bool, orphans: bool, similarity: bool, dry_run: bool):
    """
    Enrich a knowledge graph with cross-linking.

    Post-processes the graph to improve connectivity by creating links:

    1. CROSS-SOURCE LINKING: Link entities across sources
       - SDK class ↔ docs concept, code ↔ documentation
       - Automatically determines relationship type

    2. SEMANTIC LINKING: Link entities sharing concepts
       - Finds entities with overlapping significant words
       - Creates LINKS between related entities
       - Example: "Artifact Toolkit" --[related_to]--> "Configure Artifact Toolkit"

    3. ORPHAN LINKING: Connect isolated nodes
       - Links unconnected nodes to related entities

    4. DEDUPLICATION (optional, disabled by default):
       - Use --deduplicate to merge exact name matches
       - Use with caution - can lose semantic meaning

    Example:
        alita inventory enrich -g ./graph.json
        alita inventory enrich -g ./graph.json -o enriched.json
        alita inventory enrich -g ./graph.json --deduplicate
        alita inventory enrich -g ./graph.json --dry-run
    """
    try:
        from alita_sdk.community.inventory.enrichment import GraphEnricher

        click.echo(f"\n🔗 Enriching knowledge graph...")
        click.echo(f" Source: {graph}")

        enricher = GraphEnricher(graph)

        # Show initial stats (node/link counts before enrichment, used
        # below to report node reduction after dedup).
        initial_nodes = len(enricher.nodes_by_id)
        initial_links = len(enricher.graph_data.get("links", []))
        click.echo(f" Initial: {initial_nodes} nodes, {initial_links} links")

        # Run enrichment — returns a dict of per-strategy counters
        # (keys used below: entities_merged, merge_groups,
        # cross_source_links, semantic_links, orphan_links,
        # similarity_links).
        stats = enricher.enrich(
            deduplicate=deduplicate,
            cross_source=cross_source,
            semantic_links=semantic,
            orphans=orphans,
            similarity=similarity,
        )

        click.echo(f"\n 📊 Enrichment results:")

        if deduplicate:
            click.echo(f" Entities merged: {stats.get('entities_merged', 0)} (exact name matches into {stats.get('merge_groups', 0)} groups)")
            final_nodes = len(enricher.nodes_by_id)
            click.echo(f" Node reduction: {initial_nodes} → {final_nodes}")

        click.echo(f" Cross-source links: +{stats.get('cross_source_links', 0)}")

        if semantic:
            click.echo(f" Semantic links: +{stats.get('semantic_links', 0)}")

        click.echo(f" Orphan connections: +{stats.get('orphan_links', 0)}")

        if similarity:
            click.echo(f" Similarity links: +{stats.get('similarity_links', 0)}")

        click.echo(f" Total new links: +{len(enricher.new_links)}")

        if dry_run:
            click.echo(f"\n 🔍 Dry run - no changes saved")

            # Show merge examples (first 5)
            if deduplicate and enricher.merged_nodes:
                click.echo(f"\n Sample merged entities:")
                for merge in enricher.merged_nodes[:5]:
                    new_node = merge["new_node"]
                    types = merge.get("merged_types", [])
                    click.echo(f" '{new_node['name']}' [{' + '.join(set(types))}] → [{new_node['type']}]")

            # Show link examples (first 10)
            click.echo(f"\n Sample new links:")
            for link in enricher.new_links[:10]:
                src = enricher.nodes_by_id.get(link['source'], {})
                tgt = enricher.nodes_by_id.get(link['target'], {})
                click.echo(f" {src.get('name', '?')[:25]:25} --[{link['relation_type']}]--> {tgt.get('name', '?')[:25]}")
        else:
            # save(None) presumably overwrites the input graph — see
            # GraphEnricher.save.
            output_path = enricher.save(output)
            click.echo(f"\n 💾 Saved to: {output_path}")

        click.echo(f"\n✅ Enrichment complete!")
        click.echo()

    except FileNotFoundError:
        raise click.ClickException(f"Graph file not found: {graph}")
    except ImportError as e:
        raise click.ClickException(f"Enrichment module not available: {e}")
    except Exception as e:
        raise click.ClickException(str(e))
|
|
1029
|
+
# ========== Helper Functions ==========
|
|
1030
|
+
|
|
1031
|
+
def _get_checkpoint_path(graph: str, source_name: str) -> str:
|
|
1032
|
+
"""
|
|
1033
|
+
Get the checkpoint file path for a source.
|
|
1034
|
+
|
|
1035
|
+
Checkpoint files are stored in the same directory as the graph file,
|
|
1036
|
+
with naming pattern: .ingestion-checkpoint-{source_name}.json
|
|
1037
|
+
|
|
1038
|
+
Args:
|
|
1039
|
+
graph: Path to the graph JSON file
|
|
1040
|
+
source_name: Name of the source toolkit
|
|
1041
|
+
|
|
1042
|
+
Returns:
|
|
1043
|
+
Absolute path to the checkpoint file
|
|
1044
|
+
"""
|
|
1045
|
+
graph_path = Path(graph).resolve()
|
|
1046
|
+
graph_dir = graph_path.parent
|
|
1047
|
+
return str(graph_dir / f".ingestion-checkpoint-{source_name}.json")
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
def _load_toolkit_config(toolkit_path: str) -> Dict[str, Any]:
    """Deprecated: Use alita_sdk.community.inventory.toolkit_utils.load_toolkit_config instead."""
    # Thin delegation shim kept for backward compatibility.
    from alita_sdk.community.inventory.toolkit_utils import (
        load_toolkit_config as _load_config,
    )
    return _load_config(toolkit_path)
|
1056
|
+
def _get_llm(ctx, model: Optional[str] = None, temperature: float = 0.0):
    """Deprecated: Use alita_sdk.community.inventory.toolkit_utils.get_llm_for_config instead."""
    # Delegation shim: resolve the CLI client from the Click context,
    # then hand off to the shared helper in toolkit_utils.
    from .cli import get_client
    from alita_sdk.community.inventory.toolkit_utils import get_llm_for_config

    return get_llm_for_config(get_client(ctx), model=model, temperature=temperature)
|
1065
|
+
def _get_source_toolkit(toolkit_config: Dict[str, Any]):
    """Deprecated: Use alita_sdk.community.inventory.toolkit_utils.get_source_toolkit instead."""
    from alita_sdk.community.inventory.toolkit_utils import get_source_toolkit

    try:
        return get_source_toolkit(toolkit_config)
    except ValueError as exc:
        # Surface configuration problems as CLI-friendly errors
        # instead of letting the ValueError traceback escape.
        raise click.ClickException(str(exc))