alita-sdk 0.3.257__py3-none-any.whl → 0.3.562__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3601 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +11 -0
- alita_sdk/configurations/ado.py +148 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +76 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +81 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +111 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +72 -3
- alita_sdk/configurations/report_portal.py +115 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +167 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +101 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +21 -4
- alita_sdk/runtime/clients/client.py +458 -67
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +352 -0
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +183 -43
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +407 -92
- alita_sdk/runtime/langchain/utils.py +102 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +28 -0
- alita_sdk/runtime/toolkits/application.py +14 -4
- alita_sdk/runtime/toolkits/artifact.py +24 -9
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +780 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +314 -70
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +24 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +367 -33
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +218 -0
- alita_sdk/runtime/tools/llm.py +1013 -177
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +69 -65
- alita_sdk/runtime/tools/vectorstore_base.py +163 -90
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +48 -0
- alita_sdk/tools/__init__.py +135 -37
- alita_sdk/tools/ado/__init__.py +2 -2
- alita_sdk/tools/ado/repos/__init__.py +15 -19
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
- alita_sdk/tools/ado/test_plan/__init__.py +26 -8
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
- alita_sdk/tools/ado/wiki/__init__.py +27 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
- alita_sdk/tools/ado/work_item/__init__.py +27 -12
- alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +12 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +14 -11
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +13 -8
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +454 -110
- alita_sdk/tools/bitbucket/__init__.py +27 -19
- alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +41 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +18 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +11 -7
- alita_sdk/tools/cloud/azure/__init__.py +11 -7
- alita_sdk/tools/cloud/gcp/__init__.py +11 -7
- alita_sdk/tools/cloud/k8s/__init__.py +11 -7
- alita_sdk/tools/code/linter/__init__.py +9 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +20 -13
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +21 -14
- alita_sdk/tools/confluence/api_wrapper.py +197 -58
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +11 -5
- alita_sdk/tools/elastic/__init__.py +10 -8
- alita_sdk/tools/elitea_base.py +546 -64
- alita_sdk/tools/figma/__init__.py +11 -8
- alita_sdk/tools/figma/api_wrapper.py +352 -153
- alita_sdk/tools/github/__init__.py +17 -17
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +81 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +18 -13
- alita_sdk/tools/gitlab/api_wrapper.py +224 -80
- alita_sdk/tools/gitlab_org/__init__.py +13 -10
- alita_sdk/tools/google/bigquery/__init__.py +13 -13
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +20 -11
- alita_sdk/tools/jira/__init__.py +21 -11
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +10 -8
- alita_sdk/tools/localgit/__init__.py +8 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +38 -14
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +10 -8
- alita_sdk/tools/openapi/__init__.py +281 -108
- alita_sdk/tools/openapi/api_wrapper.py +883 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +18 -11
- alita_sdk/tools/pandas/api_wrapper.py +40 -45
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +10 -11
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +10 -10
- alita_sdk/tools/qtest/__init__.py +21 -14
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +12 -10
- alita_sdk/tools/report_portal/__init__.py +22 -16
- alita_sdk/tools/salesforce/__init__.py +21 -16
- alita_sdk/tools/servicenow/__init__.py +20 -16
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +16 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +11 -7
- alita_sdk/tools/sql/__init__.py +21 -19
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +20 -13
- alita_sdk/tools/testrail/__init__.py +12 -11
- alita_sdk/tools/testrail/api_wrapper.py +214 -46
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +182 -62
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +17 -14
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/yagmail/__init__.py +8 -3
- alita_sdk/tools/zephyr/__init__.py +11 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +15 -9
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
- alita_sdk/tools/zephyr_essential/__init__.py +15 -10
- alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +12 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
- alita_sdk/tools/zephyr_squad/__init__.py +11 -7
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/METADATA +184 -37
- alita_sdk-0.3.562.dist-info/RECORD +450 -0
- alita_sdk-0.3.562.dist-info/entry_points.txt +2 -0
- alita_sdk/tools/bitbucket/tools.py +0 -304
- alita_sdk-0.3.257.dist-info/RECORD +0 -343
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Inventory Module for Knowledge Graph Construction and Retrieval.
|
|
3
|
+
|
|
4
|
+
This module provides two distinct capabilities:
|
|
5
|
+
|
|
6
|
+
1. **Ingestion Pipeline** - A workflow for building/updating knowledge graphs
|
|
7
|
+
from source code repositories. NOT a toolkit - it's a defined process.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
from alita_sdk.community.inventory import (
|
|
11
|
+
IngestionPipeline,
|
|
12
|
+
ingest_repository,
|
|
13
|
+
PYTHON_PRESET,
|
|
14
|
+
TYPESCRIPT_PRESET,
|
|
15
|
+
get_preset
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Full pipeline with config
|
|
19
|
+
pipeline = IngestionPipeline(
|
|
20
|
+
llm=llm,
|
|
21
|
+
graph_path="./graph.json",
|
|
22
|
+
source_toolkits={'github': github_toolkit}
|
|
23
|
+
)
|
|
24
|
+
result = pipeline.run(source='github', branch='main')
|
|
25
|
+
|
|
26
|
+
# Or one-shot convenience function
|
|
27
|
+
result = ingest_repository(
|
|
28
|
+
llm=llm,
|
|
29
|
+
graph_path="./graph.json",
|
|
30
|
+
source_toolkit=github_toolkit,
|
|
31
|
+
source_name="github"
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
2. **Retrieval Toolkit** - A pure query toolkit for retrieving context from
|
|
35
|
+
a pre-built knowledge graph. Can be added to any agent.
|
|
36
|
+
|
|
37
|
+
Usage:
|
|
38
|
+
from alita_sdk.community.inventory import InventoryRetrievalToolkit
|
|
39
|
+
|
|
40
|
+
# As a toolkit for agents
|
|
41
|
+
toolkit = InventoryRetrievalToolkit.get_toolkit(
|
|
42
|
+
graph_path="./graph.json",
|
|
43
|
+
base_directory="/path/to/source" # For local content retrieval
|
|
44
|
+
)
|
|
45
|
+
tools = toolkit.get_tools()
|
|
46
|
+
|
|
47
|
+
Entity Taxonomy (8 layers, 55 types listed below):
|
|
48
|
+
- Product Layer: feature, product, user_story, requirement, epic
|
|
49
|
+
- Domain Layer: domain, subdomain, business_capability, value_stream, process
|
|
50
|
+
- Service Layer: service, microservice, api, api_endpoint, message_queue, event
|
|
51
|
+
- Code Layer: module, package, class, function, method, interface, trait, enum, type, variable, constant
|
|
52
|
+
- Data Layer: database, table, collection, schema, model, entity, field, index, query, migration
|
|
53
|
+
- Testing Layer: test_suite, test_case, test_fixture, mock, stub, assertion
|
|
54
|
+
- Delivery Layer: pipeline, job, stage, environment, deployment, artifact, container
|
|
55
|
+
- Organization Layer: team, repository, project, workspace, organization
|
|
56
|
+
|
|
57
|
+
Relationship Taxonomy (8 categories, 38 types listed below):
|
|
58
|
+
- Structural: CONTAINS, IMPORTS, EXTENDS, IMPLEMENTS, USES, DEPENDS_ON, INSTANTIATES, COMPOSED_OF
|
|
59
|
+
- Behavioral: CALLS, INVOKES, TRIGGERS, HANDLES, SUBSCRIBES_TO, PUBLISHES_TO, RETURNS
|
|
60
|
+
- Data Lineage: READS_FROM, WRITES_TO, TRANSFORMS, QUERIES, STORES_IN, REFERENCES
|
|
61
|
+
- UI/Product: RENDERS, ROUTES_TO, NAVIGATES_TO, DISPLAYS
|
|
62
|
+
- Testing: TESTS, MOCKS, COVERS, ASSERTS
|
|
63
|
+
- Ownership: OWNED_BY, MAINTAINED_BY, CREATED_BY
|
|
64
|
+
- Temporal: PRECEDES, FOLLOWS, SCHEDULED_BY
|
|
65
|
+
- Semantic: RELATED_TO, SIMILAR_TO, ALIAS_OF
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
import logging
|
|
69
|
+
from typing import List, Optional, Dict, Any
|
|
70
|
+
|
|
71
|
+
# Configuration
|
|
72
|
+
from .config import (
|
|
73
|
+
IngestionConfig,
|
|
74
|
+
GuardrailsConfig,
|
|
75
|
+
generate_config_template,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Ingestion Pipeline - workflow for graph building
|
|
79
|
+
from .ingestion import (
|
|
80
|
+
IngestionPipeline,
|
|
81
|
+
IngestionResult,
|
|
82
|
+
ingest_repository,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# Retrieval Toolkit - for querying graphs
|
|
86
|
+
from .retrieval import InventoryRetrievalApiWrapper
|
|
87
|
+
|
|
88
|
+
# Toolkit utilities - for configuration and instantiation
|
|
89
|
+
from .toolkit_utils import (
|
|
90
|
+
load_toolkit_config,
|
|
91
|
+
get_llm_for_config,
|
|
92
|
+
get_source_toolkit,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# Core graph types
|
|
96
|
+
from .knowledge_graph import KnowledgeGraph, Citation
|
|
97
|
+
|
|
98
|
+
# Extractors (for advanced use)
|
|
99
|
+
from .extractors import (
|
|
100
|
+
ENTITY_TAXONOMY,
|
|
101
|
+
RELATIONSHIP_TAXONOMY,
|
|
102
|
+
EntityExtractor,
|
|
103
|
+
RelationExtractor,
|
|
104
|
+
FactExtractor,
|
|
105
|
+
DocumentClassifier,
|
|
106
|
+
EntitySchemaDiscoverer,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Toolkit wrapper for agent integration
|
|
110
|
+
from .toolkit import InventoryRetrievalToolkit
|
|
111
|
+
|
|
112
|
+
# Ingestion presets
|
|
113
|
+
from .presets import (
|
|
114
|
+
PYTHON_PRESET,
|
|
115
|
+
PYTHON_PRESET_WITH_TESTS,
|
|
116
|
+
JAVASCRIPT_PRESET,
|
|
117
|
+
TYPESCRIPT_PRESET,
|
|
118
|
+
REACT_PRESET,
|
|
119
|
+
NEXTJS_PRESET,
|
|
120
|
+
JAVA_PRESET,
|
|
121
|
+
SPRING_BOOT_PRESET,
|
|
122
|
+
MAVEN_PRESET,
|
|
123
|
+
GRADLE_PRESET,
|
|
124
|
+
DOTNET_PRESET,
|
|
125
|
+
CSHARP_PRESET,
|
|
126
|
+
ASPNET_PRESET,
|
|
127
|
+
FULLSTACK_JS_PRESET,
|
|
128
|
+
MONOREPO_PRESET,
|
|
129
|
+
DOCUMENTATION_PRESET,
|
|
130
|
+
PRESETS,
|
|
131
|
+
get_preset,
|
|
132
|
+
list_presets,
|
|
133
|
+
combine_presets,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
logger = logging.getLogger(__name__)
|
|
137
|
+
|
|
138
|
+
name = "inventory"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def get_tools(tool: dict, tools_list: Optional[List[dict]] = None):
    """
    Get inventory retrieval tools for agent integration.

    This function is called by the toolkit loader to get the
    retrieval tools for querying a pre-built knowledge graph.

    NOTE: For ingestion, use the IngestionPipeline directly, not through
    the agent toolkit system. Ingestion is a workflow, not an agent task.

    Args:
        tool: The inventory toolkit configuration dict. The keys read here are
            ``settings`` (a dict with ``graph_path``, ``selected_tools``,
            ``base_directory`` and ``source_toolkits``) and ``toolkit_name``.
        tools_list: Optional list of all toolkit configs in the agent.
            Accepted for loader compatibility; not used by this function.

    Returns:
        The result of ``InventoryRetrievalToolkit.get_toolkit(...).get_tools()``
        (a list of BaseTool instances for knowledge graph retrieval).
    """
    # `settings` may be missing *or* explicitly null in the toolkit config;
    # `tool.get('settings', {})` would return None in the latter case and the
    # lookups below would raise AttributeError.
    settings = tool.get('settings') or {}

    # For retrieval, we need the graph path
    graph_path = settings.get('graph_path')
    if not graph_path:
        # Only a warning: the toolkit is still built and receives the empty
        # graph_path as-is.
        logger.warning("Inventory toolkit requires graph_path setting for retrieval")

    toolkit = InventoryRetrievalToolkit.get_toolkit(
        selected_tools=settings.get('selected_tools', []),
        toolkit_name=tool.get('toolkit_name'),
        # Graph location
        graph_path=graph_path,
        # For local content retrieval
        base_directory=settings.get('base_directory'),
        # Source toolkits for remote content retrieval (optional)
        source_toolkits=settings.get('source_toolkits', {}),
    )
    return toolkit.get_tools()
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
__all__ = [
|
|
179
|
+
# Module name
|
|
180
|
+
'name',
|
|
181
|
+
'get_tools',
|
|
182
|
+
|
|
183
|
+
# Configuration
|
|
184
|
+
'IngestionConfig',
|
|
185
|
+
'GuardrailsConfig',
|
|
186
|
+
'generate_config_template',
|
|
187
|
+
|
|
188
|
+
# Ingestion (workflow)
|
|
189
|
+
'IngestionPipeline',
|
|
190
|
+
'IngestionResult',
|
|
191
|
+
'ingest_repository',
|
|
192
|
+
|
|
193
|
+
# Retrieval (toolkit)
|
|
194
|
+
'InventoryRetrievalToolkit',
|
|
195
|
+
'InventoryRetrievalApiWrapper',
|
|
196
|
+
|
|
197
|
+
# Toolkit utilities
|
|
198
|
+
'load_toolkit_config',
|
|
199
|
+
'get_llm_for_config',
|
|
200
|
+
'get_source_toolkit',
|
|
201
|
+
|
|
202
|
+
# Core types
|
|
203
|
+
'KnowledgeGraph',
|
|
204
|
+
'Citation',
|
|
205
|
+
|
|
206
|
+
# Extractors
|
|
207
|
+
'ENTITY_TAXONOMY',
|
|
208
|
+
'RELATIONSHIP_TAXONOMY',
|
|
209
|
+
'EntityExtractor',
|
|
210
|
+
'RelationExtractor',
|
|
211
|
+
'FactExtractor',
|
|
212
|
+
'DocumentClassifier',
|
|
213
|
+
'EntitySchemaDiscoverer',
|
|
214
|
+
|
|
215
|
+
# Presets
|
|
216
|
+
'PYTHON_PRESET',
|
|
217
|
+
'PYTHON_PRESET_WITH_TESTS',
|
|
218
|
+
'JAVASCRIPT_PRESET',
|
|
219
|
+
'TYPESCRIPT_PRESET',
|
|
220
|
+
'REACT_PRESET',
|
|
221
|
+
'NEXTJS_PRESET',
|
|
222
|
+
'JAVA_PRESET',
|
|
223
|
+
'SPRING_BOOT_PRESET',
|
|
224
|
+
'MAVEN_PRESET',
|
|
225
|
+
'GRADLE_PRESET',
|
|
226
|
+
'DOTNET_PRESET',
|
|
227
|
+
'CSHARP_PRESET',
|
|
228
|
+
'ASPNET_PRESET',
|
|
229
|
+
'FULLSTACK_JS_PRESET',
|
|
230
|
+
'MONOREPO_PRESET',
|
|
231
|
+
'DOCUMENTATION_PRESET',
|
|
232
|
+
'PRESETS',
|
|
233
|
+
'get_preset',
|
|
234
|
+
'list_presets',
|
|
235
|
+
'combine_presets',
|
|
236
|
+
]
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration for Inventory Ingestion Pipeline.
|
|
3
|
+
|
|
4
|
+
Since the ingestion runs within Alita, the LLM and embeddings are provided
|
|
5
|
+
by the Alita client. Configuration only needs model names, not providers.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
# From YAML config file
|
|
9
|
+
config = IngestionConfig.from_yaml("./ingestion-config.yml")
|
|
10
|
+
|
|
11
|
+
# Programmatic
|
|
12
|
+
config = IngestionConfig(
|
|
13
|
+
llm_model="gpt-4o-mini",
|
|
14
|
+
embedding_model="text-embedding-3-small",
|
|
15
|
+
guardrails=GuardrailsConfig(
|
|
16
|
+
max_tokens_per_doc=8000,
|
|
17
|
+
max_entities_per_doc=50,
|
|
18
|
+
)
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Use in pipeline (Alita client provides LLM/embeddings)
|
|
22
|
+
pipeline = IngestionPipeline(
|
|
23
|
+
llm=alita.get_langchain_llm(config.llm_model),
|
|
24
|
+
embedding=alita.get_embeddings(config.embedding_model),
|
|
25
|
+
graph_path=config.graph_path,
|
|
26
|
+
guardrails=config.guardrails,
|
|
27
|
+
)
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import os
|
|
31
|
+
import logging
|
|
32
|
+
from typing import Any, Optional, Dict, List
|
|
33
|
+
from pydantic import BaseModel, Field
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class GuardrailsConfig(BaseModel):
    """Guardrails configuration for safe and controlled extraction.

    A pydantic model that only *declares* the limits, filters and thresholds;
    every field has a default, so ``GuardrailsConfig()`` is valid. The intent
    of each setting is given by its ``description``.
    """

    # Token/content limits
    max_tokens_per_doc: int = Field(
        default=8000,
        description="Maximum tokens per document before chunking"
    )
    max_entities_per_doc: int = Field(
        default=50,
        description="Maximum entities to extract from a single document"
    )
    max_relations_per_doc: int = Field(
        default=100,
        description="Maximum relations to extract per document"
    )

    # Content filtering
    content_filter_enabled: bool = Field(
        default=True,
        description="Enable content filtering for PII/secrets"
    )
    filter_patterns: List[str] = Field(
        default_factory=lambda: [
            # Quoted credential assignments such as password = "..." or api_key: '...'
            r'(?i)(password|secret|api[_-]?key|token)\s*[=:]\s*["\'][^"\']+["\']',
            # Email addresses. The TLD class is [A-Za-z]; the previous
            # [A-Z|a-z] also accepted a literal '|' because '|' is not an
            # alternation operator inside a character class.
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
            # PEM private-key headers. The algorithm word is optional so the
            # PKCS#8 form "-----BEGIN PRIVATE KEY-----" is matched as well as
            # "-----BEGIN RSA PRIVATE KEY-----" and similar.
            r'-----BEGIN (?:[A-Z]+ )?PRIVATE KEY-----',
        ],
        description="Regex patterns to filter from content before LLM processing"
    )

    # Rate limiting
    rate_limit_requests_per_minute: Optional[int] = Field(
        default=None,
        description="Max LLM requests per minute (None = unlimited)"
    )
    rate_limit_tokens_per_minute: Optional[int] = Field(
        default=None,
        description="Max tokens per minute (None = unlimited)"
    )

    # Error handling
    max_retries: int = Field(default=3, description="Max retries on LLM errors")
    retry_delay_seconds: float = Field(default=1.0, description="Delay between retries")
    skip_on_error: bool = Field(
        default=True,
        description="Skip document on extraction error vs fail pipeline"
    )

    # Validation
    validate_entity_types: bool = Field(
        default=True,
        description="Validate extracted entities against taxonomy"
    )
    validate_relation_types: bool = Field(
        default=True,
        description="Validate extracted relations against taxonomy"
    )

    # Deduplication
    deduplicate_entities: bool = Field(
        default=True,
        description="Merge duplicate entities by name+type+file"
    )

    # Confidence thresholds
    entity_confidence_threshold: float = Field(
        default=0.5,
        description="Minimum confidence for entity extraction"
    )
    relation_confidence_threshold: float = Field(
        default=0.5,
        description="Minimum confidence for relation extraction"
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class IngestionConfig(BaseModel):
    """
    Configuration for the ingestion pipeline.

    Since ingestion runs within Alita, only model names are needed.
    The Alita client handles provider details, API keys, etc.

    Instances can be built directly, or loaded with the ``from_yaml``,
    ``from_json`` and ``from_env`` alternate constructors. Every field has a
    default, so ``IngestionConfig()`` is valid.
    """

    # Model names (Alita provides the actual LLM/embedding instances)
    llm_model: str = Field(
        default="gpt-4o-mini",
        description="LLM model name (e.g., gpt-4o-mini, claude-3-sonnet)"
    )
    embedding_model: Optional[str] = Field(
        default=None,
        description="Embedding model name (optional, for semantic search)"
    )

    # Model parameters
    temperature: float = Field(default=0.0, description="LLM temperature")

    # Guardrails configuration
    guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig)

    # Graph configuration
    graph_path: str = Field(default="./knowledge_graph.json", description="Path to persist graph")
    auto_save: bool = Field(default=True, description="Auto-save after mutations")

    # Extraction settings
    extract_relations: bool = Field(default=True, description="Extract relations between entities")
    chunk_size: int = Field(default=4000, description="Document chunk size for processing")
    chunk_overlap: int = Field(default=200, description="Overlap between chunks")

    # Concurrency
    max_concurrent_extractions: int = Field(
        default=1,
        description="Max parallel extraction tasks (1 = sequential)"
    )

    @classmethod
    def from_yaml(cls, path: str) -> "IngestionConfig":
        """Load configuration from YAML file.

        Args:
            path: Path of the YAML file to read (decoded as UTF-8).

        Returns:
            An ``IngestionConfig`` built from the file's top-level mapping.
            An empty file yields a configuration with all defaults.
        """
        import yaml

        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        # safe_load returns None for an empty document; treat that as
        # "no overrides" instead of failing on cls(**None).
        if data is None:
            data = {}

        return cls(**data)

    @classmethod
    def from_json(cls, path: str) -> "IngestionConfig":
        """Load configuration from JSON file.

        Args:
            path: Path of the JSON file to read (decoded as UTF-8).

        Returns:
            An ``IngestionConfig`` built from the file's top-level object.
        """
        import json

        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        return cls(**data)

    @classmethod
    def from_env(cls) -> "IngestionConfig":
        """
        Create configuration from environment variables.

        Environment variables:
            LLM_MODEL: Model name (default: gpt-4o-mini)
            EMBEDDING_MODEL: Embedding model name (optional)
            LLM_TEMPERATURE: Temperature (default: 0.0)
            GRAPH_PATH: Path to save graph (default: ./knowledge_graph.json)
            MAX_TOKENS_PER_DOC: Max tokens per doc (default: 8000)
            MAX_ENTITIES_PER_DOC: Max entities per doc (default: 50)
            MAX_RETRIES: Max retries on LLM errors (default: 3)
            CONTENT_FILTER_ENABLED: true/false (default: true)
            EXTRACT_RELATIONS: true/false (default: true)

        Boolean variables are enabled only by the string ``true``
        (case-insensitive); any other value is treated as false.

        Raises:
            ValueError: If a numeric variable cannot be parsed by
                ``int()`` / ``float()``.
        """
        guardrails = GuardrailsConfig(
            max_tokens_per_doc=int(os.environ.get('MAX_TOKENS_PER_DOC', '8000')),
            max_entities_per_doc=int(os.environ.get('MAX_ENTITIES_PER_DOC', '50')),
            content_filter_enabled=os.environ.get('CONTENT_FILTER_ENABLED', 'true').lower() == 'true',
            max_retries=int(os.environ.get('MAX_RETRIES', '3')),
        )

        return cls(
            llm_model=os.environ.get('LLM_MODEL', 'gpt-4o-mini'),
            embedding_model=os.environ.get('EMBEDDING_MODEL'),
            temperature=float(os.environ.get('LLM_TEMPERATURE', '0.0')),
            guardrails=guardrails,
            graph_path=os.environ.get('GRAPH_PATH', './knowledge_graph.json'),
            extract_relations=os.environ.get('EXTRACT_RELATIONS', 'true').lower() == 'true',
        )

    def to_yaml(self, path: str) -> None:
        """Save configuration to YAML file.

        Args:
            path: Destination file; it is opened in write mode (UTF-8), so an
                existing file is overwritten.
        """
        import yaml

        with open(path, 'w', encoding='utf-8') as f:
            yaml.safe_dump(self.model_dump(), f, default_flow_style=False)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary (the result of ``model_dump()``)."""
        return self.model_dump()
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# Example YAML configuration template
|
|
217
|
+
# Template written verbatim by generate_config_template(). The keys nested
# under `guardrails:` are indented so that they parse as part of that mapping.
EXAMPLE_CONFIG_YAML = """# Inventory Ingestion Configuration
# Model names only - Alita provides the actual LLM/embedding instances

# LLM model name (required)
llm_model: gpt-4o-mini
temperature: 0.0

# Embedding model (optional, for semantic search)
embedding_model: text-embedding-3-small

# Guardrails - safety and control
guardrails:
  max_tokens_per_doc: 8000
  max_entities_per_doc: 50
  max_relations_per_doc: 100
  content_filter_enabled: true
  max_retries: 3
  retry_delay_seconds: 1.0
  skip_on_error: true
  entity_confidence_threshold: 0.5
  relation_confidence_threshold: 0.5
  deduplicate_entities: true
  # rate_limit_requests_per_minute: 60 # Uncomment to rate limit

# Graph persistence
graph_path: ./knowledge_graph.json
auto_save: true

# Extraction settings
extract_relations: true
chunk_size: 4000
chunk_overlap: 200
max_concurrent_extractions: 1
"""


def generate_config_template(output_path: str = "./ingestion-config.yml") -> str:
    """Generate a configuration template file.

    Writes ``EXAMPLE_CONFIG_YAML`` to ``output_path``, overwriting any
    existing file.

    Args:
        output_path: Destination path for the template.

    Returns:
        ``output_path``, unchanged.
    """
    # Explicit encoding so the output does not depend on the platform locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(EXAMPLE_CONFIG_YAML)
    return output_path
|