alita-sdk 0.3.497__py3-none-any.whl → 0.3.516__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/inventory.py +12 -195
- alita_sdk/community/inventory/__init__.py +12 -0
- alita_sdk/community/inventory/toolkit.py +9 -5
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/configurations/ado.py +144 -0
- alita_sdk/configurations/confluence.py +76 -42
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +2 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/runtime/clients/artifact.py +2 -2
- alita_sdk/runtime/clients/client.py +24 -19
- alita_sdk/runtime/clients/sandbox_client.py +14 -0
- alita_sdk/runtime/langchain/assistant.py +64 -23
- alita_sdk/runtime/langchain/constants.py +270 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +2 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +2 -1
- alita_sdk/runtime/langchain/langraph_agent.py +8 -9
- alita_sdk/runtime/langchain/utils.py +6 -1
- alita_sdk/runtime/toolkits/artifact.py +14 -5
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +26 -157
- alita_sdk/runtime/toolkits/planning.py +10 -5
- alita_sdk/runtime/toolkits/tools.py +23 -7
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/artifact.py +139 -6
- alita_sdk/runtime/tools/llm.py +20 -10
- alita_sdk/runtime/tools/mcp_remote_tool.py +2 -3
- alita_sdk/runtime/tools/mcp_server_tool.py +2 -4
- alita_sdk/runtime/utils/AlitaCallback.py +30 -1
- alita_sdk/runtime/utils/mcp_client.py +33 -6
- alita_sdk/runtime/utils/mcp_oauth.py +125 -8
- alita_sdk/runtime/utils/mcp_sse_client.py +35 -6
- alita_sdk/runtime/utils/utils.py +2 -0
- alita_sdk/tools/__init__.py +15 -0
- alita_sdk/tools/ado/repos/__init__.py +10 -12
- alita_sdk/tools/ado/test_plan/__init__.py +23 -8
- alita_sdk/tools/ado/wiki/__init__.py +24 -8
- alita_sdk/tools/ado/wiki/ado_wrapper.py +21 -7
- alita_sdk/tools/ado/work_item/__init__.py +24 -8
- alita_sdk/tools/advanced_jira_mining/__init__.py +10 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +12 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +9 -7
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +25 -0
- alita_sdk/tools/bitbucket/__init__.py +14 -10
- alita_sdk/tools/bitbucket/api_wrapper.py +50 -2
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/cloud/aws/__init__.py +9 -7
- alita_sdk/tools/cloud/azure/__init__.py +9 -7
- alita_sdk/tools/cloud/gcp/__init__.py +9 -7
- alita_sdk/tools/cloud/k8s/__init__.py +9 -7
- alita_sdk/tools/code/linter/__init__.py +9 -8
- alita_sdk/tools/code/sonar/__init__.py +9 -7
- alita_sdk/tools/confluence/__init__.py +15 -10
- alita_sdk/tools/custom_open_api/__init__.py +11 -5
- alita_sdk/tools/elastic/__init__.py +10 -8
- alita_sdk/tools/elitea_base.py +387 -9
- alita_sdk/tools/figma/__init__.py +8 -7
- alita_sdk/tools/github/__init__.py +12 -14
- alita_sdk/tools/github/github_client.py +68 -2
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +14 -11
- alita_sdk/tools/gitlab/api_wrapper.py +81 -1
- alita_sdk/tools/gitlab_org/__init__.py +9 -8
- alita_sdk/tools/google/bigquery/__init__.py +12 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +9 -8
- alita_sdk/tools/jira/__init__.py +15 -10
- alita_sdk/tools/keycloak/__init__.py +10 -8
- alita_sdk/tools/localgit/__init__.py +8 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +11 -3
- alita_sdk/tools/ocr/__init__.py +10 -8
- alita_sdk/tools/openapi/__init__.py +6 -2
- alita_sdk/tools/pandas/__init__.py +9 -7
- alita_sdk/tools/postman/__init__.py +10 -11
- alita_sdk/tools/pptx/__init__.py +9 -9
- alita_sdk/tools/qtest/__init__.py +9 -8
- alita_sdk/tools/rally/__init__.py +9 -8
- alita_sdk/tools/report_portal/__init__.py +11 -9
- alita_sdk/tools/salesforce/__init__.py +9 -9
- alita_sdk/tools/servicenow/__init__.py +10 -8
- alita_sdk/tools/sharepoint/__init__.py +9 -8
- alita_sdk/tools/slack/__init__.py +8 -7
- alita_sdk/tools/sql/__init__.py +9 -8
- alita_sdk/tools/testio/__init__.py +9 -8
- alita_sdk/tools/testrail/__init__.py +10 -8
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/xray/__init__.py +10 -8
- alita_sdk/tools/yagmail/__init__.py +8 -3
- alita_sdk/tools/zephyr/__init__.py +8 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +10 -8
- alita_sdk/tools/zephyr_essential/__init__.py +9 -8
- alita_sdk/tools/zephyr_scale/__init__.py +9 -8
- alita_sdk/tools/zephyr_squad/__init__.py +9 -8
- {alita_sdk-0.3.497.dist-info → alita_sdk-0.3.516.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.497.dist-info → alita_sdk-0.3.516.dist-info}/RECORD +109 -106
- {alita_sdk-0.3.497.dist-info → alita_sdk-0.3.516.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.497.dist-info → alita_sdk-0.3.516.dist-info}/entry_points.txt +0 -0
- {alita_sdk-0.3.497.dist-info → alita_sdk-0.3.516.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.497.dist-info → alita_sdk-0.3.516.dist-info}/top_level.txt +0 -0
alita_sdk/cli/inventory.py
CHANGED
@@ -1048,209 +1048,26 @@ def _get_checkpoint_path(graph: str, source_name: str) -> str:
 
 
 def _load_toolkit_config(toolkit_path: str) -> Dict[str, Any]:
-    """
-
-
-    Supports environment variable substitution for values like ${GITHUB_PAT}.
-    """
-    with open(toolkit_path, 'r') as f:
-        config = json.load(f)
-
-    # Recursively resolve environment variables
-    def resolve_env_vars(obj):
-        if isinstance(obj, str):
-            # Match ${VAR_NAME} pattern
-            pattern = r'\$\{([^}]+)\}'
-            matches = re.findall(pattern, obj)
-            for var_name in matches:
-                env_value = os.environ.get(var_name, '')
-                obj = obj.replace(f'${{{var_name}}}', env_value)
-            return obj
-        elif isinstance(obj, dict):
-            return {k: resolve_env_vars(v) for k, v in obj.items()}
-        elif isinstance(obj, list):
-            return [resolve_env_vars(item) for item in obj]
-        return obj
-
-    return resolve_env_vars(config)
+    """Deprecated: Use alita_sdk.community.inventory.toolkit_utils.load_toolkit_config instead."""
+    from alita_sdk.community.inventory.toolkit_utils import load_toolkit_config
+    return load_toolkit_config(toolkit_path)
 
 
 def _get_llm(ctx, model: Optional[str] = None, temperature: float = 0.0):
-    """
+    """Deprecated: Use alita_sdk.community.inventory.toolkit_utils.get_llm_for_config instead."""
     from .cli import get_client
+    from alita_sdk.community.inventory.toolkit_utils import get_llm_for_config
 
-    # Get Alita client - this will raise ClickException if not configured
     client = get_client(ctx)
-
-    # Get model name from parameter or default
-    model_name = model or 'gpt-4o-mini'
-
-    # Use client.get_llm() which is the actual method
-    return client.get_llm(
-        model_name=model_name,
-        model_config={
-            'temperature': temperature,
-            'max_tokens': 4096
-        }
-    )
+    return get_llm_for_config(client, model=model, temperature=temperature)
 
 
 def _get_source_toolkit(toolkit_config: Dict[str, Any]):
-    """
-
-
-    Uses the SDK's toolkit factory pattern - all toolkits extend BaseCodeToolApiWrapper
-    or BaseVectorStoreToolApiWrapper, which have loader() and chunker() methods.
-
-    Also supports CLI-specific toolkits like 'filesystem' for local document loading.
+    """Deprecated: Use alita_sdk.community.inventory.toolkit_utils.get_source_toolkit instead."""
+    from alita_sdk.community.inventory.toolkit_utils import get_source_toolkit
 
-    Args:
-        toolkit_config: Toolkit configuration dict with 'type' and settings
-
-    Returns:
-        API wrapper instance with loader() method
-    """
-    source = toolkit_config.get('type')
-    if not source:
-        raise click.ClickException("Toolkit config missing 'type' field")
-
-    # Handle filesystem type (CLI-specific, not in AVAILABLE_TOOLS)
-    if source == 'filesystem':
-        from .tools.filesystem import FilesystemApiWrapper
-
-        base_directory = (
-            toolkit_config.get('base_directory') or
-            toolkit_config.get('path') or
-            toolkit_config.get('git_root_dir')
-        )
-
-        if not base_directory:
-            raise click.ClickException(
-                "Filesystem toolkit requires 'base_directory' or 'path' field"
-            )
-
-        return FilesystemApiWrapper(
-            base_directory=base_directory,
-            recursive=toolkit_config.get('recursive', True),
-            follow_symlinks=toolkit_config.get('follow_symlinks', False),
-        )
-
-    # Handle standard SDK toolkits via AVAILABLE_TOOLS registry
-    from alita_sdk.tools import AVAILABLE_TOOLS
-
-    # Check if toolkit type is available
-    if source not in AVAILABLE_TOOLS:
-        raise click.ClickException(
-            f"Unknown toolkit type: {source}. "
-            f"Available: {', '.join(list(AVAILABLE_TOOLS.keys()) + ['filesystem'])}"
-        )
-
-    toolkit_info = AVAILABLE_TOOLS[source]
-
-    # Get the toolkit class
-    if 'toolkit_class' not in toolkit_info:
-        raise click.ClickException(
-            f"Toolkit '{source}' does not have a toolkit_class registered"
-        )
-
-    toolkit_class = toolkit_info['toolkit_class']
-
-    # Build kwargs from toolkit config - we need to map config to API wrapper params
-    kwargs = dict(toolkit_config)
-
-    # Remove fields that aren't needed for the API wrapper
-    kwargs.pop('type', None)
-    kwargs.pop('toolkit_name', None)
-    kwargs.pop('selected_tools', None)
-    kwargs.pop('excluded_tools', None)
-
-    # Handle common config patterns - flatten nested configurations
-    config_key = f"{source}_configuration"
-    if config_key in kwargs:
-        nested_config = kwargs.pop(config_key)
-        if isinstance(nested_config, dict):
-            kwargs.update(nested_config)
-
-    # Handle ADO-specific config pattern
-    if 'ado_configuration' in kwargs:
-        ado_config = kwargs.pop('ado_configuration')
-        if isinstance(ado_config, dict):
-            kwargs.update(ado_config)
-
-    # Expand environment variables in string values (e.g., ${GITHUB_PAT})
-    def expand_env_vars(value):
-        """Recursively expand environment variables in values."""
-        if isinstance(value, str):
-            import re
-            # Match ${VAR} or $VAR patterns
-            pattern = r'\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)'
-            def replace(match):
-                var_name = match.group(1) or match.group(2)
-                return os.environ.get(var_name, match.group(0))
-            return re.sub(pattern, replace, value)
-        elif isinstance(value, dict):
-            return {k: expand_env_vars(v) for k, v in value.items()}
-        elif isinstance(value, list):
-            return [expand_env_vars(v) for v in value]
-        return value
-
-    kwargs = expand_env_vars(kwargs)
-
-    # Map common field names to API wrapper expected names
-    # GitHub: personal_access_token -> github_access_token
-    if 'personal_access_token' in kwargs and source == 'github':
-        kwargs['github_access_token'] = kwargs.pop('personal_access_token')
-
-    # GitHub: repository -> github_repository
-    if 'repository' in kwargs and source == 'github':
-        kwargs['github_repository'] = kwargs.pop('repository')
-
-    # Ensure active_branch has a default
-    if 'active_branch' not in kwargs:
-        kwargs['active_branch'] = kwargs.get('base_branch', 'main')
-
-    # Get the API wrapper class from the toolkit
-    # Introspect toolkit to find the API wrapper class it uses
     try:
-
-
-
-
-            wrapper_module = importlib.import_module(module_path)
-        except ImportError:
-            # Try alternate path for nested modules
-            module_path = f"alita_sdk.tools.{source.replace('_', '.')}.api_wrapper"
-            wrapper_module = importlib.import_module(module_path)
-
-        # Find the API wrapper class - look for class containing ApiWrapper/APIWrapper
-        api_wrapper_class = None
-        for name in dir(wrapper_module):
-            obj = getattr(wrapper_module, name)
-            if (isinstance(obj, type) and
-                    ('ApiWrapper' in name or 'APIWrapper' in name) and
-                    name not in ('BaseCodeToolApiWrapper', 'BaseVectorStoreToolApiWrapper', 'BaseToolApiWrapper')):
-                api_wrapper_class = obj
-                break
-
-        if not api_wrapper_class:
-            raise click.ClickException(
-                f"Could not find API wrapper class in {module_path}"
-            )
-
-        # Instantiate the API wrapper directly
-        api_wrapper = api_wrapper_class(**kwargs)
-
-        # Verify it has loader method
-        if not hasattr(api_wrapper, 'loader'):
-            raise click.ClickException(
-                f"API wrapper '{api_wrapper_class.__name__}' has no loader() method"
-            )
-
-        return api_wrapper
-
-    except ImportError as e:
-        logger.exception(f"Failed to import API wrapper for {source}")
-        raise click.ClickException(f"Failed to import {source} API wrapper: {e}")
-    except Exception as e:
-        logger.exception(f"Failed to instantiate toolkit {source}")
-        raise click.ClickException(f"Failed to create {source} toolkit: {e}")
+        return get_source_toolkit(toolkit_config)
+    except ValueError as e:
+        # Convert ValueError to ClickException for CLI context
+        raise click.ClickException(str(e))
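The three CLI helpers above are reduced to thin deprecation shims around the shared utilities in alita_sdk.community.inventory.toolkit_utils. A minimal sketch of calling those utilities directly (the config file name is a placeholder):

from alita_sdk.community.inventory.toolkit_utils import load_toolkit_config, get_source_toolkit

config = load_toolkit_config("toolkit.json")  # placeholder path; ${VAR} placeholders are resolved from the environment
toolkit = get_source_toolkit(config)          # raises ValueError for unknown or invalid toolkit types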
alita_sdk/community/inventory/__init__.py
CHANGED
@@ -85,6 +85,13 @@ from .ingestion import (
 # Retrieval Toolkit - for querying graphs
 from .retrieval import InventoryRetrievalApiWrapper
 
+# Toolkit utilities - for configuration and instantiation
+from .toolkit_utils import (
+    load_toolkit_config,
+    get_llm_for_config,
+    get_source_toolkit,
+)
+
 # Core graph types
 from .knowledge_graph import KnowledgeGraph, Citation
 
@@ -187,6 +194,11 @@ __all__ = [
     'InventoryRetrievalToolkit',
     'InventoryRetrievalApiWrapper',
 
+    # Toolkit utilities
+    'load_toolkit_config',
+    'get_llm_for_config',
+    'get_source_toolkit',
+
     # Core types
     'KnowledgeGraph',
     'Citation',
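Because the utilities are also added to __all__ above, they can be imported from the package root as well:

from alita_sdk.community.inventory import (
    load_toolkit_config,
    get_llm_for_config,
    get_source_toolkit,
)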
alita_sdk/community/inventory/toolkit.py
CHANGED
@@ -15,7 +15,7 @@ from pydantic import BaseModel, Field, ConfigDict, create_model
 
 from .retrieval import InventoryRetrievalApiWrapper
 from ...tools.base.tool import BaseAction
-from ...tools.utils import clean_string,
+from ...tools.utils import clean_string, get_max_toolkit_length
 
 
 class InventoryRetrievalToolkit(BaseToolkit):
@@ -144,17 +144,21 @@ class InventoryRetrievalToolkit(BaseToolkit):
         # Build tool mapping
         tool_map = {t['name']: t for t in available_tools}
 
-        #
-
+        # Use clean toolkit name for context (max 1000 chars in description)
+        toolkit_context = f" [Toolkit: {clean_string(toolkit_name, 0)}]" if toolkit_name else ''
 
         tools = []
         for tool_name in selected_tools:
             if tool_name in tool_map:
                 tool_info = tool_map[tool_name]
+                # Add toolkit context to description with character limit
+                description = tool_info['description']
+                if toolkit_context and len(description + toolkit_context) <= 1000:
+                    description = description + toolkit_context
                 tools.append(BaseAction(
                     api_wrapper=api_wrapper,
-                    name=
-                    description=
+                    name=tool_name,
+                    description=description,
                     args_schema=tool_info['args_schema']
                 ))
 
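The change above appends a "[Toolkit: ...]" marker to each tool description only when the combined text stays within 1000 characters. A standalone sketch of that rule (the helper name is illustrative; the real code also passes the name through clean_string):

def with_toolkit_context(description: str, toolkit_name: str) -> str:
    # Mirrors the cap applied in InventoryRetrievalToolkit above.
    toolkit_context = f" [Toolkit: {toolkit_name}]" if toolkit_name else ""
    if toolkit_context and len(description + toolkit_context) <= 1000:
        return description + toolkit_context
    return description

print(with_toolkit_context("Query the inventory knowledge graph.", "inventory_retrieval"))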
alita_sdk/community/inventory/toolkit_utils.py
ADDED
@@ -0,0 +1,176 @@
+"""
+Toolkit configuration and instantiation utilities for inventory ingestion.
+
+This module provides functions to load toolkit configurations, instantiate source
+toolkits from various sources (filesystem, GitHub, ADO), and get LLM instances
+for entity extraction.
+"""
+
+import json
+import os
+import re
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from alita_sdk.alita_client import AlitaClient
+
+
+def load_toolkit_config(toolkit_path: str) -> Dict[str, Any]:
+    """
+    Load and parse a toolkit config JSON file.
+
+    Supports environment variable substitution for values like ${GITHUB_PAT}.
+
+    Args:
+        toolkit_path: Path to the toolkit configuration JSON file
+
+    Returns:
+        Dictionary containing the parsed and environment-resolved configuration
+
+    Example:
+        >>> config = load_toolkit_config("configs/github_toolkit.json")
+        >>> config['type']
+        'github'
+    """
+    with open(toolkit_path, 'r') as f:
+        config = json.load(f)
+
+    # Recursively resolve environment variables
+    def resolve_env_vars(obj):
+        if isinstance(obj, str):
+            # Match ${VAR_NAME} pattern
+            pattern = r'\$\{([^}]+)\}'
+            matches = re.findall(pattern, obj)
+            for var_name in matches:
+                env_value = os.environ.get(var_name, '')
+                obj = obj.replace(f'${{{var_name}}}', env_value)
+            return obj
+        elif isinstance(obj, dict):
+            return {k: resolve_env_vars(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [resolve_env_vars(item) for item in obj]
+        return obj
+
+    return resolve_env_vars(config)
+
+
+def get_llm_for_config(
+    client: AlitaClient,
+    model: Optional[str] = None,
+    temperature: float = 0.0
+):
+    """
+    Get LLM instance from Alita client for entity extraction.
+
+    Args:
+        client: AlitaClient instance
+        model: Model name (defaults to 'gpt-4o-mini' if not specified)
+        temperature: Temperature for the model (default 0.0 for deterministic output)
+
+    Returns:
+        LLM instance configured with the specified model and parameters
+
+    Example:
+        >>> client = AlitaClient(...)
+        >>> llm = get_llm_for_config(client, model='gpt-4o', temperature=0.0)
+    """
+    model_name = model or 'gpt-4o-mini'
+
+    return client.get_llm(
+        model_name=model_name,
+        model_config={
+            'temperature': temperature,
+            'max_tokens': 4096
+        }
+    )
+
+
+def get_source_toolkit(toolkit_config: Dict[str, Any]):
+    """
+    Instantiate a source toolkit from configuration.
+
+    Supports filesystem, GitHub, and Azure DevOps (ADO) toolkit types. For SDK-based
+    toolkits (GitHub, ADO), automatically handles configuration mapping and toolkit
+    instantiation from the registry.
+
+    Args:
+        toolkit_config: Toolkit configuration dictionary with 'type' key
+            and type-specific parameters
+
+    Returns:
+        Instantiated toolkit object ready for ingestion
+
+    Raises:
+        ValueError: If toolkit type is unsupported or configuration is invalid
+
+    Example:
+        >>> # Filesystem toolkit
+        >>> config = {'type': 'filesystem', 'base_path': '/path/to/code'}
+        >>> toolkit = get_source_toolkit(config)
+
+        >>> # GitHub toolkit
+        >>> config = {
+        ...     'type': 'github',
+        ...     'github_token': 'ghp_...',
+        ...     'github_repository': 'owner/repo',
+        ...     'github_branch': 'main'
+        ... }
+        >>> toolkit = get_source_toolkit(config)
+    """
+    from alita_sdk.community.inventory.filesystem_toolkit import FilesystemToolkit
+    from alita_sdk.community.toolkits import AVAILABLE_TOOLS
+
+    toolkit_type = toolkit_config.get('type')
+
+    if toolkit_type == 'filesystem':
+        base_path = toolkit_config.get('base_path')
+        if not base_path:
+            raise ValueError("Filesystem toolkit requires 'base_path' configuration")
+        return FilesystemToolkit(base_path=Path(base_path))
+
+    # Handle SDK toolkits (GitHub, ADO)
+    if toolkit_type not in AVAILABLE_TOOLS:
+        raise ValueError(
+            f"Unknown toolkit type: {toolkit_type}. "
+            f"Available types: filesystem, {', '.join(AVAILABLE_TOOLS.keys())}"
+        )
+
+    toolkit_class = AVAILABLE_TOOLS[toolkit_type]
+
+    # Flatten nested config if needed
+    config_for_init = {}
+    for key, value in toolkit_config.items():
+        if key == 'type':
+            continue
+        if isinstance(value, dict):
+            # Flatten nested dicts
+            config_for_init.update(value)
+        else:
+            config_for_init[key] = value
+
+    # Map field names for specific toolkit types
+    if toolkit_type == 'github':
+        field_mapping = {
+            'github_token': 'token',
+            'github_repository': 'repository',
+            'github_branch': 'branch'
+        }
+        config_for_init = {
+            field_mapping.get(k, k): v
+            for k, v in config_for_init.items()
+        }
+    elif toolkit_type == 'ado':
+        field_mapping = {
+            'ado_token': 'token',
+            'ado_organization': 'organization',
+            'ado_project': 'project',
+            'ado_repository': 'repository',
+            'ado_branch': 'branch'
+        }
+        config_for_init = {
+            field_mapping.get(k, k): v
+            for k, v in config_for_init.items()
+        }
+
+    # Instantiate toolkit
+    return toolkit_class(**config_for_init)
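Putting the new module together, a usage sketch based on its docstrings (the token and repository values are placeholders; get_llm_for_config additionally expects an AlitaClient instance and defaults to the gpt-4o-mini model):

from alita_sdk.community.inventory import get_source_toolkit

github_config = {
    'type': 'github',
    'github_token': 'ghp_...',           # placeholder PAT
    'github_repository': 'owner/repo',
    'github_branch': 'main',
}
toolkit = get_source_toolkit(github_config)  # github_* keys are remapped to token/repository/branch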
alita_sdk/configurations/ado.py
CHANGED
@@ -1,5 +1,8 @@
+import re
 from typing import Optional
+from urllib.parse import quote
 
+import requests
 from pydantic import BaseModel, ConfigDict, Field, SecretStr
 
 
@@ -19,6 +22,147 @@ class AdoConfiguration(BaseModel):
     project: str = Field(description="ADO project")
     token: Optional[SecretStr] = Field(description="ADO Token")
 
+    @staticmethod
+    def check_connection(settings: dict) -> str | None:
+        """
+        Test the connection to Azure DevOps API.
+
+        Args:
+            settings: Dictionary containing 'organization_url', 'project', and optionally 'token'
+
+        Returns:
+            None if connection is successful, error message string otherwise
+        """
+        organization_url = settings.get("organization_url")
+        if organization_url is None or organization_url == "":
+            if organization_url == "":
+                return "Organization URL cannot be empty"
+            return "Organization URL is required"
+
+        # Validate organization URL format
+        if not isinstance(organization_url, str):
+            return "Organization URL must be a string"
+
+        organization_url = organization_url.strip()
+        if not organization_url:
+            return "Organization URL cannot be empty"
+
+        if not organization_url.startswith(("http://", "https://")):
+            return "Organization URL must start with http:// or https://"
+
+        # Remove trailing slash for consistency
+        organization_url = organization_url.rstrip("/")
+
+        project = settings.get("project")
+        if project is None or project == "":
+            if project == "":
+                return "Project cannot be empty"
+            return "Project is required"
+
+        # Validate project format
+        if not isinstance(project, str):
+            return "Project must be a string"
+
+        project = project.strip()
+        if not project:
+            return "Project cannot be empty"
+
+        token = settings.get("token")
+
+        # Extract secret value if it's a SecretStr
+        if token is not None and hasattr(token, "get_secret_value"):
+            token = token.get_secret_value()
+
+        # Validate token if provided
+        if token is not None and (not token or not token.strip()):
+            return "Token cannot be empty if provided"
+
+        # NOTE on verification strategy:
+        # - Project endpoints can work anonymously for public projects.
+        #   That makes them a weak signal for detecting a bad/expired token.
+        # - If a token is provided, first validate it against a profile endpoint
+        #   that requires authentication, then check project access.
+
+        # Strictly require a canonical organization URL so we can build reliable API URLs.
+        # Supported formats:
+        #   - https://dev.azure.com/<org>
+        #   - https://<org>.visualstudio.com
+        org_name: str | None = None
+        org_url_kind: str | None = None  # 'dev.azure.com' | '*.visualstudio.com'
+        m = re.match(r"^https?://dev\.azure\.com/(?P<org>[^/]+)$", organization_url, flags=re.IGNORECASE)
+        if m:
+            org_name = m.group('org')
+            org_url_kind = 'dev.azure.com'
+        else:
+            m = re.match(r"^https?://(?P<org>[^/.]+)\.visualstudio\.com$", organization_url, flags=re.IGNORECASE)
+            if m:
+                org_name = m.group('org')
+                org_url_kind = '*.visualstudio.com'
+
+        if org_name is None:
+            return (
+                "Organization URL format is invalid. Use 'https://dev.azure.com/<org>' "
+                "(recommended) or 'https://<org>.visualstudio.com'."
+            )
+
+        project_encoded = quote(project, safe="")
+        project_url = f"{organization_url}/_apis/projects/{project_encoded}?api-version=7.0"
+        # Auth-required endpoint to validate PAT (works regardless of project visibility)
+        if org_url_kind == 'dev.azure.com':
+            profile_url = f"https://vssps.dev.azure.com/{org_name}/_apis/profile/profiles/me?api-version=7.1-preview.3"
+        else:
+            # For legacy org URLs, use the matching vssps host
+            profile_url = f"https://{org_name}.vssps.visualstudio.com/_apis/profile/profiles/me?api-version=7.1-preview.3"
+
+        try:
+            headers = {}
+            if token:
+                # Use Basic Auth with PAT token (username can be empty)
+                from requests.auth import HTTPBasicAuth
+                auth = HTTPBasicAuth("", token)
+
+                # 1) Validate token first (strong signal)
+                profile_resp = requests.get(profile_url, auth=auth, timeout=10)
+                if profile_resp.status_code == 200:
+                    pass
+                elif profile_resp.status_code == 401:
+                    return "Invalid or expired token (PAT). Please generate a new token and try again."
+                elif profile_resp.status_code == 403:
+                    return "Token is valid but lacks permission to access profile. Check PAT scopes/permissions."
+                elif profile_resp.status_code == 404:
+                    return "Organization not found. Verify the Organization URL."
+                else:
+                    return f"Token validation failed (HTTP {profile_resp.status_code})."
+
+                # 2) Validate project access
+                response = requests.get(project_url, auth=auth, timeout=10)
+            else:
+                # Try without authentication (works for public projects)
+                response = requests.get(project_url, headers=headers, timeout=10)
+
+            if response.status_code == 200:
+                return None  # Connection successful
+            elif response.status_code == 401:
+                if token:
+                    return "Not authorized. Token may be invalid for this organization or expired."
+                else:
+                    return "Authentication required - project may be private"
+            elif response.status_code == 403:
+                return "Access forbidden - token may lack required permissions for this project"
+            elif response.status_code == 404:
+                return f"Project '{project}' not found or not accessible. Check project name and organization URL."
+            else:
+                return f"Connection failed (HTTP {response.status_code})."
+
+        except requests.exceptions.Timeout:
+            return "Connection timeout - Azure DevOps did not respond within 10 seconds"
+        except requests.exceptions.ConnectionError:
+            return "Connection error - unable to reach Azure DevOps. Check the Organization URL and your network."
+        except requests.exceptions.RequestException as e:
+            return f"Request failed: {str(e)}"
+        except Exception:
+            return "Unexpected error during Azure DevOps connection check"
+
 
 class AdoReposConfiguration(BaseModel):
     model_config = ConfigDict(