alita-sdk 0.3.465__py3-none-any.whl → 0.3.497__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

This version of alita-sdk has been flagged as potentially problematic.

Files changed (103)
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +83 -1
  3. alita_sdk/cli/agent_loader.py +22 -4
  4. alita_sdk/cli/agent_ui.py +13 -3
  5. alita_sdk/cli/agents.py +1876 -186
  6. alita_sdk/cli/callbacks.py +96 -25
  7. alita_sdk/cli/cli.py +10 -1
  8. alita_sdk/cli/config.py +151 -9
  9. alita_sdk/cli/context/__init__.py +30 -0
  10. alita_sdk/cli/context/cleanup.py +198 -0
  11. alita_sdk/cli/context/manager.py +731 -0
  12. alita_sdk/cli/context/message.py +285 -0
  13. alita_sdk/cli/context/strategies.py +289 -0
  14. alita_sdk/cli/context/token_estimation.py +127 -0
  15. alita_sdk/cli/input_handler.py +167 -4
  16. alita_sdk/cli/inventory.py +1256 -0
  17. alita_sdk/cli/toolkit.py +14 -17
  18. alita_sdk/cli/toolkit_loader.py +35 -5
  19. alita_sdk/cli/tools/__init__.py +8 -1
  20. alita_sdk/cli/tools/filesystem.py +910 -64
  21. alita_sdk/cli/tools/planning.py +143 -157
  22. alita_sdk/cli/tools/terminal.py +154 -20
  23. alita_sdk/community/__init__.py +64 -8
  24. alita_sdk/community/inventory/__init__.py +224 -0
  25. alita_sdk/community/inventory/config.py +257 -0
  26. alita_sdk/community/inventory/enrichment.py +2137 -0
  27. alita_sdk/community/inventory/extractors.py +1469 -0
  28. alita_sdk/community/inventory/ingestion.py +3172 -0
  29. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  30. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  31. alita_sdk/community/inventory/parsers/base.py +295 -0
  32. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  33. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  34. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  35. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  36. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  37. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  38. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  39. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  40. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  41. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  42. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  43. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  44. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  45. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  46. alita_sdk/community/inventory/patterns/loader.py +348 -0
  47. alita_sdk/community/inventory/patterns/registry.py +198 -0
  48. alita_sdk/community/inventory/presets.py +535 -0
  49. alita_sdk/community/inventory/retrieval.py +1403 -0
  50. alita_sdk/community/inventory/toolkit.py +169 -0
  51. alita_sdk/community/inventory/visualize.py +1370 -0
  52. alita_sdk/configurations/bitbucket.py +0 -3
  53. alita_sdk/runtime/clients/client.py +108 -31
  54. alita_sdk/runtime/langchain/assistant.py +4 -2
  55. alita_sdk/runtime/langchain/constants.py +3 -1
  56. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  57. alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
  58. alita_sdk/runtime/langchain/langraph_agent.py +123 -31
  59. alita_sdk/runtime/llms/preloaded.py +2 -6
  60. alita_sdk/runtime/toolkits/__init__.py +2 -0
  61. alita_sdk/runtime/toolkits/application.py +1 -1
  62. alita_sdk/runtime/toolkits/mcp.py +107 -91
  63. alita_sdk/runtime/toolkits/planning.py +173 -0
  64. alita_sdk/runtime/toolkits/tools.py +59 -7
  65. alita_sdk/runtime/tools/artifact.py +46 -17
  66. alita_sdk/runtime/tools/function.py +2 -1
  67. alita_sdk/runtime/tools/llm.py +320 -32
  68. alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
  69. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  70. alita_sdk/runtime/tools/planning/models.py +246 -0
  71. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  72. alita_sdk/runtime/tools/vectorstore_base.py +44 -9
  73. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  74. alita_sdk/runtime/utils/mcp_client.py +465 -0
  75. alita_sdk/runtime/utils/mcp_oauth.py +80 -0
  76. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  77. alita_sdk/runtime/utils/streamlit.py +6 -10
  78. alita_sdk/runtime/utils/toolkit_utils.py +14 -5
  79. alita_sdk/tools/__init__.py +54 -27
  80. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
  81. alita_sdk/tools/base_indexer_toolkit.py +99 -20
  82. alita_sdk/tools/bitbucket/__init__.py +2 -2
  83. alita_sdk/tools/chunkers/__init__.py +3 -1
  84. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  85. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  86. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  87. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  88. alita_sdk/tools/code_indexer_toolkit.py +55 -22
  89. alita_sdk/tools/confluence/api_wrapper.py +63 -14
  90. alita_sdk/tools/elitea_base.py +86 -21
  91. alita_sdk/tools/jira/__init__.py +1 -1
  92. alita_sdk/tools/jira/api_wrapper.py +91 -40
  93. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  94. alita_sdk/tools/qtest/__init__.py +1 -1
  95. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  96. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  97. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  98. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +2 -1
  99. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +103 -61
  100. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
  101. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
  102. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
  103. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/utils/streamlit.py
@@ -287,7 +287,6 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
             model_config={
                 "temperature": 0.1,
                 "max_tokens": 1000,
-                "top_p": 1.0
             }
         )
     except Exception as e:
@@ -1256,7 +1255,6 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
             model_config={
                 "temperature": 0.1,
                 "max_tokens": 1000,
-                "top_p": 1.0
             }
         )
     except Exception as e:
@@ -1387,20 +1385,18 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
             help="Maximum number of tokens in the AI response"
         )
 
-        top_p = st.slider(
-            "Top-p:",
-            min_value=0.1,
-            max_value=1.0,
-            value=1.0,
-            step=0.1,
-            help="Controls diversity via nucleus sampling"
+        reasoning_effort = st.selectbox(
+            "Reasoning effort:",
+            options=['null', 'low', 'medium', 'high'],
+            index=0,
+            help="Higher effort better reasoning, slower response"
         )
 
         # Create LLM config
         llm_config = {
            'max_tokens': max_tokens,
            'temperature': temperature,
-           'top_p': top_p
+           'reasoning_effort': reasoning_effort
        }
 
        col1, col2 = st.columns([3, 1])
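The UI swap also changes the config contract downstream: callers that previously forwarded `top_p` now forward `reasoning_effort`, with the string 'null' as the default option. A minimal sketch of how a consumer might normalize that value before an LLM call (the normalization step is an assumption, not shown in the diff):

    # Hypothetical helper; 'null' is the literal selectbox option from the diff,
    # and dropping the key before the call is an assumed convention.
    def build_llm_config(max_tokens: int, temperature: float, reasoning_effort: str) -> dict:
        config = {'max_tokens': max_tokens, 'temperature': temperature}
        if reasoning_effort != 'null':
            config['reasoning_effort'] = reasoning_effort  # 'low' | 'medium' | 'high'
        return config

    build_llm_config(1000, 0.1, 'medium')
    # -> {'max_tokens': 1000, 'temperature': 0.1, 'reasoning_effort': 'medium'}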
alita_sdk/runtime/utils/toolkit_utils.py
@@ -12,7 +12,9 @@ logger = logging.getLogger(__name__)
 
 def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
                                     llm_client: Any,
-                                    alita_client: Optional[Any] = None) -> List[Any]:
+                                    alita_client: Optional[Any] = None,
+                                    mcp_tokens: Optional[Dict[str, Any]] = None,
+                                    use_prefix: bool = False) -> List[Any]:
     """
     Instantiate a toolkit with LLM client support.
 
@@ -22,7 +24,11 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
     Args:
         toolkit_config: Configuration dictionary for the toolkit
         llm_client: LLM client instance for tools that need LLM capabilities
-        client: Optional additional client instance
+        alita_client: Optional additional client instance
+        mcp_tokens: Optional dictionary of MCP OAuth tokens by server URL
+        use_prefix: If True, tools get prefixed with toolkit_name to prevent collisions
+            (for agent use). If False, tools use base names only (for testing interface).
+            Default False for backward compatibility with testing.
 
     Returns:
         List of instantiated tools from the toolkit
@@ -52,16 +58,17 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
     toolkit_type = toolkit_config.get('type', toolkit_name.lower())
 
     # Create a tool configuration dict with required fields
+    # Note: MCP toolkit always requires toolkit_name, other toolkits respect use_prefix flag
     tool_config = {
         'id': toolkit_config.get('id', random.randint(1, 1000000)),
         'type': toolkit_config.get('type', toolkit_type),
         'settings': settings,
-        'toolkit_name': toolkit_name
+        'toolkit_name': toolkit_name if (use_prefix or toolkit_type == 'mcp') else None
     }
 
     # Get tools using the toolkit configuration with clients
-    # Parameter order: get_tools(tools_list, alita_client, llm, memory_store)
-    tools = get_tools([tool_config], alita_client, llm_client)
+    # Parameter order: get_tools(tools_list, alita_client, llm, memory_store, debug_mode, mcp_tokens)
+    tools = get_tools([tool_config], alita_client, llm_client, mcp_tokens=mcp_tokens)
 
     if not tools:
         logger.warning(f"No tools returned for toolkit {toolkit_name}")
@@ -73,9 +80,11 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
     except Exception as e:
         # Re-raise McpAuthorizationRequired without logging as error
         from ..utils.mcp_oauth import McpAuthorizationRequired
+
         if isinstance(e, McpAuthorizationRequired):
             logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization")
             raise
+
         # Log and re-raise other errors
         logger.error(f"Error instantiating toolkit {toolkit_name} with client: {str(e)}")
         raise
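Taken together, these hunks extend `instantiate_toolkit_with_client` without breaking existing callers: both new parameters default to no-op values. A hedged caller-side sketch (the config keys and client objects are illustrative assumptions):

    # Assumed import path, inferred from the hunk context in this file.
    from alita_sdk.runtime.utils.toolkit_utils import instantiate_toolkit_with_client

    tools = instantiate_toolkit_with_client(
        toolkit_config={"type": "github", "toolkit_name": "gh", "settings": {"repo": "org/repo"}},
        llm_client=llm_client,        # assumed constructed elsewhere
        alita_client=alita_client,    # optional
        mcp_tokens={"https://mcp.example.com": {"access_token": "<token>"}},  # keyed by server URL
        use_prefix=True,              # prefix tool names with toolkit_name to avoid collisions
    )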
alita_sdk/tools/__init__.py
@@ -13,6 +13,30 @@ AVAILABLE_TOOLS = {}
 AVAILABLE_TOOLKITS = {}
 FAILED_IMPORTS = {}
 
+
+def _inject_toolkit_id(tool_conf: dict, toolkit_tools) -> None:
+    """Inject `toolkit_id` into tools that expose `api_wrapper.toolkit_id`.
+
+    This reads 'id' from the tool configuration and, if it is an integer,
+    assigns it to the 'toolkit_id' attribute of the 'api_wrapper' for each
+    tool in 'toolkit_tools' that supports it.
+
+    Args:
+        tool_conf: Raw tool configuration item from 'tools_list'.
+        toolkit_tools: List of instantiated tools produced by a toolkit.
+    """
+    toolkit_id = tool_conf.get('id')
+    if isinstance(toolkit_id, int):
+        for t in toolkit_tools:
+            if hasattr(t, 'api_wrapper') and hasattr(t.api_wrapper, 'toolkit_id'):
+                t.api_wrapper.toolkit_id = toolkit_id
+    else:
+        logger.error(
+            f"Toolkit ID is missing or not an integer for tool "
+            f"`{tool_conf.get('type', '')}` with name `{tool_conf.get('name', '')}`"
+        )
+
+
 def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_class_name=None):
     """Safely import a tool module and register available functions/classes."""
     try:
@@ -34,6 +58,7 @@ def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_class
         FAILED_IMPORTS[tool_name] = str(e)
         logger.debug(f"Failed to import {tool_name}: {e}")
 
+
 # Safe imports for all tools
 _safe_import_tool('github', 'github', 'get_tools', 'AlitaGitHubToolkit')
 _safe_import_tool('openapi', 'openapi', 'get_tools')
@@ -90,11 +115,19 @@ available_count = len(AVAILABLE_TOOLS)
 total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
 logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
 
+# Import community module to trigger community toolkit registration
+try:
+    from alita_sdk import community  # noqa: F401
+    logger.debug("Community toolkits registered successfully")
+except ImportError as e:
+    logger.debug(f"Community module not available: {e}")
+
 
 def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
     tools = []
 
     for tool in tools_list:
+        toolkit_tools = []
         settings = tool.get('settings')
 
         # Skip tools without settings early
@@ -116,53 +149,47 @@ def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args,
 
         # Set pgvector collection schema if present
         if settings.get('pgvector_configuration'):
-            settings['pgvector_configuration']['collection_schema'] = str(tool['id'])
+            # Use tool id if available, otherwise use toolkit_name or type as fallback
+            collection_id = tool.get('id') or tool.get('toolkit_name') or tool_type
+            settings['pgvector_configuration']['collection_schema'] = str(collection_id)
 
         # Handle ADO special cases
         if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
-            tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
-            continue
-
-        # Handle ADO repos aliases
-        if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
+            toolkit_tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
+        elif tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
             try:
-                tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
+                toolkit_tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
             except Exception as e:
                 logger.error(f"Error getting ADO repos tools: {e}")
-            continue
-
-        # Skip MCP toolkit - it's handled by runtime/toolkits/tools.py to avoid duplicate loading
-        if tool_type == 'mcp':
+        elif tool_type == 'mcp':
             logger.debug(f"Skipping MCP toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
-            continue
-
-        # Handle standard tools
-        if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
+        elif tool_type == 'planning':
+            logger.debug(f"Skipping planning toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
+        elif tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
             try:
-                tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
+                toolkit_tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
             except Exception as e:
                 logger.error(f"Error getting tools for {tool_type}: {e}")
                 raise ToolException(f"Error getting tools for {tool_type}: {e}")
-            continue
-
-        # Handle custom modules
-        if settings.get("module"):
+        elif settings.get("module"):
             try:
                 mod = import_module(settings.pop("module"))
                 tkitclass = getattr(mod, settings.pop("class"))
                 get_toolkit_params = settings.copy()
                 get_toolkit_params["name"] = tool.get("name")
                 toolkit = tkitclass.get_toolkit(**get_toolkit_params)
-                tools.extend(toolkit.get_tools())
+                toolkit_tools.extend(toolkit.get_tools())
             except Exception as e:
                 logger.error(f"Error in getting custom toolkit: {e}")
-            continue
-
-        # Tool not available
-        if tool_type in FAILED_IMPORTS:
-            logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
         else:
-            logger.warning(f"Unknown tool type: {tool_type}")
+            if tool_type in FAILED_IMPORTS:
+                logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
+            else:
+                logger.warning(f"Unknown tool type: {tool_type}")
+        #
+        # Always inject toolkit_id to each tool
+        _inject_toolkit_id(tool, toolkit_tools)
+        tools.extend(toolkit_tools)
 
     return tools
 
alita_sdk/tools/ado/repos/repos_wrapper.py
@@ -111,8 +111,7 @@ class ArgsSchema(Enum):
             Field(
                 description=(
                     "Branch to be used for read file operation."
-                ),
-                default=None
+                )
             ),
         )
     )
alita_sdk/tools/base_indexer_toolkit.py
@@ -2,6 +2,7 @@ import copy
 import json
 import logging
 import time
+from enum import Enum
 from typing import Any, Optional, List, Dict, Generator
 
 from langchain_core.callbacks import dispatch_custom_event
@@ -16,7 +17,17 @@ from ..runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
-DEFAULT_CUT_OFF = 0.2
+DEFAULT_CUT_OFF = 0.1
+INDEX_META_UPDATE_INTERVAL = 600.0
+
+class IndexTools(str, Enum):
+    """Enum for index-related tool names."""
+    INDEX_DATA = "index_data"
+    SEARCH_INDEX = "search_index"
+    STEPBACK_SEARCH_INDEX = "stepback_search_index"
+    STEPBACK_SUMMARY_INDEX = "stepback_summary_index"
+    REMOVE_INDEX = "remove_index"
+    LIST_COLLECTIONS = "list_collections"
 
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
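Because `IndexTools` subclasses both `str` and `Enum`, members compare equal to the raw strings they replace, so the later rename of the tool-spec literals is behavior-preserving wherever plain strings are still used. A quick demonstration of that property:

    from enum import Enum

    class IndexTools(str, Enum):
        INDEX_DATA = "index_data"
        SEARCH_INDEX = "search_index"

    assert IndexTools.INDEX_DATA.value == "index_data"
    assert IndexTools.SEARCH_INDEX == "search_index"  # str subclass: equal to the raw name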
@@ -157,6 +168,16 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
        clean_index = kwargs.get("clean_index")
        chunking_tool = kwargs.get("chunking_tool")
        chunking_config = kwargs.get("chunking_config")
+
+        # Store the interval in a private dict to avoid Pydantic field errors
+        if not hasattr(self, "_index_meta_config"):
+            self._index_meta_config: Dict[str, Any] = {}
+
+        self._index_meta_config["update_interval"] = kwargs.get(
+            "meta_update_interval",
+            INDEX_META_UPDATE_INTERVAL,
+        )
+
        result = {"count": 0}
        #
        try:
@@ -164,6 +185,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
            self._clean_index(index_name)
            #
            self.index_meta_init(index_name, kwargs)
+           self._emit_index_event(index_name)
            #
            self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
            self._log_tool_event(f"Loading the documents to index...{kwargs}")
@@ -179,18 +201,26 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
            self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
            #
            results_count = result["count"]
-           self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+           # Final update should always be forced
+           self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count, update_force=True)
            self._emit_index_event(index_name)
            #
            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
                    else "no new documents to index"}
        except Exception as e:
-           self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
-           self._emit_index_event(index_name, error=str(e))
+           # Do maximum effort at least send custom event for supposed changed status
+           msg = str(e)
+           try:
+               # Error update should also be forced
+               self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"], update_force=True)
+           except Exception as ie:
+               logger.error(f"Failed to update index meta status to FAILED for index '{index_name}': {ie}")
+               msg = f"{msg}; additionally failed to update index meta status to FAILED: {ie}"
+           self._emit_index_event(index_name, error=msg)
            raise e
-
 
    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
+       self._ensure_vectorstore_initialized()
        self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
        from ..runtime.langchain.interfaces.llm_processor import add_documents
        #
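The failure path is now best-effort: persisting the FAILED state is attempted, and if that write itself fails, the secondary error is folded into the emitted message rather than masking the original exception. The same pattern in a hedged standalone form (all names invented for the example):

    import logging

    logger = logging.getLogger(__name__)

    def run_with_status(work, mark_failed, emit_event):
        """Illustrative only: run work(), persisting and reporting failure best-effort."""
        try:
            return work()
        except Exception as e:
            msg = str(e)
            try:
                mark_failed()  # best-effort: persist the FAILED state
            except Exception as ie:
                logger.error(f"Failed to persist FAILED status: {ie}")
                msg = f"{msg}; additionally failed to persist FAILED status: {ie}"
            emit_event(error=msg)  # always emit, even if persistence failed
            raise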
@@ -243,6 +273,11 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                logger.debug(msg)
                self._log_tool_event(msg)
                result["count"] += dependent_docs_counter
+               # After each base document, try a non-forced meta update; throttling handled inside index_meta_update
+               try:
+                   self.index_meta_update(index_name, IndexerKeywords.INDEX_META_IN_PROGRESS.value, result["count"], update_force=False)
+               except Exception as exc:  # best-effort, do not break indexing
+                   logger.warning(f"Failed to update index meta during indexing process for index '{index_name}': {exc}")
            if pg_vector_add_docs_chunk:
                add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
 
@@ -308,6 +343,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
        log_msg: str = "Verification of documents to index started"
    ) -> Generator[Document, None, None]:
        """Generic duplicate reduction logic for documents."""
+       self._ensure_vectorstore_initialized()
        self._log_tool_event(log_msg, tool_name="index_documents")
        indexed_data = self._get_indexed_data(index_name)
        indexed_keys = set(indexed_data.keys())
@@ -350,7 +386,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
 
    def remove_index(self, index_name: str = ""):
        """Cleans the indexed data in the collection."""
-       super()._clean_collection(index_name=index_name)
+       super()._clean_collection(index_name=index_name, including_index_meta=True)
        return (f"Collection '{index_name}' has been removed from the vector store.\n"
                f"Available collections: {self.list_collections()}") if index_name \
            else "All collections have been removed from the vector store."
@@ -463,6 +499,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
        )
 
    def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
+       self._ensure_vectorstore_initialized()
        index_meta = super().get_index_meta(index_name)
        if not index_meta:
            self._log_tool_event(
@@ -482,12 +519,53 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                "updated_on": created_on,
                "task_id": None,
                "conversation_id": None,
+               "toolkit_id": self.toolkit_id,
            }
            metadata["history"] = json.dumps([metadata])
            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
 
-   def index_meta_update(self, index_name: str, state: str, result: int):
+   def index_meta_update(self, index_name: str, state: str, result: int, update_force: bool = True, interval: Optional[float] = None):
+       """Update `index_meta` document with optional time-based throttling.
+
+       Args:
+           index_name: Index name to update meta for.
+           state: New state value for the `index_meta` record.
+           result: Number of processed documents to store in the `updated` field.
+           update_force: If `True`, perform the update unconditionally, ignoring throttling.
+               If `False`, perform the update only when the effective time interval has passed.
+           interval: Optional custom interval (in seconds) for this call when `update_force` is `False`.
+               If `None`, falls back to the value stored in `self._index_meta_config["update_interval"]`
+               if present, otherwise uses `INDEX_META_UPDATE_INTERVAL`.
+       """
+       self._ensure_vectorstore_initialized()
+       if not hasattr(self, "_index_meta_last_update_time"):
+           self._index_meta_last_update_time: Dict[str, float] = {}
+
+       if not update_force:
+           # Resolve effective interval:
+           # 1) explicit arg
+           # 2) value from `_index_meta_config`
+           # 3) default constant
+           cfg_interval = None
+           if hasattr(self, "_index_meta_config"):
+               cfg_interval = self._index_meta_config.get("update_interval")
+
+           eff_interval = (
+               interval
+               if interval is not None
+               else (cfg_interval if cfg_interval is not None else INDEX_META_UPDATE_INTERVAL)
+           )
+
+           last_time = self._index_meta_last_update_time.get(index_name)
+           now = time.time()
+           if last_time is not None and (now - last_time) < eff_interval:
+               return
+           self._index_meta_last_update_time[index_name] = now
+       else:
+           # For forced updates, always refresh last update time
+           self._index_meta_last_update_time[index_name] = time.time()
+
        index_meta_raw = super().get_index_meta(index_name)
        from ..runtime.langchain.interfaces.llm_processor import add_documents
        #
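The throttling itself is a small last-write timestamp check per index name. A self-contained sketch of the same mechanism (class and method names are illustrative, not the package's):

    import time

    DEFAULT_INTERVAL = 600.0  # seconds, mirroring INDEX_META_UPDATE_INTERVAL

    class ThrottledMetaWriter:
        """Illustrative stand-in for the throttled index_meta updates above."""

        def __init__(self, interval: float = DEFAULT_INTERVAL):
            self.interval = interval
            self._last: dict = {}

        def update(self, index_name: str, state: str, force: bool = False) -> bool:
            now = time.time()
            last = self._last.get(index_name)
            if not force and last is not None and (now - last) < self.interval:
                return False  # throttled: skip this write
            self._last[index_name] = now
            # the real code would write the index_meta document here
            return True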
@@ -545,11 +623,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
        event_data = {
            "id": index_meta.get("id"),
            "index_name": index_name,
-           "state": metadata.get("state"),
+           "state": "failed" if error is not None else metadata.get("state"),
            "error": error,
            "reindex": is_reindex,
            "indexed": metadata.get("indexed", 0),
            "updated": metadata.get("updated", 0),
+           "toolkit_id": metadata.get("toolkit_id"),
        }
 
        # Emit the event
@@ -572,8 +651,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
        """
        return [
            {
-               "name": "index_data",
-               "mode": "index_data",
+               "name": IndexTools.INDEX_DATA.value,
+               "mode": IndexTools.INDEX_DATA.value,
                "ref": self.index_data,
                "description": "Loads data to index.",
                "args_schema": create_model(
@@ -583,36 +662,36 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                )
            },
            {
-               "name": "search_index",
-               "mode": "search_index",
+               "name": IndexTools.SEARCH_INDEX.value,
+               "mode": IndexTools.SEARCH_INDEX.value,
                "ref": self.search_index,
                "description": self.search_index.__doc__,
                "args_schema": BaseSearchParams
            },
            {
-               "name": "stepback_search_index",
-               "mode": "stepback_search_index",
+               "name": IndexTools.STEPBACK_SEARCH_INDEX.value,
+               "mode": IndexTools.STEPBACK_SEARCH_INDEX.value,
                "ref": self.stepback_search_index,
                "description": self.stepback_search_index.__doc__,
                "args_schema": BaseStepbackSearchParams
            },
            {
-               "name": "stepback_summary_index",
-               "mode": "stepback_summary_index",
+               "name": IndexTools.STEPBACK_SUMMARY_INDEX.value,
+               "mode": IndexTools.STEPBACK_SUMMARY_INDEX.value,
                "ref": self.stepback_summary_index,
                "description": self.stepback_summary_index.__doc__,
                "args_schema": BaseStepbackSearchParams
            },
            {
-               "name": "remove_index",
-               "mode": "remove_index",
+               "name": IndexTools.REMOVE_INDEX.value,
+               "mode": IndexTools.REMOVE_INDEX.value,
                "ref": self.remove_index,
                "description": self.remove_index.__doc__,
                "args_schema": RemoveIndexParams
            },
            {
-               "name": "list_collections",
-               "mode": "list_collections",
+               "name": IndexTools.LIST_COLLECTIONS.value,
+               "mode": IndexTools.LIST_COLLECTIONS.value,
                "ref": self.list_collections,
                "description": self.list_collections.__doc__,
                # No parameters
alita_sdk/tools/bitbucket/__init__.py
@@ -47,8 +47,8 @@ class AlitaBitbucketToolkit(BaseToolkit):
        AlitaBitbucketToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
        m = create_model(
            name,
-           project=(str, Field(description="Project/Workspace", json_schema_extra={'configuration': True})),
-           repository=(str, Field(description="Repository", json_schema_extra={'max_toolkit_length': AlitaBitbucketToolkit.toolkit_max_length, 'configuration': True})),
+           project=(str, Field(description="Project/Workspace")),
+           repository=(str, Field(description="Repository")),
            branch=(str, Field(description="Main branch", default="main")),
            cloud=(Optional[bool], Field(description="Hosting Option", default=None)),
            bitbucket_configuration=(BitbucketConfiguration, Field(description="Bitbucket Configuration", json_schema_extra={'configuration_types': ['bitbucket']})),
alita_sdk/tools/chunkers/__init__.py
@@ -3,6 +3,7 @@ from .sematic.statistical_chunker import statistical_chunker
 from .sematic.markdown_chunker import markdown_chunker
 from .sematic.proposal_chunker import proposal_chunker
 from .sematic.json_chunker import json_chunker
+from .universal_chunker import universal_chunker, chunk_single_document, get_file_type
 from .models import StatisticalChunkerConfig, MarkdownChunkerConfig, ProposalChunkerConfig
 
 __all__ = {
@@ -10,7 +11,8 @@ __all__ = {
    'statistical': statistical_chunker,
    'markdown': markdown_chunker,
    'proposal': proposal_chunker,
-   'json': json_chunker
+   'json': json_chunker,
+   'universal': universal_chunker,
 }
 
 __confluence_chunkers__ = {
alita_sdk/tools/chunkers/sematic/json_chunker.py
@@ -17,6 +17,7 @@ def json_chunker(file_content_generator: Generator[Document, None, None], config
        for chunk in chunks:
            metadata = doc.metadata.copy()
            metadata['chunk_id'] = chunk_id
+           metadata['method_name'] = 'json'
            chunk_id += 1
            yield Document(page_content=json.dumps(chunk), metadata=metadata)
    except Exception as e:
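With `method_name` stamped into chunk metadata, downstream consumers can tell which chunker produced a given chunk. A minimal generator in the same style (the function itself is illustrative, not part of the package):

    from typing import Generator
    from langchain_core.documents import Document

    def tagging_chunker(docs: Generator[Document, None, None], method: str) -> Generator[Document, None, None]:
        chunk_id = 1
        for doc in docs:
            metadata = doc.metadata.copy()
            metadata['chunk_id'] = chunk_id
            metadata['method_name'] = method  # traceability, as json_chunker now does
            chunk_id += 1
            yield Document(page_content=doc.page_content, metadata=metadata)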