alita-sdk 0.3.390__py3-none-any.whl → 0.3.417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. alita_sdk/configurations/bitbucket.py +95 -0
  2. alita_sdk/configurations/confluence.py +96 -1
  3. alita_sdk/configurations/gitlab.py +79 -0
  4. alita_sdk/configurations/jira.py +103 -0
  5. alita_sdk/configurations/testrail.py +88 -0
  6. alita_sdk/configurations/xray.py +93 -0
  7. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  8. alita_sdk/configurations/zephyr_essential.py +75 -0
  9. alita_sdk/runtime/clients/client.py +3 -2
  10. alita_sdk/runtime/langchain/assistant.py +29 -5
  11. alita_sdk/runtime/langchain/constants.py +2 -0
  12. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  13. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  14. alita_sdk/runtime/langchain/document_loaders/constants.py +8 -8
  15. alita_sdk/runtime/langchain/langraph_agent.py +46 -24
  16. alita_sdk/runtime/langchain/utils.py +11 -4
  17. alita_sdk/runtime/toolkits/application.py +8 -1
  18. alita_sdk/runtime/toolkits/tools.py +72 -62
  19. alita_sdk/runtime/tools/application.py +7 -0
  20. alita_sdk/runtime/tools/function.py +11 -4
  21. alita_sdk/runtime/tools/llm.py +142 -116
  22. alita_sdk/runtime/tools/sandbox.py +15 -31
  23. alita_sdk/tools/__init__.py +41 -31
  24. alita_sdk/tools/base_indexer_toolkit.py +27 -2
  25. alita_sdk/tools/code_indexer_toolkit.py +13 -3
  26. alita_sdk/tools/confluence/loader.py +10 -0
  27. alita_sdk/tools/gitlab/api_wrapper.py +8 -9
  28. alita_sdk/tools/jira/api_wrapper.py +1 -1
  29. alita_sdk/tools/qtest/api_wrapper.py +7 -10
  30. alita_sdk/tools/sharepoint/api_wrapper.py +81 -28
  31. alita_sdk/tools/sharepoint/authorization_helper.py +131 -1
  32. alita_sdk/tools/sharepoint/utils.py +8 -2
  33. alita_sdk/tools/utils/content_parser.py +27 -16
  34. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +10 -2
  35. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/METADATA +1 -1
  36. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/RECORD +39 -39
  37. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/WHEEL +0 -0
  38. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/licenses/LICENSE +0 -0
  39. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from langchain_core.runnables import RunnableConfig
7
7
  from langchain_core.tools import BaseTool, ToolException
8
8
  from pydantic import Field
9
9
 
10
+ from ..langchain.constants import ELITEA_RS
10
11
  from ..langchain.utils import create_pydantic_model, propagate_the_input_mapping
11
12
 
12
13
  logger = logging.getLogger(__name__)
@@ -30,6 +31,7 @@ class LLMNode(BaseTool):
30
31
  structured_output: Optional[bool] = Field(default=False, description='Whether to use structured output')
31
32
  available_tools: Optional[List[BaseTool]] = Field(default=None, description='Available tools for binding')
32
33
  tool_names: Optional[List[str]] = Field(default=None, description='Specific tool names to filter')
34
+ steps_limit: Optional[int] = Field(default=25, description='Maximum steps for tool execution')
33
35
 
34
36
  def get_filtered_tools(self) -> List[BaseTool]:
35
37
  """
@@ -88,8 +90,7 @@ class LLMNode(BaseTool):
88
90
  raise ToolException(f"LLMNode requires 'system' and 'task' parameters in input mapping. "
89
91
  f"Actual params: {func_args}")
90
92
  # cast to str in case user passes variable different from str
91
- messages = [SystemMessage(content=str(func_args.get('system'))), HumanMessage(content=str(func_args.get('task')))]
92
- messages.extend(func_args.get('chat_history', []))
93
+ messages = [SystemMessage(content=str(func_args.get('system'))), *func_args.get('chat_history', []), HumanMessage(content=str(func_args.get('task')))]
93
94
  else:
94
95
  # Flow for chat-based LLM node w/o prompt/task from pipeline but with messages in state
95
96
  # verify messages structure
@@ -122,14 +123,25 @@ class LLMNode(BaseTool):
122
123
  }
123
124
  for key, value in (self.structured_output_dict or {}).items()
124
125
  }
126
+ # Add default output field for proper response to user
127
+ struct_params['elitea_response'] = {'description': 'final output to user', 'type': 'str'}
125
128
  struct_model = create_pydantic_model(f"LLMOutput", struct_params)
126
- llm = llm_client.with_structured_output(struct_model)
127
- completion = llm.invoke(messages, config=config)
128
- result = completion.model_dump()
129
+ completion = llm_client.invoke(messages, config=config)
130
+ if hasattr(completion, 'tool_calls') and completion.tool_calls:
131
+ new_messages, _ = self.__perform_tool_calling(completion, messages, llm_client, config)
132
+ llm = self.__get_struct_output_model(llm_client, struct_model)
133
+ completion = llm.invoke(new_messages, config=config)
134
+ result = completion.model_dump()
135
+ else:
136
+ llm = self.__get_struct_output_model(llm_client, struct_model)
137
+ completion = llm.invoke(messages, config=config)
138
+ result = completion.model_dump()
129
139
 
130
140
  # Ensure messages are properly formatted
131
141
  if result.get('messages') and isinstance(result['messages'], list):
132
142
  result['messages'] = [{'role': 'assistant', 'content': '\n'.join(result['messages'])}]
143
+ else:
144
+ result['messages'] = messages + [AIMessage(content=result.get(ELITEA_RS, ''))]
133
145
 
134
146
  return result
135
147
  else:
@@ -139,117 +151,15 @@ class LLMNode(BaseTool):
139
151
  # Handle both tool-calling and regular responses
140
152
  if hasattr(completion, 'tool_calls') and completion.tool_calls:
141
153
  # Handle iterative tool-calling and execution
142
- new_messages = messages + [completion]
143
- max_iterations = 15
144
- iteration = 0
145
-
146
- # Continue executing tools until no more tool calls or max iterations reached
147
- current_completion = completion
148
- while (hasattr(current_completion, 'tool_calls') and
149
- current_completion.tool_calls and
150
- iteration < max_iterations):
151
-
152
- iteration += 1
153
- logger.info(f"Tool execution iteration {iteration}/{max_iterations}")
154
-
155
- # Execute each tool call in the current completion
156
- tool_calls = current_completion.tool_calls if hasattr(current_completion.tool_calls,
157
- '__iter__') else []
158
-
159
- for tool_call in tool_calls:
160
- tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call,
161
- 'name',
162
- '')
163
- tool_args = tool_call.get('args', {}) if isinstance(tool_call, dict) else getattr(tool_call,
164
- 'args',
165
- {})
166
- tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(
167
- tool_call, 'id', '')
168
-
169
- # Find the tool in filtered tools
170
- filtered_tools = self.get_filtered_tools()
171
- tool_to_execute = None
172
- for tool in filtered_tools:
173
- if tool.name == tool_name:
174
- tool_to_execute = tool
175
- break
176
-
177
- if tool_to_execute:
178
- try:
179
- logger.info(f"Executing tool '{tool_name}' with args: {tool_args}")
180
- # Pass the underlying config to the tool execution invoke method
181
- # since it may be another agent, graph, etc. to see it properly in thinking steps
182
- tool_result = tool_to_execute.invoke(tool_args, config=config)
183
-
184
- # Create tool message with result - preserve structured content
185
- from langchain_core.messages import ToolMessage
186
-
187
- # Check if tool_result is structured content (list of dicts)
188
- # TODO: need solid check for being compatible with ToolMessage content format
189
- if isinstance(tool_result, list) and all(
190
- isinstance(item, dict) and 'type' in item for item in tool_result
191
- ):
192
- # Use structured content directly for multimodal support
193
- tool_message = ToolMessage(
194
- content=tool_result,
195
- tool_call_id=tool_call_id
196
- )
197
- else:
198
- # Fallback to string conversion for other tool results
199
- tool_message = ToolMessage(
200
- content=str(tool_result),
201
- tool_call_id=tool_call_id
202
- )
203
- new_messages.append(tool_message)
204
-
205
- except Exception as e:
206
- logger.error(f"Error executing tool '{tool_name}': {e}")
207
- # Create error tool message
208
- from langchain_core.messages import ToolMessage
209
- tool_message = ToolMessage(
210
- content=f"Error executing {tool_name}: {str(e)}",
211
- tool_call_id=tool_call_id
212
- )
213
- new_messages.append(tool_message)
214
- else:
215
- logger.warning(f"Tool '{tool_name}' not found in available tools")
216
- # Create error tool message for missing tool
217
- from langchain_core.messages import ToolMessage
218
- tool_message = ToolMessage(
219
- content=f"Tool '{tool_name}' not available",
220
- tool_call_id=tool_call_id
221
- )
222
- new_messages.append(tool_message)
223
-
224
- # Call LLM again with tool results to get next response
225
- try:
226
- current_completion = llm_client.invoke(new_messages, config=config)
227
- new_messages.append(current_completion)
228
-
229
- # Check if we still have tool calls
230
- if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
231
- logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls")
232
- else:
233
- logger.info("LLM completed without requesting more tools")
234
- break
235
-
236
- except Exception as e:
237
- logger.error(f"Error in LLM call during iteration {iteration}: {e}")
238
- # Add error message and break the loop
239
- error_msg = f"Error processing tool results in iteration {iteration}: {str(e)}"
240
- new_messages.append(AIMessage(content=error_msg))
241
- break
242
-
243
- # Log completion status
244
- if iteration >= max_iterations:
245
- logger.warning(f"Reached maximum iterations ({max_iterations}) for tool execution")
246
- # Add a warning message to the chat
247
- warning_msg = f"Maximum tool execution iterations ({max_iterations}) reached. Stopping tool execution."
248
- new_messages.append(AIMessage(content=warning_msg))
249
- else:
250
- logger.info(f"Tool execution completed after {iteration} iterations")
154
+ new_messages, current_completion = self.__perform_tool_calling(completion, messages, llm_client, config)
251
155
 
252
- return {"messages": new_messages}
156
+ output_msgs = {"messages": new_messages}
157
+ if self.output_variables:
158
+ if self.output_variables[0] == 'messages':
159
+ return output_msgs
160
+ output_msgs[self.output_variables[0]] = current_completion.content if current_completion else None
161
+
162
+ return output_msgs
253
163
  else:
254
164
  # Regular text response
255
165
  content = completion.content.strip() if hasattr(completion, 'content') else str(completion)
@@ -275,4 +185,120 @@ class LLMNode(BaseTool):
275
185
 
276
186
  def _run(self, *args, **kwargs):
277
187
  # Legacy support for old interface
278
- return self.invoke(kwargs, **kwargs)
188
+ return self.invoke(kwargs, **kwargs)
189
+
190
+ def __perform_tool_calling(self, completion, messages, llm_client, config):
191
+ # Handle iterative tool-calling and execution
192
+ new_messages = messages + [completion]
193
+ iteration = 0
194
+
195
+ # Continue executing tools until no more tool calls or max iterations reached
196
+ current_completion = completion
197
+ while (hasattr(current_completion, 'tool_calls') and
198
+ current_completion.tool_calls and
199
+ iteration < self.steps_limit):
200
+
201
+ iteration += 1
202
+ logger.info(f"Tool execution iteration {iteration}/{self.steps_limit}")
203
+
204
+ # Execute each tool call in the current completion
205
+ tool_calls = current_completion.tool_calls if hasattr(current_completion.tool_calls,
206
+ '__iter__') else []
207
+
208
+ for tool_call in tool_calls:
209
+ tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call,
210
+ 'name',
211
+ '')
212
+ tool_args = tool_call.get('args', {}) if isinstance(tool_call, dict) else getattr(tool_call,
213
+ 'args',
214
+ {})
215
+ tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(
216
+ tool_call, 'id', '')
217
+
218
+ # Find the tool in filtered tools
219
+ filtered_tools = self.get_filtered_tools()
220
+ tool_to_execute = None
221
+ for tool in filtered_tools:
222
+ if tool.name == tool_name:
223
+ tool_to_execute = tool
224
+ break
225
+
226
+ if tool_to_execute:
227
+ try:
228
+ logger.info(f"Executing tool '{tool_name}' with args: {tool_args}")
229
+ # Pass the underlying config to the tool execution invoke method
230
+ # since it may be another agent, graph, etc. to see it properly in thinking steps
231
+ tool_result = tool_to_execute.invoke(tool_args, config=config)
232
+
233
+ # Create tool message with result - preserve structured content
234
+ from langchain_core.messages import ToolMessage
235
+
236
+ # Check if tool_result is structured content (list of dicts)
237
+ # TODO: need solid check for being compatible with ToolMessage content format
238
+ if isinstance(tool_result, list) and all(
239
+ isinstance(item, dict) and 'type' in item for item in tool_result
240
+ ):
241
+ # Use structured content directly for multimodal support
242
+ tool_message = ToolMessage(
243
+ content=tool_result,
244
+ tool_call_id=tool_call_id
245
+ )
246
+ else:
247
+ # Fallback to string conversion for other tool results
248
+ tool_message = ToolMessage(
249
+ content=str(tool_result),
250
+ tool_call_id=tool_call_id
251
+ )
252
+ new_messages.append(tool_message)
253
+
254
+ except Exception as e:
255
+ logger.error(f"Error executing tool '{tool_name}': {e}")
256
+ # Create error tool message
257
+ from langchain_core.messages import ToolMessage
258
+ tool_message = ToolMessage(
259
+ content=f"Error executing {tool_name}: {str(e)}",
260
+ tool_call_id=tool_call_id
261
+ )
262
+ new_messages.append(tool_message)
263
+ else:
264
+ logger.warning(f"Tool '{tool_name}' not found in available tools")
265
+ # Create error tool message for missing tool
266
+ from langchain_core.messages import ToolMessage
267
+ tool_message = ToolMessage(
268
+ content=f"Tool '{tool_name}' not available",
269
+ tool_call_id=tool_call_id
270
+ )
271
+ new_messages.append(tool_message)
272
+
273
+ # Call LLM again with tool results to get next response
274
+ try:
275
+ current_completion = llm_client.invoke(new_messages, config=config)
276
+ new_messages.append(current_completion)
277
+
278
+ # Check if we still have tool calls
279
+ if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
280
+ logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls")
281
+ else:
282
+ logger.info("LLM completed without requesting more tools")
283
+ break
284
+
285
+ except Exception as e:
286
+ logger.error(f"Error in LLM call during iteration {iteration}: {e}")
287
+ # Add error message and break the loop
288
+ error_msg = f"Error processing tool results in iteration {iteration}: {str(e)}"
289
+ new_messages.append(AIMessage(content=error_msg))
290
+ break
291
+
292
+ # Log completion status
293
+ if iteration >= self.steps_limit:
294
+ logger.warning(f"Reached maximum iterations ({self.steps_limit}) for tool execution")
295
+ # Add a warning message to the chat
296
+ warning_msg = f"Maximum tool execution iterations ({self.steps_limit}) reached. Stopping tool execution."
297
+ new_messages.append(AIMessage(content=warning_msg))
298
+ else:
299
+ logger.info(f"Tool execution completed after {iteration} iterations")
300
+
301
+ return new_messages, current_completion
302
+
303
+ def __get_struct_output_model(self, llm_client, pydantic_model):
304
+ return llm_client.with_structured_output(pydantic_model)
@@ -64,36 +64,10 @@ def _is_deno_available() -> bool:
64
64
 
65
65
 
66
66
  def _setup_pyodide_cache_env() -> None:
67
- """Setup Pyodide caching environment variables for performance optimization"""
67
+ """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
68
68
  try:
69
- # Check if cache environment file exists and source it
70
- cache_env_file = os.path.expanduser("~/.pyodide_cache_env")
71
- if os.path.exists(cache_env_file):
72
- with open(cache_env_file, 'r') as f:
73
- for line in f:
74
- line = line.strip()
75
- if line.startswith('export ') and '=' in line:
76
- # Parse export VAR=value format
77
- var_assignment = line[7:] # Remove 'export '
78
- if '=' in var_assignment:
79
- key, value = var_assignment.split('=', 1)
80
- # Remove quotes if present
81
- value = value.strip('"').strip("'")
82
- os.environ[key] = value
83
- logger.debug(f"Set Pyodide cache env: {key}={value}")
84
-
85
- # Set default caching environment variables if not already set
86
- cache_defaults = {
87
- 'PYODIDE_PACKAGES_PATH': os.path.expanduser('~/.cache/pyodide'),
88
- 'DENO_DIR': os.path.expanduser('~/.cache/deno'),
89
- 'PYODIDE_CACHE_DIR': os.path.expanduser('~/.cache/pyodide'),
90
- }
91
-
92
- for key, default_value in cache_defaults.items():
93
- if key not in os.environ:
94
- os.environ[key] = default_value
95
- logger.debug(f"Set default Pyodide env: {key}={default_value}")
96
-
69
+ for key in ["SANDBOX_BASE", "DENO_DIR"]:
70
+ logger.info("Sandbox env: %s -> %s", key, os.environ.get(key, "n/a"))
97
71
  except Exception as e:
98
72
  logger.warning(f"Could not setup Pyodide cache environment: {e}")
99
73
 
@@ -142,7 +116,7 @@ class PyodideSandboxTool(BaseTool):
142
116
  def _prepare_pyodide_input(self, code: str) -> str:
143
117
  """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
144
118
  pyodide_predata = ""
145
-
119
+
146
120
  # Add alita_client if available
147
121
  if self.alita_client:
148
122
  try:
@@ -158,7 +132,7 @@ class PyodideSandboxTool(BaseTool):
158
132
  f"auth_token='{self.alita_client.auth_token}')\n")
159
133
  except FileNotFoundError:
160
134
  logger.error(f"sandbox_client.py not found. Ensure the file exists.")
161
-
135
+
162
136
  return f"#elitea simplified client\n{pyodide_predata}{code}"
163
137
 
164
138
  def _initialize_sandbox(self) -> None:
@@ -175,9 +149,19 @@ class PyodideSandboxTool(BaseTool):
175
149
 
176
150
  from langchain_sandbox import PyodideSandbox
177
151
 
152
+ # Air-gapped settings
153
+ sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
154
+ sandbox_tmp = os.path.join(sandbox_base, "tmp")
155
+ deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))
156
+
178
157
  # Configure sandbox with performance optimizations
179
158
  self._sandbox = PyodideSandbox(
180
159
  stateful=self.stateful,
160
+ #
161
+ allow_env=["SANDBOX_BASE"],
162
+ allow_read=[sandbox_base, sandbox_tmp, deno_cache],
163
+ allow_write=[sandbox_tmp, deno_cache],
164
+ #
181
165
  allow_net=self.allow_net,
182
166
  # Use auto node_modules_dir for better caching
183
167
  node_modules_dir="auto"
@@ -90,64 +90,74 @@ available_count = len(AVAILABLE_TOOLS)
90
90
  total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
91
91
  logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
92
92
 
93
+
93
94
  def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
94
95
  tools = []
96
+
95
97
  for tool in tools_list:
96
- # validate tool name syntax - it cannot be started with _
97
- for tool_name in tool.get('settings', {}).get('selected_tools', []):
98
- if isinstance(tool_name, str) and tool_name.startswith('_'):
99
- raise ValueError(f"Tool name '{tool_name}' from toolkit '{tool.get('type', '')}' cannot start with '_'")
100
- if not tool.get('settings'):
98
+ settings = tool.get('settings')
99
+
100
+ # Skip tools without settings early
101
+ if not settings:
101
102
  logger.warning(f"Tool '{tool.get('type', '')}' has no settings, skipping...")
102
103
  continue
103
- tool['settings']['alita'] = alita
104
- tool['settings']['llm'] = llm
105
- tool['settings']['store'] = store
104
+
105
+ # Validate tool names once
106
+ selected_tools = settings.get('selected_tools', [])
107
+ invalid_tools = [name for name in selected_tools if isinstance(name, str) and name.startswith('_')]
108
+ if invalid_tools:
109
+ raise ValueError(f"Tool names {invalid_tools} from toolkit '{tool.get('type', '')}' cannot start with '_'")
110
+
111
+ # Cache tool type and add common settings
106
112
  tool_type = tool['type']
113
+ settings['alita'] = alita
114
+ settings['llm'] = llm
115
+ settings['store'] = store
107
116
 
108
- # Handle special cases for ADO tools
117
+ # Set pgvector collection schema if present
118
+ if settings.get('pgvector_configuration'):
119
+ settings['pgvector_configuration']['collection_schema'] = str(tool['id'])
120
+
121
+ # Handle ADO special cases
109
122
  if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
110
123
  tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
124
+ continue
111
125
 
112
- # Check if tool is available and has get_tools function
113
- elif tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
126
+ # Handle ADO repos aliases
127
+ if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
114
128
  try:
115
- get_tools_func = AVAILABLE_TOOLS[tool_type]['get_tools']
116
- tools.extend(get_tools_func(tool))
117
-
129
+ tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
118
130
  except Exception as e:
119
- logger.error(f"Error getting tools for {tool_type}: {e}")
120
- raise ToolException(f"Error getting tools for {tool_type}: {e}")
131
+ logger.error(f"Error getting ADO repos tools: {e}")
132
+ continue
121
133
 
122
- # Handle ADO repos special case (it might be requested as azure_devops_repos)
123
- elif tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
134
+ # Handle standard tools
135
+ if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
124
136
  try:
125
- get_tools_func = AVAILABLE_TOOLS['ado_repos']['get_tools']
126
- tools.extend(get_tools_func(tool))
137
+ tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
127
138
  except Exception as e:
128
- logger.error(f"Error getting ADO repos tools: {e}")
139
+ logger.error(f"Error getting tools for {tool_type}: {e}")
140
+ raise ToolException(f"Error getting tools for {tool_type}: {e}")
141
+ continue
129
142
 
130
143
  # Handle custom modules
131
- elif tool.get("settings", {}).get("module"):
144
+ if settings.get("module"):
132
145
  try:
133
- settings = tool.get("settings", {})
134
146
  mod = import_module(settings.pop("module"))
135
147
  tkitclass = getattr(mod, settings.pop("class"))
136
- #
137
- get_toolkit_params = tool["settings"].copy()
148
+ get_toolkit_params = settings.copy()
138
149
  get_toolkit_params["name"] = tool.get("name")
139
- #
140
150
  toolkit = tkitclass.get_toolkit(**get_toolkit_params)
141
151
  tools.extend(toolkit.get_tools())
142
152
  except Exception as e:
143
153
  logger.error(f"Error in getting custom toolkit: {e}")
154
+ continue
144
155
 
156
+ # Tool not available
157
+ if tool_type in FAILED_IMPORTS:
158
+ logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
145
159
  else:
146
- # Tool not available or not found
147
- if tool_type in FAILED_IMPORTS:
148
- logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
149
- else:
150
- logger.warning(f"Unknown tool type: {tool_type}")
160
+ logger.warning(f"Unknown tool type: {tool_type}")
151
161
 
152
162
  return tools
153
163
 
@@ -110,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
110
110
  def __init__(self, **kwargs):
111
111
  conn = kwargs.get('connection_string', None)
112
112
  connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
113
- collection_name = kwargs.get('collection_name')
113
+ collection_name = kwargs.get('collection_schema')
114
114
 
115
115
  if 'vectorstore_type' not in kwargs:
116
116
  kwargs['vectorstore_type'] = 'PGVector'
@@ -160,6 +160,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
160
160
  if clean_index:
161
161
  self._clean_index(index_name)
162
162
  #
163
+ self.index_meta_init(index_name, kwargs)
164
+ #
163
165
  self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
164
166
  self._log_tool_event(f"Loading the documents to index...{kwargs}")
165
167
  documents = self._base_loader(**kwargs)
@@ -179,7 +181,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
179
181
  return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
180
182
  else "no new documents to index"}
181
183
  except Exception as e:
182
- self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, results_count)
184
+ self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
183
185
  raise e
184
186
 
185
187
 
@@ -454,6 +456,29 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
454
456
  reranking_config=reranking_config,
455
457
  extended_search=extended_search
456
458
  )
459
+
460
+ def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
461
+ index_meta = super().get_index_meta(index_name)
462
+ if not index_meta:
463
+ self._log_tool_event(
464
+ f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
465
+ tool_name="index_data"
466
+ )
467
+ from ..runtime.langchain.interfaces.llm_processor import add_documents
468
+ created_on = time.time()
469
+ metadata = {
470
+ "collection": index_name,
471
+ "type": IndexerKeywords.INDEX_META_TYPE.value,
472
+ "indexed": 0,
473
+ "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
474
+ "index_configuration": index_configuration,
475
+ "created_on": created_on,
476
+ "updated_on": created_on,
477
+ "history": "[]",
478
+ "task_id": None,
479
+ }
480
+ index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
481
+ add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
457
482
 
458
483
  def index_meta_update(self, index_name: str, state: str, result: int):
459
484
  index_meta_raw = super().get_index_meta(index_name)
@@ -1,5 +1,6 @@
1
1
  import ast
2
2
  import fnmatch
3
+ import json
3
4
  import logging
4
5
  from typing import Optional, List, Generator
5
6
 
@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
21
22
  return self.vector_adapter.get_code_indexed_data(self, index_name)
22
23
 
23
24
  def key_fn(self, document: Document):
24
- return document.metadata.get('id')
25
+ return document.metadata.get("filename")
25
26
 
26
27
  def compare_fn(self, document: Document, idx_data):
27
28
  return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
46
47
  )
47
48
 
48
49
  def _extend_data(self, documents: Generator[Document, None, None]):
49
- yield from parse_code_files_for_db(documents)
50
+ yield from documents
50
51
 
51
52
  def _index_tool_params(self):
52
53
  """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
117
118
  if not file_content:
118
119
  # empty file, skip
119
120
  continue
121
+ #
122
+ # ensure file content is a string
123
+ if isinstance(file_content, bytes):
124
+ file_content = file_content.decode("utf-8", errors="ignore")
125
+ elif isinstance(file_content, dict) and file.endswith('.json'):
126
+ file_content = json.dumps(file_content)
127
+ elif not isinstance(file_content, str):
128
+ file_content = str(file_content)
129
+ #
120
130
  # hash the file content to ensure uniqueness
121
131
  import hashlib
122
132
  file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
127
137
  self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
128
138
  self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
129
139
 
130
- return file_content_generator()
140
+ return parse_code_files_for_db(file_content_generator())
131
141
 
132
142
  def __handle_get_files(self, path: str, branch: str):
133
143
  """
@@ -3,6 +3,7 @@ from typing import Optional, List
3
3
  from logging import getLogger
4
4
 
5
5
  import requests
6
+ from langchain_core.documents import Document
6
7
 
7
8
  logger = getLogger(__name__)
8
9
  from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
193
194
  else:
194
195
  return super().process_image(link, ocr_languages)
195
196
 
197
+ def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
198
+ content_format: ContentFormat, ocr_languages: Optional[str] = None,
199
+ keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
200
+ if not page.get("title"):
201
+ # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
202
+ page["title"] = "Untitled"
203
+ return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
204
+ ocr_languages, keep_markdown_format, keep_newlines)
205
+
196
206
  # TODO review usage
197
207
  # def process_svg(
198
208
  # self,
@@ -115,9 +115,8 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
115
115
  """Remove trailing slash from URL if present."""
116
116
  return url.rstrip('/') if url else url
117
117
 
118
- @model_validator(mode='before')
119
- @classmethod
120
- def validate_toolkit(cls, values: Dict) -> Dict:
118
+ @model_validator(mode='after')
119
+ def validate_toolkit(self):
121
120
  try:
122
121
  import gitlab
123
122
  except ImportError:
@@ -125,17 +124,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
125
124
  "python-gitlab is not installed. "
126
125
  "Please install it with `pip install python-gitlab`"
127
126
  )
128
- values['repository'] = cls._sanitize_url(values['repository'])
127
+ self.repository = self._sanitize_url(self.repository)
129
128
  g = gitlab.Gitlab(
130
- url=cls._sanitize_url(values['url']),
131
- private_token=values['private_token'],
129
+ url=self._sanitize_url(self.url),
130
+ private_token=self.private_token.get_secret_value(),
132
131
  keep_base_url=True,
133
132
  )
134
133
 
135
134
  g.auth()
136
- cls._git = g
137
- cls._active_branch = values.get('branch')
138
- return super().validate_toolkit(values)
135
+ self._git = g
136
+ self._active_branch = self.branch
137
+ return self
139
138
 
140
139
  @property
141
140
  def repo_instance(self):