alita-sdk 0.3.457__py3-none-any.whl → 0.3.486__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk might be problematic.

Files changed (102)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +194 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3592 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1256 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1665 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +64 -8
  30. alita_sdk/community/inventory/__init__.py +224 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +169 -0
  57. alita_sdk/community/inventory/visualize.py +1370 -0
  58. alita_sdk/configurations/bitbucket.py +0 -3
  59. alita_sdk/runtime/clients/client.py +99 -26
  60. alita_sdk/runtime/langchain/assistant.py +4 -2
  61. alita_sdk/runtime/langchain/constants.py +2 -1
  62. alita_sdk/runtime/langchain/langraph_agent.py +134 -31
  63. alita_sdk/runtime/langchain/utils.py +1 -1
  64. alita_sdk/runtime/llms/preloaded.py +2 -6
  65. alita_sdk/runtime/toolkits/__init__.py +2 -0
  66. alita_sdk/runtime/toolkits/application.py +1 -1
  67. alita_sdk/runtime/toolkits/mcp.py +46 -36
  68. alita_sdk/runtime/toolkits/planning.py +171 -0
  69. alita_sdk/runtime/toolkits/tools.py +39 -6
  70. alita_sdk/runtime/tools/function.py +17 -5
  71. alita_sdk/runtime/tools/llm.py +249 -14
  72. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  73. alita_sdk/runtime/tools/planning/models.py +246 -0
  74. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  75. alita_sdk/runtime/tools/vectorstore_base.py +41 -6
  76. alita_sdk/runtime/utils/mcp_oauth.py +80 -0
  77. alita_sdk/runtime/utils/streamlit.py +6 -10
  78. alita_sdk/runtime/utils/toolkit_utils.py +19 -4
  79. alita_sdk/tools/__init__.py +54 -27
  80. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
  81. alita_sdk/tools/base_indexer_toolkit.py +150 -19
  82. alita_sdk/tools/bitbucket/__init__.py +2 -2
  83. alita_sdk/tools/chunkers/__init__.py +3 -1
  84. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +95 -6
  85. alita_sdk/tools/chunkers/universal_chunker.py +269 -0
  86. alita_sdk/tools/code_indexer_toolkit.py +55 -22
  87. alita_sdk/tools/elitea_base.py +86 -21
  88. alita_sdk/tools/jira/__init__.py +1 -1
  89. alita_sdk/tools/jira/api_wrapper.py +91 -40
  90. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  91. alita_sdk/tools/qtest/__init__.py +1 -1
  92. alita_sdk/tools/qtest/api_wrapper.py +871 -32
  93. alita_sdk/tools/sharepoint/api_wrapper.py +22 -2
  94. alita_sdk/tools/sharepoint/authorization_helper.py +17 -1
  95. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +8 -2
  96. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  97. {alita_sdk-0.3.457.dist-info → alita_sdk-0.3.486.dist-info}/METADATA +146 -2
  98. {alita_sdk-0.3.457.dist-info → alita_sdk-0.3.486.dist-info}/RECORD +102 -40
  99. alita_sdk-0.3.486.dist-info/entry_points.txt +2 -0
  100. {alita_sdk-0.3.457.dist-info → alita_sdk-0.3.486.dist-info}/WHEEL +0 -0
  101. {alita_sdk-0.3.457.dist-info → alita_sdk-0.3.486.dist-info}/licenses/LICENSE +0 -0
  102. {alita_sdk-0.3.457.dist-info → alita_sdk-0.3.486.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,269 @@
+ """
+ Universal Chunker - Routes documents to appropriate chunkers based on file type.
+
+ This module provides a universal chunking interface that automatically selects
+ the appropriate chunking strategy based on the file extension:
+
+ - .md, .markdown → Markdown chunker (header-based splitting)
+ - .py, .js, .ts, .java, etc. → TreeSitter code chunker
+ - .json → JSON chunker
+ - other → Default text chunker
+
+ Usage:
+     from alita_sdk.tools.chunkers.universal_chunker import universal_chunker
+
+     # Chunk documents from a loader
+     for chunk in universal_chunker(document_generator, config):
+         print(chunk.page_content)
+ """
+
+ import logging
+ import os
+ from typing import Generator, Dict, Any, Optional
+ from langchain_core.documents import Document
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ from .code.codeparser import parse_code_files_for_db
+ from .sematic.markdown_chunker import markdown_chunker
+ from .sematic.json_chunker import json_chunker
+
+ logger = logging.getLogger(__name__)
+
+
+ # File extension mappings
+ MARKDOWN_EXTENSIONS = {'.md', '.markdown', '.mdown', '.mkd', '.mdx'}
+ JSON_EXTENSIONS = {'.json', '.jsonl', '.jsonc'}
+ CODE_EXTENSIONS = {
+     '.py', '.js', '.jsx', '.mjs', '.cjs', '.ts', '.tsx',
+     '.java', '.kt', '.rs', '.go', '.cpp', '.c', '.cs',
+     '.hs', '.rb', '.scala', '.lua'
+ }
+
+
+ def get_file_extension(file_path: str) -> str:
+     """Extract file extension from path."""
+     return os.path.splitext(file_path)[-1].lower()
+
+
+ def get_file_type(file_path: str) -> str:
+     """
+     Determine the file type category for chunking.
+
+     Returns:
+         'markdown', 'json', 'code', or 'text'
+     """
+     ext = get_file_extension(file_path)
+
+     if ext in MARKDOWN_EXTENSIONS:
+         return 'markdown'
+     elif ext in JSON_EXTENSIONS:
+         return 'json'
+     elif ext in CODE_EXTENSIONS:
+         return 'code'
+     else:
+         return 'text'
+
+
+ def _default_text_chunker(
+     documents: Generator[Document, None, None],
+     config: Dict[str, Any]
+ ) -> Generator[Document, None, None]:
+     """
+     Default text chunker for unknown file types.
+     Uses recursive character splitting.
+     """
+     chunk_size = config.get('chunk_size', 1000)
+     chunk_overlap = config.get('chunk_overlap', 100)
+
+     splitter = RecursiveCharacterTextSplitter(
+         chunk_size=chunk_size,
+         chunk_overlap=chunk_overlap,
+         length_function=len,
+     )
+
+     for doc in documents:
+         chunks = splitter.split_documents([doc])
+         for idx, chunk in enumerate(chunks, 1):
+             chunk.metadata['chunk_id'] = idx
+             chunk.metadata['chunk_type'] = 'text'
+             yield chunk
+
+
+ def _code_chunker_from_documents(
+     documents: Generator[Document, None, None],
+     config: Dict[str, Any]
+ ) -> Generator[Document, None, None]:
+     """
+     Adapter to convert Document generator to code parser format.
+     """
+     def file_content_generator():
+         for doc in documents:
+             yield {
+                 'file_name': doc.metadata.get('file_path', doc.metadata.get('filename', 'unknown')),
+                 'file_content': doc.page_content,
+                 'commit_hash': doc.metadata.get('commit_hash', ''),
+             }
+
+     # parse_code_files_for_db returns chunks with proper metadata
+     for chunk in parse_code_files_for_db(file_content_generator()):
+         # Ensure file_path is preserved
+         if 'file_path' not in chunk.metadata and 'filename' in chunk.metadata:
+             chunk.metadata['file_path'] = chunk.metadata['filename']
+         yield chunk
+
+
+ def universal_chunker(
+     documents: Generator[Document, None, None],
+     config: Optional[Dict[str, Any]] = None
+ ) -> Generator[Document, None, None]:
+     """
+     Universal chunker that routes documents to appropriate chunkers based on file type.
+
+     Each document is inspected for its file extension (from metadata.file_path or
+     metadata.file_name) and routed to the appropriate chunker:
+
+     - Markdown files → markdown_chunker (header-based splitting)
+     - JSON files → json_chunker (recursive JSON splitting)
+     - Code files → code parser (TreeSitter-based parsing)
+     - Other files → default text chunker (recursive character splitting)
+
+     Args:
+         documents: Generator yielding Document objects with file content
+         config: Optional configuration dict with:
+             - markdown_config: Config for markdown chunker
+             - json_config: Config for JSON chunker
+             - code_config: Config for code chunker
+             - text_config: Config for default text chunker
+
+     Yields:
+         Document objects with chunked content and preserved metadata
+     """
+     if config is None:
+         config = {}
+
+     # Default configs for each chunker type
+     markdown_config = config.get('markdown_config', {
+         'strip_header': False,
+         'return_each_line': False,
+         'headers_to_split_on': [
+             ('#', 'Header 1'),
+             ('##', 'Header 2'),
+             ('###', 'Header 3'),
+             ('####', 'Header 4'),
+         ],
+         'max_tokens': 1024,
+         'token_overlap': 50,
+         'min_chunk_chars': 100,  # Merge chunks smaller than this
+     })
+
+     json_config = config.get('json_config', {
+         'max_tokens': 512,
+     })
+
+     code_config = config.get('code_config', {})
+
+     text_config = config.get('text_config', {
+         'chunk_size': 1000,
+         'chunk_overlap': 100,
+     })
+
+     # Buffer documents by type for batch processing
+     # This is more efficient than processing one at a time
+     markdown_docs = []
+     json_docs = []
+     code_docs = []
+     text_docs = []
+
+     # Buffer size before flushing
+     BUFFER_SIZE = 10
+
+     def flush_markdown():
+         if markdown_docs:
+             def gen():
+                 for d in markdown_docs:
+                     yield d
+             for chunk in markdown_chunker(gen(), markdown_config):
+                 yield chunk
+             markdown_docs.clear()
+
+     def flush_json():
+         if json_docs:
+             def gen():
+                 for d in json_docs:
+                     yield d
+             for chunk in json_chunker(gen(), json_config):
+                 yield chunk
+             json_docs.clear()
+
+     def flush_code():
+         if code_docs:
+             def gen():
+                 for d in code_docs:
+                     yield d
+             for chunk in _code_chunker_from_documents(gen(), code_config):
+                 yield chunk
+             code_docs.clear()
+
+     def flush_text():
+         if text_docs:
+             def gen():
+                 for d in text_docs:
+                     yield d
+             for chunk in _default_text_chunker(gen(), text_config):
+                 yield chunk
+             text_docs.clear()
+
+     for doc in documents:
+         # Get file path from metadata
+         file_path = (doc.metadata.get('file_path') or
+                      doc.metadata.get('file_name') or
+                      doc.metadata.get('source') or
+                      'unknown')
+
+         # Ensure file_path is in metadata for downstream use
+         doc.metadata['file_path'] = file_path
+
+         file_type = get_file_type(file_path)
+
+         if file_type == 'markdown':
+             markdown_docs.append(doc)
+             if len(markdown_docs) >= BUFFER_SIZE:
+                 yield from flush_markdown()
+         elif file_type == 'json':
+             json_docs.append(doc)
+             if len(json_docs) >= BUFFER_SIZE:
+                 yield from flush_json()
+         elif file_type == 'code':
+             code_docs.append(doc)
+             if len(code_docs) >= BUFFER_SIZE:
+                 yield from flush_code()
+         else:
+             text_docs.append(doc)
+             if len(text_docs) >= BUFFER_SIZE:
+                 yield from flush_text()
+
+     # Flush remaining documents
+     yield from flush_markdown()
+     yield from flush_json()
+     yield from flush_code()
+     yield from flush_text()
+
+
+ def chunk_single_document(
+     doc: Document,
+     config: Optional[Dict[str, Any]] = None
+ ) -> Generator[Document, None, None]:
+     """
+     Convenience function to chunk a single document.
+
+     Args:
+         doc: Single Document to chunk
+         config: Optional chunker configuration
+
+     Yields:
+         Chunked Document objects
+     """
+     def single_doc_gen():
+         yield doc
+
+     yield from universal_chunker(single_doc_gen(), config)
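
The new universal_chunker module above routes whole Documents by file extension, so a mixed stream (markdown, code, JSON, plain text) can be chunked in a single pass. A minimal usage sketch, assuming in-memory Documents; the metadata keys mirror the ones the loaders below stamp, and the config override shown is optional:

    from langchain_core.documents import Document
    from alita_sdk.tools.chunkers.universal_chunker import universal_chunker

    docs = [
        Document(page_content="# Title\n\nSome body text.", metadata={"file_path": "README.md"}),
        Document(page_content="def add(a, b):\n    return a + b\n", metadata={"file_path": "math_utils.py"}),
        Document(page_content='{"key": "value"}', metadata={"file_path": "settings.json"}),
    ]

    # Tighten only the fallback text splitter; the other chunkers keep their defaults
    config = {"text_config": {"chunk_size": 500, "chunk_overlap": 50}}

    for chunk in universal_chunker((d for d in docs), config):
        print(chunk.metadata.get("file_path"), "->", chunk.page_content[:40])

Note that documents are buffered per type (BUFFER_SIZE = 10) before being flushed to the underlying chunker, so output order follows flush order rather than strict input order.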
@@ -9,13 +9,13 @@ from langchain_core.tools import ToolException
  from pydantic import Field

  from alita_sdk.tools.base_indexer_toolkit import BaseIndexerToolkit
- from .chunkers.code.codeparser import parse_code_files_for_db

  logger = logging.getLogger(__name__)


  class CodeIndexerToolkit(BaseIndexerToolkit):
      def _get_indexed_data(self, index_name: str):
+         self._ensure_vectorstore_initialized()
          if not self.vector_adapter:
              raise ToolException("Vector adapter is not initialized. "
                                  "Check your configuration: embedding_model and vectorstore_type.")
@@ -66,26 +66,40 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
      def loader(self,
                 branch: Optional[str] = None,
                 whitelist: Optional[List[str]] = None,
-                blacklist: Optional[List[str]] = None) -> Generator[Document, None, None]:
+                blacklist: Optional[List[str]] = None,
+                chunked: bool = True) -> Generator[Document, None, None]:
          """
-         Generates file content from a branch, respecting whitelist and blacklist patterns.
+         Generates Documents from files in a branch, respecting whitelist and blacklist patterns.

          Parameters:
          - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
          - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
          - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+         - chunked (bool): If True (default), applies universal chunker based on file type.
+           If False, returns raw Documents without chunking.

          Returns:
-         - generator: Yields content from files matching the whitelist but not the blacklist.
+         - generator: Yields Documents from files matching the whitelist but not the blacklist.
+           Each document has exactly the key 'filename' in metadata, which is used as an ID
+           for further operations (indexing, deduplication, and retrieval).

          Example:
          # Use 'feature-branch', include '.py' files, exclude 'test_' files
-         file_generator = loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*'])
+         for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+             print(doc.page_content)

          Notes:
          - Whitelist and blacklist use Unix shell-style wildcards.
          - Files must match the whitelist and not the blacklist to be included.
+         - Each document MUST have exactly the key 'filename' in metadata. This key is used as an ID
+           for further operations such as indexing, deduplication, and retrieval.
+         - When chunked=True:
+           - .md files → markdown chunker (header-based splitting)
+           - .py/.js/.ts/etc → code parser (TreeSitter-based)
+           - .json files → JSON chunker
+           - other files → default text chunker
          """
+         import hashlib

          _files = self.__handle_get_files("", self.__get_branch(branch))
          self._log_tool_event(message="Listing files in branch", tool_name="loader")
@@ -103,41 +117,60 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                  or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
              return False

-         def file_content_generator():
+         def raw_document_generator() -> Generator[Document, None, None]:
+             """Yields raw Documents without chunking."""
              self._log_tool_event(message="Reading the files", tool_name="loader")
-             # log the progress of file reading
              total_files = len(_files)
+             processed = 0
+
              for idx, file in enumerate(_files, 1):
                  if is_whitelisted(file) and not is_blacklisted(file):
-                     # read file ONLY if it matches whitelist and does not match blacklist
                      try:
                          file_content = self._read_file(file, self.__get_branch(branch))
                      except Exception as e:
                          logger.error(f"Failed to read file {file}: {e}")
-                         file_content = ""
+                         continue
+
                      if not file_content:
-                         # empty file, skip
                          continue
-                     #
-                     # ensure file content is a string
+
+                     # Ensure file content is a string
                      if isinstance(file_content, bytes):
                          file_content = file_content.decode("utf-8", errors="ignore")
                      elif isinstance(file_content, dict) and file.endswith('.json'):
                          file_content = json.dumps(file_content)
                      elif not isinstance(file_content, str):
                          file_content = str(file_content)
-                     #
-                     # hash the file content to ensure uniqueness
-                     import hashlib
+
+                     # Hash the file content for uniqueness tracking
                      file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
-                     yield {"file_name": file,
-                            "file_content": file_content,
-                            "commit_hash": file_hash}
+                     processed += 1
+
+                     yield Document(
+                         page_content=file_content,
+                         metadata={
+                             'file_path': file,
+                             'filename': file,
+                             'source': file,
+                             'commit_hash': file_hash,
+                         }
+                     )
+
                  if idx % 10 == 0 or idx == total_files:
-                     self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
-             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
-
-         return parse_code_files_for_db(file_content_generator())
+                     self._log_tool_event(
+                         message=f"{idx} out of {total_files} files checked, {processed} matched",
+                         tool_name="loader"
+                     )
+
+             self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
+
+         if not chunked:
+             # Return raw documents without chunking
+             return raw_document_generator()
+
+         # Apply universal chunker based on file type
+         from .chunkers.universal_chunker import universal_chunker
+         return universal_chunker(raw_document_generator())


      def __handle_get_files(self, path: str, branch: str):
          """
@@ -128,12 +128,37 @@ BaseIndexDataParams = create_model(


  class BaseToolApiWrapper(BaseModel):
-
+
+     # Optional RunnableConfig for CLI/standalone usage (allows dispatch_custom_event to work)
+     _runnable_config: Optional[Dict[str, Any]] = None
+     # toolkit id propagated from backend
+     toolkit_id: int = 0
      def get_available_tools(self):
          raise NotImplementedError("Subclasses should implement this method")

-     def _log_tool_event(self, message: str, tool_name: str = None):
-         """Log data and dispatch custom event for the tool"""
+     def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
+         """
+         Set the RunnableConfig for dispatching custom events.
+
+         This is required when running outside of a LangChain agent context
+         (e.g., from CLI). Without a config containing a run_id,
+         dispatch_custom_event will fail with "Unable to dispatch an adhoc event
+         without a parent run id".
+
+         Args:
+             config: A RunnableConfig dict with at least {'run_id': uuid}
+         """
+         self._runnable_config = config
+
+     def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
+         """Log data and dispatch custom event for the tool.
+
+         Args:
+             message: The message to log
+             tool_name: Name of the tool (defaults to 'tool_progress')
+             config: Optional RunnableConfig. If not provided, uses self._runnable_config.
+                 Required when running outside a LangChain agent context.
+         """

          try:
              from langchain_core.callbacks import dispatch_custom_event
@@ -142,6 +167,10 @@ class BaseToolApiWrapper(BaseModel):
                  tool_name = 'tool_progress'

              logger.info(message)
+
+             # Use provided config, fall back to instance config
+             effective_config = config or self._runnable_config
+
              dispatch_custom_event(
                  name="thinking_step",
                  data={
@@ -149,6 +178,7 @@ class BaseToolApiWrapper(BaseModel):
                      "tool_name": tool_name,
                      "toolkit": self.__class__.__name__,
                  },
+                 config=effective_config,
              )
          except Exception as e:
              logger.warning(f"Failed to dispatch progress event: {str(e)}")
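
Together, set_runnable_config and the config plumbing let a wrapper emit progress events outside a LangChain agent run: the CLI seeds a parent run id once, and _log_tool_event threads it through to dispatch_custom_event. A sketch, assuming wrapper is an instance of some BaseToolApiWrapper subclass:

    import uuid

    # Seed a parent run id so dispatch_custom_event has a run to attach to
    wrapper.set_runnable_config({"run_id": uuid.uuid4()})
    wrapper._log_tool_event(message="starting scan", tool_name="loader")

    # A per-call config takes precedence over the instance-level one
    wrapper._log_tool_event(message="scan finished", tool_name="loader",
                            config={"run_id": uuid.uuid4()})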
@@ -165,6 +195,11 @@ class BaseToolApiWrapper(BaseModel):
              # execution = str(execution)
              return execution
          except Exception as e:
+             # Re-raise McpAuthorizationRequired directly without wrapping
+             from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
+             if isinstance(e, McpAuthorizationRequired):
+                 raise
+
              # Catch all tool execution exceptions and provide user-friendly error messages
              error_type = type(e).__name__
              error_message = str(e)
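
The early re-raise keeps MCP OAuth interrupts out of the generic error handler, so upstream code still sees the exception and can start an authorization flow instead of receiving a flattened error string. A sketch of the intended caller-side handling; the invocation and handler names here are hypothetical:

    from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired

    try:
        result = wrapper.run(tool_name, **tool_args)  # hypothetical invocation
    except McpAuthorizationRequired as auth_required:
        # Surface the authorization request to the user rather than
        # wrapping it in a user-friendly error message
        start_oauth_flow(auth_required)  # hypothetical handler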
@@ -589,27 +624,37 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
      def loader(self,
                 branch: Optional[str] = None,
                 whitelist: Optional[List[str]] = None,
-                blacklist: Optional[List[str]] = None) -> str:
+                blacklist: Optional[List[str]] = None,
+                chunked: bool = True) -> Generator[Document, None, None]:
          """
-         Generates file content from a branch, respecting whitelist and blacklist patterns.
+         Generates Documents from files in a branch, respecting whitelist and blacklist patterns.

          Parameters:
          - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
          - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
          - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+         - chunked (bool): If True (default), applies universal chunker based on file type.
+           If False, returns raw Documents without chunking.

          Returns:
-         - generator: Yields content from files matching the whitelist but not the blacklist.
+         - generator: Yields Documents from files matching the whitelist but not the blacklist.

          Example:
          # Use 'feature-branch', include '.py' files, exclude 'test_' files
-         file_generator = loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*'])
+         for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+             print(doc.page_content)

          Notes:
          - Whitelist and blacklist use Unix shell-style wildcards.
          - Files must match the whitelist and not the blacklist to be included.
+         - When chunked=True:
+           - .md files → markdown chunker (header-based splitting)
+           - .py/.js/.ts/etc → code parser (TreeSitter-based)
+           - .json files → JSON chunker
+           - other files → default text chunker
          """
-         from .chunkers.code.codeparser import parse_code_files_for_db
+         from langchain_core.documents import Document
+         import hashlib

          _files = self.__handle_get_files("", self.__get_branch(branch))
          self._log_tool_event(message="Listing files in branch", tool_name="loader")
@@ -627,32 +672,52 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
                  or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
              return False

-         def file_content_generator():
+         def raw_document_generator() -> Generator[Document, None, None]:
+             """Yields raw Documents without chunking."""
              self._log_tool_event(message="Reading the files", tool_name="loader")
-             # log the progress of file reading
              total_files = len(_files)
+             processed = 0
+
              for idx, file in enumerate(_files, 1):
                  if is_whitelisted(file) and not is_blacklisted(file):
-                     # read file ONLY if it matches whitelist and does not match blacklist
                      try:
                          file_content = self._read_file(file, self.__get_branch(branch))
                      except Exception as e:
                          logger.error(f"Failed to read file {file}: {e}")
-                         file_content = ""
+                         continue
+
                      if not file_content:
-                         # empty file, skip
                          continue
-                     # hash the file content to ensure uniqueness
-                     import hashlib
+
+                     # Hash the file content for uniqueness tracking
                      file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
-                     yield {"file_name": file,
-                            "file_content": file_content,
-                            "commit_hash": file_hash}
+                     processed += 1
+
+                     yield Document(
+                         page_content=file_content,
+                         metadata={
+                             'file_path': file,
+                             'file_name': file,
+                             'source': file,
+                             'commit_hash': file_hash,
+                         }
+                     )
+
                  if idx % 10 == 0 or idx == total_files:
-                     self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
-             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
+                     self._log_tool_event(
+                         message=f"{idx} out of {total_files} files checked, {processed} matched",
+                         tool_name="loader"
+                     )
+
+             self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")

-         return parse_code_files_for_db(file_content_generator())
+         if not chunked:
+             # Return raw documents without chunking
+             return raw_document_generator()
+
+         # Apply universal chunker based on file type
+         from .chunkers.universal_chunker import universal_chunker
+         return universal_chunker(raw_document_generator())

      def index_data(self,
                     index_name: str,
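
One small divergence between the two loaders: CodeIndexerToolkit stamps the ID key 'filename', while this base-class variant stamps 'file_name'. Both also set 'file_path', which is the first key universal_chunker consults when routing, so chunking behaves identically either way.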
@@ -68,7 +68,7 @@ class JiraToolkit(BaseToolkit):
          name,
          cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
          limit=(int, Field(description="Limit issues. Default is 5", gt=0, default=5)),
-         api_version=(Optional[str], Field(description="Rest API version: optional. Default is 2", default="2")),
+         api_version=(Literal['2', '3'], Field(description="Rest API version: optional. Default is 2", default="3")),
          labels=(Optional[str], Field(
              description="List of comma separated labels used for labeling of agent's created or updated entities",
              default=None,
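
Narrowing api_version from Optional[str] to Literal['2', '3'] (with the default moving to "3") means an unsupported version now fails at model validation instead of at request time; note the field description still says "Default is 2", which no longer matches the new default. A standalone sketch of the same pattern (model and field names here are illustrative, not the toolkit's actual schema):

    from typing import Literal
    from pydantic import Field, ValidationError, create_model

    JiraConfig = create_model(
        "JiraConfig",
        api_version=(Literal['2', '3'], Field(description="Rest API version", default="3")),
    )

    print(JiraConfig().api_version)                 # "3" (the new default)
    print(JiraConfig(api_version="2").api_version)  # "2" is still accepted

    try:
        JiraConfig(api_version="4")                 # rejected by the Literal constraint
    except ValidationError as exc:
        print(exc.errors()[0]["type"])              # e.g. 'literal_error' on pydantic v2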