alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1256 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +64 -8
  30. alita_sdk/community/inventory/__init__.py +224 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/visualize.py +1370 -0
  58. alita_sdk/configurations/bitbucket.py +94 -2
  59. alita_sdk/configurations/confluence.py +96 -1
  60. alita_sdk/configurations/gitlab.py +79 -0
  61. alita_sdk/configurations/jira.py +103 -0
  62. alita_sdk/configurations/testrail.py +88 -0
  63. alita_sdk/configurations/xray.py +93 -0
  64. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  65. alita_sdk/configurations/zephyr_essential.py +75 -0
  66. alita_sdk/runtime/clients/artifact.py +1 -1
  67. alita_sdk/runtime/clients/client.py +214 -42
  68. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  69. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  70. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  71. alita_sdk/runtime/langchain/assistant.py +118 -30
  72. alita_sdk/runtime/langchain/constants.py +8 -1
  73. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  74. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  75. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  76. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +41 -12
  77. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
  78. alita_sdk/runtime/langchain/document_loaders/constants.py +116 -99
  79. alita_sdk/runtime/langchain/interfaces/llm_processor.py +2 -2
  80. alita_sdk/runtime/langchain/langraph_agent.py +307 -71
  81. alita_sdk/runtime/langchain/utils.py +48 -8
  82. alita_sdk/runtime/llms/preloaded.py +2 -6
  83. alita_sdk/runtime/models/mcp_models.py +61 -0
  84. alita_sdk/runtime/toolkits/__init__.py +26 -0
  85. alita_sdk/runtime/toolkits/application.py +9 -2
  86. alita_sdk/runtime/toolkits/artifact.py +18 -6
  87. alita_sdk/runtime/toolkits/datasource.py +13 -6
  88. alita_sdk/runtime/toolkits/mcp.py +780 -0
  89. alita_sdk/runtime/toolkits/planning.py +178 -0
  90. alita_sdk/runtime/toolkits/tools.py +205 -55
  91. alita_sdk/runtime/toolkits/vectorstore.py +9 -4
  92. alita_sdk/runtime/tools/__init__.py +11 -3
  93. alita_sdk/runtime/tools/application.py +7 -0
  94. alita_sdk/runtime/tools/artifact.py +225 -12
  95. alita_sdk/runtime/tools/function.py +95 -5
  96. alita_sdk/runtime/tools/graph.py +10 -4
  97. alita_sdk/runtime/tools/image_generation.py +212 -0
  98. alita_sdk/runtime/tools/llm.py +494 -102
  99. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  100. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  101. alita_sdk/runtime/tools/mcp_server_tool.py +4 -4
  102. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  103. alita_sdk/runtime/tools/planning/models.py +246 -0
  104. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  105. alita_sdk/runtime/tools/router.py +2 -1
  106. alita_sdk/runtime/tools/sandbox.py +180 -79
  107. alita_sdk/runtime/tools/vectorstore.py +22 -21
  108. alita_sdk/runtime/tools/vectorstore_base.py +125 -52
  109. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  110. alita_sdk/runtime/utils/mcp_client.py +465 -0
  111. alita_sdk/runtime/utils/mcp_oauth.py +244 -0
  112. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  113. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  114. alita_sdk/runtime/utils/streamlit.py +40 -13
  115. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  116. alita_sdk/runtime/utils/utils.py +12 -0
  117. alita_sdk/tools/__init__.py +77 -33
  118. alita_sdk/tools/ado/repos/__init__.py +7 -6
  119. alita_sdk/tools/ado/repos/repos_wrapper.py +11 -11
  120. alita_sdk/tools/ado/test_plan/__init__.py +7 -7
  121. alita_sdk/tools/ado/wiki/__init__.py +7 -11
  122. alita_sdk/tools/ado/wiki/ado_wrapper.py +89 -15
  123. alita_sdk/tools/ado/work_item/__init__.py +7 -11
  124. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  125. alita_sdk/tools/advanced_jira_mining/__init__.py +8 -7
  126. alita_sdk/tools/aws/delta_lake/__init__.py +11 -9
  127. alita_sdk/tools/azure_ai/search/__init__.py +7 -6
  128. alita_sdk/tools/base_indexer_toolkit.py +345 -70
  129. alita_sdk/tools/bitbucket/__init__.py +9 -8
  130. alita_sdk/tools/bitbucket/api_wrapper.py +50 -6
  131. alita_sdk/tools/browser/__init__.py +4 -4
  132. alita_sdk/tools/carrier/__init__.py +4 -6
  133. alita_sdk/tools/chunkers/__init__.py +3 -1
  134. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  135. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  136. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  137. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  138. alita_sdk/tools/cloud/aws/__init__.py +7 -6
  139. alita_sdk/tools/cloud/azure/__init__.py +7 -6
  140. alita_sdk/tools/cloud/gcp/__init__.py +7 -6
  141. alita_sdk/tools/cloud/k8s/__init__.py +7 -6
  142. alita_sdk/tools/code/linter/__init__.py +7 -7
  143. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  144. alita_sdk/tools/code/sonar/__init__.py +8 -7
  145. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  146. alita_sdk/tools/confluence/__init__.py +9 -8
  147. alita_sdk/tools/confluence/api_wrapper.py +171 -75
  148. alita_sdk/tools/confluence/loader.py +10 -0
  149. alita_sdk/tools/custom_open_api/__init__.py +9 -4
  150. alita_sdk/tools/elastic/__init__.py +8 -7
  151. alita_sdk/tools/elitea_base.py +492 -52
  152. alita_sdk/tools/figma/__init__.py +7 -7
  153. alita_sdk/tools/figma/api_wrapper.py +2 -1
  154. alita_sdk/tools/github/__init__.py +9 -9
  155. alita_sdk/tools/github/api_wrapper.py +9 -26
  156. alita_sdk/tools/github/github_client.py +62 -2
  157. alita_sdk/tools/gitlab/__init__.py +8 -8
  158. alita_sdk/tools/gitlab/api_wrapper.py +135 -33
  159. alita_sdk/tools/gitlab_org/__init__.py +7 -8
  160. alita_sdk/tools/google/bigquery/__init__.py +11 -12
  161. alita_sdk/tools/google_places/__init__.py +8 -7
  162. alita_sdk/tools/jira/__init__.py +9 -7
  163. alita_sdk/tools/jira/api_wrapper.py +100 -52
  164. alita_sdk/tools/keycloak/__init__.py +8 -7
  165. alita_sdk/tools/localgit/local_git.py +56 -54
  166. alita_sdk/tools/memory/__init__.py +1 -1
  167. alita_sdk/tools/non_code_indexer_toolkit.py +3 -2
  168. alita_sdk/tools/ocr/__init__.py +8 -7
  169. alita_sdk/tools/openapi/__init__.py +10 -1
  170. alita_sdk/tools/pandas/__init__.py +8 -7
  171. alita_sdk/tools/postman/__init__.py +7 -8
  172. alita_sdk/tools/postman/api_wrapper.py +19 -8
  173. alita_sdk/tools/postman/postman_analysis.py +8 -1
  174. alita_sdk/tools/pptx/__init__.py +8 -9
  175. alita_sdk/tools/qtest/__init__.py +16 -11
  176. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  177. alita_sdk/tools/rally/__init__.py +7 -8
  178. alita_sdk/tools/report_portal/__init__.py +9 -7
  179. alita_sdk/tools/salesforce/__init__.py +7 -7
  180. alita_sdk/tools/servicenow/__init__.py +10 -10
  181. alita_sdk/tools/sharepoint/__init__.py +7 -6
  182. alita_sdk/tools/sharepoint/api_wrapper.py +127 -36
  183. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  184. alita_sdk/tools/sharepoint/utils.py +8 -2
  185. alita_sdk/tools/slack/__init__.py +7 -6
  186. alita_sdk/tools/sql/__init__.py +8 -7
  187. alita_sdk/tools/sql/api_wrapper.py +71 -23
  188. alita_sdk/tools/testio/__init__.py +7 -6
  189. alita_sdk/tools/testrail/__init__.py +8 -9
  190. alita_sdk/tools/utils/__init__.py +26 -4
  191. alita_sdk/tools/utils/content_parser.py +88 -60
  192. alita_sdk/tools/utils/text_operations.py +254 -0
  193. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +76 -26
  194. alita_sdk/tools/xray/__init__.py +9 -7
  195. alita_sdk/tools/zephyr/__init__.py +7 -6
  196. alita_sdk/tools/zephyr_enterprise/__init__.py +8 -6
  197. alita_sdk/tools/zephyr_essential/__init__.py +7 -6
  198. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  199. alita_sdk/tools/zephyr_scale/__init__.py +7 -6
  200. alita_sdk/tools/zephyr_squad/__init__.py +7 -6
  201. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +147 -2
  202. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/RECORD +206 -130
  203. alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
  204. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
  205. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
  206. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ from copy import deepcopy
1
2
  import os
2
3
  import re
3
4
  import tempfile
@@ -91,21 +92,24 @@ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool
91
92
  return ToolException(
92
93
  "Not supported type of files entered. Supported types are TXT, DOCX, PDF, PPTX, XLSX and XLS only.")
93
94
 
94
- if hasattr(loader, 'get_content'):
95
- return loader.get_content()
96
- else:
97
- extension = Path(file_path if file_path else file_name).suffix
98
- loader_kwargs = get_loader_kwargs(loaders_map.get(extension), file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets)
99
- if file_content:
100
- return load_content_from_bytes(file_content=file_content,
101
- extension=extension,
102
- loader_extra_config=loader_kwargs,
103
- llm=llm)
95
+ try:
96
+ if hasattr(loader, 'get_content'):
97
+ return loader.get_content()
104
98
  else:
105
- return load_content(file_path=file_path,
106
- extension=extension,
107
- loader_extra_config=loader_kwargs,
108
- llm=llm)
99
+ extension = Path(file_path if file_path else file_name).suffix
100
+ loader_kwargs = get_loader_kwargs(loaders_map.get(extension), file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets)
101
+ if file_content:
102
+ return load_content_from_bytes(file_content=file_content,
103
+ extension=extension,
104
+ loader_extra_config=loader_kwargs,
105
+ llm=llm)
106
+ else:
107
+ return load_content(file_path=file_path,
108
+ extension=extension,
109
+ loader_extra_config=loader_kwargs,
110
+ llm=llm)
111
+ except Exception as e:
112
+ return ToolException(f"Error reading file ({file_name or file_path}) content. Make sure these types are supported: {str(e)}")
109
113
 
110
114
  def load_file_docs(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
111
115
  sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False) -> List[Document] | ToolException:
@@ -126,7 +130,7 @@ def load_file_docs(file_name=None, file_content=None, is_capture_image: bool = F
126
130
 
127
131
  def get_loader_kwargs(loader_object, file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
128
132
  sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False, prompt=None):
129
- loader_kwargs = loader_object['kwargs']
133
+ loader_kwargs = deepcopy(loader_object['kwargs'])
130
134
  loader_kwargs.update({
131
135
  "file_path": file_path,
132
136
  "file_content": file_content,
@@ -152,7 +156,7 @@ def prepare_loader(file_name=None, file_content=None, is_capture_image: bool = F
152
156
 
153
157
  loader_object = loaders_map.get(extension)
154
158
  if not loader_object:
155
- return None
159
+ loader_object = loaders_map.get('.txt') # Default to text loader if no specific loader found
156
160
  loader_kwargs = get_loader_kwargs(loader_object, file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets, prompt)
157
161
  loader = loader_object['class'](**loader_kwargs)
158
162
  return loader
@@ -221,58 +225,80 @@ def process_document_by_type(content, extension_source: str, document: Document
221
225
  metadata={**document.metadata, 'chunk_id': 1}
222
226
  )
223
227
  return
228
+ #
229
+ chunks_counter = 0
224
230
  for chunk in chunks:
231
+ chunks_counter += 1
232
+ metadata = {**document.metadata, **chunk.metadata}
233
+ #
234
+ # ensure each chunk has a unique chunk_id
235
+ metadata['chunk_id'] = chunks_counter
236
+ #
225
237
  yield Document(
226
238
  page_content=sanitize_for_postgres(chunk.page_content),
227
- metadata={**document.metadata, **chunk.metadata}
239
+ metadata=metadata
228
240
  )
229
241
 
230
242
 
231
def process_content_by_type(content, filename: str, llm=None, chunking_config=None, fallback_extensions=None) -> \
        Generator[Document, None, None]:
    """Process the content of a file based on its type using a configured loader.

    The content is written to a temporary file whose suffix is the candidate
    extension, and the loader registered in ``loaders_map`` for that extension
    is used to load it. The extension taken from ``filename`` is tried first,
    followed by each entry of ``fallback_extensions``; ``.txt`` is used when
    neither is available.

    Args:
        content: Raw bytes of the file. When ``None`` a warning is logged and
            nothing is yielded.
        filename: File name used only to derive the primary extension.
        llm: Passed to the loader as ``llm`` when the loader configuration
            enables the ``LoaderProperties.LLM`` flag.
        chunking_config: Optional mapping ``extension -> {key: value}`` with
            user overrides; only keys present in the loader's
            ``allowed_to_override`` mapping (default: the loader kwargs) and
            whose value differs from the default are applied.
        fallback_extensions: Optional extensions to try, in order, when
            loading with the previous extension raised. The caller's list is
            never modified.

    Yields:
        Documents produced by the first loader that completes successfully.

    Raises:
        Exception: Whatever the loader raised, when no ``fallback_extensions``
            were supplied. With fallbacks, errors are logged and the next
            extension is tried.
    """
    if content is None:
        # Nothing to write; checked up-front so no temp file is created at all.
        logger.warning(
            f"'{IndexerKeywords.CONTENT_IN_BYTES.value}' ie expected but not found in document metadata.")
        return

    # Work on a copy: inserting into the caller's list would grow it on every call.
    extensions = list(fallback_extensions) if fallback_extensions else []
    match = re.search(r'\.([^.]+)$', filename)

    if match:
        extensions.insert(0, f".{match.group(1).lower()}")
    elif not extensions:
        extensions = [".txt"]

    for extension in extensions:
        temp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(mode='w+b', suffix=extension, delete=False) as temp_file:
                temp_file_path = temp_file.name
                temp_file.write(content)
                temp_file.flush()

            loader_config = loaders_map.get(extension)
            if not loader_config:
                logger.warning(f"No loader found for file extension: {extension}. File: {temp_file_path}")
                return

            loader_cls = loader_config['class']
            # Copy the registered kwargs: the assignments and pops below must not
            # leak into the shared loaders_map entry and affect later calls.
            loader_kwargs = dict(loader_config['kwargs'])
            # Determine which loader configuration keys are allowed to be overridden by user input.
            # If 'allowed_to_override' is specified in the loader configuration, use it; otherwise, allow all keys in loader_kwargs.
            allowed_to_override = loader_config.get('allowed_to_override', loader_kwargs)
            # Apply user-supplied values for this extension, but only for permitted keys
            # and only when the value differs from the default.
            if chunking_config and (users_config_for_extension := chunking_config.get(extension, {})):
                for key in set(users_config_for_extension.keys()) & set(allowed_to_override.keys()):
                    if users_config_for_extension[key] != allowed_to_override[key]:
                        loader_kwargs[key] = users_config_for_extension[key]
            # The flag keys are consumed here and replaced by the actual objects the loader expects.
            if LoaderProperties.LLM.value in loader_kwargs and loader_kwargs.pop(LoaderProperties.LLM.value):
                loader_kwargs['llm'] = llm
            if LoaderProperties.PROMPT_DEFAULT.value in loader_kwargs and loader_kwargs.pop(LoaderProperties.PROMPT_DEFAULT.value):
                loader_kwargs[LoaderProperties.PROMPT.value] = image_processing_prompt
            loader = loader_cls(file_path=temp_file_path, **loader_kwargs)
            yield from loader.load()
            break
        except Exception as e:
            if fallback_extensions:
                # NOTE(review): documents already yielded before the failure are not
                # retracted, so a fallback may produce overlapping output - confirm
                # whether loaders can fail part-way through.
                logger.warning(f"Error loading attachment: {str(e)} for file {temp_file_path} (extension: {extension})")
                logger.warning(f"Continuing with fallback extensions: {fallback_extensions}.")
                continue
            else:
                raise
        finally:
            # Runs for every attempt (success, return, continue or raise).
            if temp_file_path and os.path.exists(temp_file_path):
                os.remove(temp_file_path)
276
302
 
277
303
  # FIXME copied from langchain_core/utils/strings.py of 0.3.74 version
278
304
  # https://github.com/langchain-ai/langchain/pull/32157
@@ -296,7 +322,9 @@ def sanitize_for_postgres(text: str, replacement: str = "") -> str:
296
322
  return text.replace("\x00", replacement)
297
323
 
298
324
 
299
- def file_extension_by_chunker(chunker_name: str) -> str:
325
+ def file_extension_by_chunker(chunker_name: str) -> str | None:
326
+ if not chunker_name:
327
+ return None
300
328
  name = chunker_name.lower()
301
329
  if name == "markdown":
302
330
  return ".md"
@@ -0,0 +1,254 @@
1
+ """
2
+ Shared text operations utilities for file manipulation across toolkits.
3
+
4
+ Provides common functionality for:
5
+ - Parsing OLD/NEW marker-based edits
6
+ - Text file validation
7
+ - Line-based slicing and partial reads
8
+ - Content searching with context
9
+ """
10
import logging
import re
from typing import Any, Dict, List, Optional, Tuple
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Text file extensions that support editing
17
+ TEXT_EDITABLE_EXTENSIONS = {
18
+ '.md', '.txt', '.csv', '.json', '.xml', '.html',
19
+ '.yaml', '.yml', '.ini', '.conf', '.log', '.sh',
20
+ '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.go',
21
+ '.rb', '.php', '.c', '.cpp', '.h', '.hpp', '.cs',
22
+ '.sql', '.r', '.m', '.swift', '.kt', '.rs', '.scala'
23
+ }
24
+
25
+
26
def parse_old_new_markers(file_query: str) -> List[Tuple[str, str]]:
    """
    Parse OLD/NEW marker-based edit instructions.

    Extracts pairs of old and new content from a file query using markers:
    - OLD <<<< ... >>>> OLD
    - NEW <<<< ... >>>> NEW

    Opening markers are recognised by the substrings ``OLD <<<`` / ``NEW <<<``
    and closing markers by ``>>>> OLD`` / ``>>>> NEW`` anywhere in a line.
    Marker lines themselves are never part of the captured content, and each
    captured section is stripped of leading/trailing whitespace.

    A closing marker is only honoured while the matching section is open; a
    stray closing marker is otherwise ignored (or kept as content when it
    appears inside the other kind of section) so it cannot inject an empty
    entry and shift the OLD/NEW pairing.

    Args:
        file_query: String containing marked old and new content sections

    Returns:
        List of tuples (old_content, new_content) for each edit pair. Sections
        are paired by position; if the numbers of OLD and NEW sections differ,
        the surplus sections are dropped and a warning is logged.

    Example:
        >>> query = '''
        ... OLD <<<<
        ... Hello World
        ... >>>> OLD
        ... NEW <<<<
        ... Hello Mars
        ... >>>> NEW
        ... '''
        >>> parse_old_new_markers(query)
        [('Hello World', 'Hello Mars')]
    """
    old_contents: List[str] = []
    new_contents: List[str] = []

    # Which kind of section (if any) the current line belongs to.
    in_old_section = False
    in_new_section = False

    # Lines collected for the section currently being read.
    current_section_content: List[str] = []

    for line in file_query.split("\n"):
        # OLD section start
        if "OLD <<<" in line:
            in_old_section = True
            current_section_content = []
            continue

        # OLD section end (only meaningful when an OLD section is open)
        if in_old_section and ">>>> OLD" in line:
            in_old_section = False
            old_contents.append("\n".join(current_section_content).strip())
            current_section_content = []
            continue

        # NEW section start
        if "NEW <<<" in line:
            in_new_section = True
            current_section_content = []
            continue

        # NEW section end (only meaningful when a NEW section is open)
        if in_new_section and ">>>> NEW" in line:
            in_new_section = False
            new_contents.append("\n".join(current_section_content).strip())
            current_section_content = []
            continue

        # Ordinary line: keep it only while inside a section.
        if in_old_section or in_new_section:
            current_section_content.append(line)

    if len(old_contents) != len(new_contents):
        # zip() below truncates to the shorter list; make the data loss visible.
        logger.warning(
            "Unbalanced OLD/NEW markers: %d OLD section(s) vs %d NEW section(s); "
            "unpaired sections are ignored.",
            len(old_contents), len(new_contents),
        )

    return list(zip(old_contents, new_contents))
102
+
103
+
104
def is_text_editable(filename: str) -> bool:
    """
    Check if a file is editable as text based on its extension.

    The check is case-insensitive and looks only at the final suffix of the
    name, so files without a suffix are reported as not editable.

    Args:
        filename: Name or path of the file to check. ``None`` or an empty
            string is treated as "not editable" instead of raising.

    Returns:
        True if file extension is in the text-editable whitelist

    Example:
        >>> is_text_editable("config.json")
        True
        >>> is_text_editable("image.png")
        False
    """
    from pathlib import Path

    # Path(None) raises TypeError; a missing name simply is not editable.
    if not filename:
        return False

    return Path(filename).suffix.lower() in TEXT_EDITABLE_EXTENSIONS
123
+
124
+
125
def apply_line_slice(
    content: str,
    offset: Optional[int] = None,
    limit: Optional[int] = None,
    head: Optional[int] = None,
    tail: Optional[int] = None
) -> str:
    """
    Apply line-based slicing to text content.

    Supports multiple modes, checked in this order (the first one whose
    parameter is not None wins):
    - head: Read only first N lines
    - tail: Read only last N lines
    - offset (+ optional limit): Read from line `offset` for `limit` lines
      (1-indexed); without `limit`, read to the end
    - No params: Return full content

    Line terminators are preserved, so every returned line except possibly the
    last line of the original content still ends with its newline.

    Args:
        content: Text content to slice; empty/None content is returned as-is
        offset: Starting line number (1-indexed, inclusive); values below 1
            are treated as 1
        limit: Number of lines to read from offset; negative values are
            treated as 0. Ignored when `offset` is None.
        head: Return only first N lines; negative values are treated as 0
        tail: Return only last N lines; a value <= 0 returns the full content

    Returns:
        Sliced content as string

    Example:
        >>> text = "line1\\nline2\\nline3\\nline4\\nline5"
        >>> apply_line_slice(text, offset=2, limit=2)
        'line2\\nline3\\n'
        >>> apply_line_slice(text, head=2)
        'line1\\nline2\\n'
        >>> apply_line_slice(text, tail=2)
        'line4\\nline5'
    """
    if not content:
        return content

    lines = content.splitlines(keepends=True)

    # Head mode: first N lines. Clamp so a negative count cannot turn into
    # "everything except the last N lines" via negative slice indexing.
    if head is not None:
        return ''.join(lines[:max(0, head)])

    # Tail mode: last N lines. lines[-0:] would be the whole list anyway, so
    # non-positive values keep returning the full content.
    if tail is not None:
        return ''.join(lines[-tail:] if tail > 0 else lines)

    # Offset + limit mode: slice from offset for limit lines
    if offset is not None:
        start_idx = max(0, offset - 1)  # Convert 1-indexed to 0-indexed
        if limit is None:
            return ''.join(lines[start_idx:])
        # Clamp for the same reason as head: a negative limit must not wrap.
        return ''.join(lines[start_idx:start_idx + max(0, limit)])

    # No slicing parameters: return full content
    return content
184
+
185
+
186
def search_in_content(
    content: str,
    pattern: str,
    is_regex: bool = True,
    context_lines: int = 2
) -> List[Dict[str, Any]]:
    """
    Search for pattern in content with context lines.

    The search is case-insensitive and line-based: each line is tested
    independently and at most one match (the first in the line) is reported
    per line.

    Args:
        content: Text content to search
        pattern: Search pattern (regex if is_regex=True, else literal string)
        is_regex: Whether to treat pattern as regex (default True)
        context_lines: Number of lines before/after match to include (default 2)

    Returns:
        List of match dictionaries with keys:
        - line_number: 1-indexed line number of match
        - line_content: The matching line
        - match_text: The actual matched text
        - context_before: List of lines before match
        - context_after: List of lines after match

        An empty list is returned for empty content or when the regex
        pattern does not compile (a warning is logged in that case).

    Example:
        >>> text = "line1\\nHello World\\nline3"
        >>> matches = search_in_content(text, "Hello", is_regex=False)
        >>> matches[0]['line_number']
        2
        >>> matches[0]['match_text']
        'Hello'
    """
    if not content:
        return []

    lines = content.splitlines()
    matches: List[Dict[str, Any]] = []

    # Compile regex pattern or escape for literal search
    if is_regex:
        try:
            regex = re.compile(pattern, re.IGNORECASE)
        except re.error as e:
            logger.warning(f"Invalid regex pattern '{pattern}': {e}")
            return []
    else:
        regex = re.compile(re.escape(pattern), re.IGNORECASE)

    # Search each line
    for line_idx, line in enumerate(lines):
        match = regex.search(line)
        if not match:
            continue

        # Context window, clipped to the bounds of the content.
        context_start = max(0, line_idx - context_lines)
        context_end = min(len(lines), line_idx + context_lines + 1)

        matches.append({
            'line_number': line_idx + 1,  # Convert to 1-indexed
            'line_content': line,
            'match_text': match.group(0),
            'context_before': lines[context_start:line_idx],
            'context_after': lines[line_idx + 1:context_end],
        })

    return matches