alita-sdk 0.3.257__py3-none-any.whl → 0.3.562__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +72 -12
  30. alita_sdk/community/inventory/__init__.py +236 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  58. alita_sdk/community/inventory/visualize.py +1370 -0
  59. alita_sdk/configurations/__init__.py +11 -0
  60. alita_sdk/configurations/ado.py +148 -2
  61. alita_sdk/configurations/azure_search.py +1 -1
  62. alita_sdk/configurations/bigquery.py +1 -1
  63. alita_sdk/configurations/bitbucket.py +94 -2
  64. alita_sdk/configurations/browser.py +18 -0
  65. alita_sdk/configurations/carrier.py +19 -0
  66. alita_sdk/configurations/confluence.py +130 -1
  67. alita_sdk/configurations/delta_lake.py +1 -1
  68. alita_sdk/configurations/figma.py +76 -5
  69. alita_sdk/configurations/github.py +65 -1
  70. alita_sdk/configurations/gitlab.py +81 -0
  71. alita_sdk/configurations/google_places.py +17 -0
  72. alita_sdk/configurations/jira.py +103 -0
  73. alita_sdk/configurations/openapi.py +111 -0
  74. alita_sdk/configurations/postman.py +1 -1
  75. alita_sdk/configurations/qtest.py +72 -3
  76. alita_sdk/configurations/report_portal.py +115 -0
  77. alita_sdk/configurations/salesforce.py +19 -0
  78. alita_sdk/configurations/service_now.py +1 -12
  79. alita_sdk/configurations/sharepoint.py +167 -0
  80. alita_sdk/configurations/sonar.py +18 -0
  81. alita_sdk/configurations/sql.py +20 -0
  82. alita_sdk/configurations/testio.py +101 -0
  83. alita_sdk/configurations/testrail.py +88 -0
  84. alita_sdk/configurations/xray.py +94 -1
  85. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  86. alita_sdk/configurations/zephyr_essential.py +95 -0
  87. alita_sdk/runtime/clients/artifact.py +21 -4
  88. alita_sdk/runtime/clients/client.py +458 -67
  89. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  90. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  91. alita_sdk/runtime/clients/sandbox_client.py +352 -0
  92. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  93. alita_sdk/runtime/langchain/assistant.py +183 -43
  94. alita_sdk/runtime/langchain/constants.py +647 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  96. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  97. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
  100. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  101. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  102. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  103. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  104. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  105. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  106. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  107. alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
  108. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  109. alita_sdk/runtime/langchain/langraph_agent.py +407 -92
  110. alita_sdk/runtime/langchain/utils.py +102 -8
  111. alita_sdk/runtime/llms/preloaded.py +2 -6
  112. alita_sdk/runtime/models/mcp_models.py +61 -0
  113. alita_sdk/runtime/skills/__init__.py +91 -0
  114. alita_sdk/runtime/skills/callbacks.py +498 -0
  115. alita_sdk/runtime/skills/discovery.py +540 -0
  116. alita_sdk/runtime/skills/executor.py +610 -0
  117. alita_sdk/runtime/skills/input_builder.py +371 -0
  118. alita_sdk/runtime/skills/models.py +330 -0
  119. alita_sdk/runtime/skills/registry.py +355 -0
  120. alita_sdk/runtime/skills/skill_runner.py +330 -0
  121. alita_sdk/runtime/toolkits/__init__.py +28 -0
  122. alita_sdk/runtime/toolkits/application.py +14 -4
  123. alita_sdk/runtime/toolkits/artifact.py +24 -9
  124. alita_sdk/runtime/toolkits/datasource.py +13 -6
  125. alita_sdk/runtime/toolkits/mcp.py +780 -0
  126. alita_sdk/runtime/toolkits/planning.py +178 -0
  127. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  128. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  129. alita_sdk/runtime/toolkits/tools.py +314 -70
  130. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  131. alita_sdk/runtime/tools/__init__.py +24 -0
  132. alita_sdk/runtime/tools/application.py +16 -4
  133. alita_sdk/runtime/tools/artifact.py +367 -33
  134. alita_sdk/runtime/tools/data_analysis.py +183 -0
  135. alita_sdk/runtime/tools/function.py +100 -4
  136. alita_sdk/runtime/tools/graph.py +81 -0
  137. alita_sdk/runtime/tools/image_generation.py +218 -0
  138. alita_sdk/runtime/tools/llm.py +1013 -177
  139. alita_sdk/runtime/tools/loop.py +3 -1
  140. alita_sdk/runtime/tools/loop_output.py +3 -1
  141. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  142. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  143. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  144. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  145. alita_sdk/runtime/tools/planning/models.py +246 -0
  146. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  147. alita_sdk/runtime/tools/router.py +2 -1
  148. alita_sdk/runtime/tools/sandbox.py +375 -0
  149. alita_sdk/runtime/tools/skill_router.py +776 -0
  150. alita_sdk/runtime/tools/tool.py +3 -1
  151. alita_sdk/runtime/tools/vectorstore.py +69 -65
  152. alita_sdk/runtime/tools/vectorstore_base.py +163 -90
  153. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  154. alita_sdk/runtime/utils/mcp_client.py +492 -0
  155. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  156. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  157. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  158. alita_sdk/runtime/utils/streamlit.py +41 -14
  159. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  160. alita_sdk/runtime/utils/utils.py +48 -0
  161. alita_sdk/tools/__init__.py +135 -37
  162. alita_sdk/tools/ado/__init__.py +2 -2
  163. alita_sdk/tools/ado/repos/__init__.py +15 -19
  164. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
  165. alita_sdk/tools/ado/test_plan/__init__.py +26 -8
  166. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
  167. alita_sdk/tools/ado/wiki/__init__.py +27 -12
  168. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
  169. alita_sdk/tools/ado/work_item/__init__.py +27 -12
  170. alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
  171. alita_sdk/tools/advanced_jira_mining/__init__.py +12 -8
  172. alita_sdk/tools/aws/delta_lake/__init__.py +14 -11
  173. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  174. alita_sdk/tools/azure_ai/search/__init__.py +13 -8
  175. alita_sdk/tools/base/tool.py +5 -1
  176. alita_sdk/tools/base_indexer_toolkit.py +454 -110
  177. alita_sdk/tools/bitbucket/__init__.py +27 -19
  178. alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
  179. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  180. alita_sdk/tools/browser/__init__.py +41 -16
  181. alita_sdk/tools/browser/crawler.py +3 -1
  182. alita_sdk/tools/browser/utils.py +15 -6
  183. alita_sdk/tools/carrier/__init__.py +18 -17
  184. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  185. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  186. alita_sdk/tools/chunkers/__init__.py +3 -1
  187. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  188. alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
  189. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  190. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  191. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  192. alita_sdk/tools/cloud/aws/__init__.py +11 -7
  193. alita_sdk/tools/cloud/azure/__init__.py +11 -7
  194. alita_sdk/tools/cloud/gcp/__init__.py +11 -7
  195. alita_sdk/tools/cloud/k8s/__init__.py +11 -7
  196. alita_sdk/tools/code/linter/__init__.py +9 -8
  197. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  198. alita_sdk/tools/code/sonar/__init__.py +20 -13
  199. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  200. alita_sdk/tools/confluence/__init__.py +21 -14
  201. alita_sdk/tools/confluence/api_wrapper.py +197 -58
  202. alita_sdk/tools/confluence/loader.py +14 -2
  203. alita_sdk/tools/custom_open_api/__init__.py +11 -5
  204. alita_sdk/tools/elastic/__init__.py +10 -8
  205. alita_sdk/tools/elitea_base.py +546 -64
  206. alita_sdk/tools/figma/__init__.py +11 -8
  207. alita_sdk/tools/figma/api_wrapper.py +352 -153
  208. alita_sdk/tools/github/__init__.py +17 -17
  209. alita_sdk/tools/github/api_wrapper.py +9 -26
  210. alita_sdk/tools/github/github_client.py +81 -12
  211. alita_sdk/tools/github/schemas.py +2 -1
  212. alita_sdk/tools/github/tool.py +5 -1
  213. alita_sdk/tools/gitlab/__init__.py +18 -13
  214. alita_sdk/tools/gitlab/api_wrapper.py +224 -80
  215. alita_sdk/tools/gitlab_org/__init__.py +13 -10
  216. alita_sdk/tools/google/bigquery/__init__.py +13 -13
  217. alita_sdk/tools/google/bigquery/tool.py +5 -1
  218. alita_sdk/tools/google_places/__init__.py +20 -11
  219. alita_sdk/tools/jira/__init__.py +21 -11
  220. alita_sdk/tools/jira/api_wrapper.py +315 -168
  221. alita_sdk/tools/keycloak/__init__.py +10 -8
  222. alita_sdk/tools/localgit/__init__.py +8 -3
  223. alita_sdk/tools/localgit/local_git.py +62 -54
  224. alita_sdk/tools/localgit/tool.py +5 -1
  225. alita_sdk/tools/memory/__init__.py +38 -14
  226. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  227. alita_sdk/tools/ocr/__init__.py +10 -8
  228. alita_sdk/tools/openapi/__init__.py +281 -108
  229. alita_sdk/tools/openapi/api_wrapper.py +883 -0
  230. alita_sdk/tools/openapi/tool.py +20 -0
  231. alita_sdk/tools/pandas/__init__.py +18 -11
  232. alita_sdk/tools/pandas/api_wrapper.py +40 -45
  233. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  234. alita_sdk/tools/postman/__init__.py +10 -11
  235. alita_sdk/tools/postman/api_wrapper.py +19 -8
  236. alita_sdk/tools/postman/postman_analysis.py +8 -1
  237. alita_sdk/tools/pptx/__init__.py +10 -10
  238. alita_sdk/tools/qtest/__init__.py +21 -14
  239. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  240. alita_sdk/tools/rally/__init__.py +12 -10
  241. alita_sdk/tools/report_portal/__init__.py +22 -16
  242. alita_sdk/tools/salesforce/__init__.py +21 -16
  243. alita_sdk/tools/servicenow/__init__.py +20 -16
  244. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  245. alita_sdk/tools/sharepoint/__init__.py +16 -14
  246. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  247. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  248. alita_sdk/tools/sharepoint/utils.py +8 -2
  249. alita_sdk/tools/slack/__init__.py +11 -7
  250. alita_sdk/tools/sql/__init__.py +21 -19
  251. alita_sdk/tools/sql/api_wrapper.py +71 -23
  252. alita_sdk/tools/testio/__init__.py +20 -13
  253. alita_sdk/tools/testrail/__init__.py +12 -11
  254. alita_sdk/tools/testrail/api_wrapper.py +214 -46
  255. alita_sdk/tools/utils/__init__.py +28 -4
  256. alita_sdk/tools/utils/content_parser.py +182 -62
  257. alita_sdk/tools/utils/text_operations.py +254 -0
  258. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  259. alita_sdk/tools/xray/__init__.py +17 -14
  260. alita_sdk/tools/xray/api_wrapper.py +58 -113
  261. alita_sdk/tools/yagmail/__init__.py +8 -3
  262. alita_sdk/tools/zephyr/__init__.py +11 -7
  263. alita_sdk/tools/zephyr_enterprise/__init__.py +15 -9
  264. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
  265. alita_sdk/tools/zephyr_essential/__init__.py +15 -10
  266. alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
  267. alita_sdk/tools/zephyr_essential/client.py +6 -4
  268. alita_sdk/tools/zephyr_scale/__init__.py +12 -8
  269. alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
  270. alita_sdk/tools/zephyr_squad/__init__.py +11 -7
  271. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/METADATA +184 -37
  272. alita_sdk-0.3.562.dist-info/RECORD +450 -0
  273. alita_sdk-0.3.562.dist-info/entry_points.txt +2 -0
  274. alita_sdk/tools/bitbucket/tools.py +0 -304
  275. alita_sdk-0.3.257.dist-info/RECORD +0 -343
  276. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/WHEEL +0 -0
  277. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/licenses/LICENSE +0 -0
  278. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/top_level.txt +0 -0
@@ -6,9 +6,9 @@ from langchain.text_splitter import CharacterTextSplitter
6
6
  import fitz
7
7
 
8
8
  try:
9
- from langchain_chroma import Chroma
9
+ from langchain_postgres import PGVector
10
10
  except ImportError:
11
- Chroma = None
11
+ PGVector = None
12
12
 
13
13
  from langchain_community.embeddings.sentence_transformer import (
14
14
  SentenceTransformerEmbeddings,
@@ -32,13 +32,22 @@ def get_page(urls, html_only=False):
32
32
  return docs_transformed
33
33
 
34
34
 
35
- def webRag(urls, max_response_size, query):
36
- if Chroma is None:
37
- return "Chroma is not initialized. Web rag is not available."
35
+ def webRag(urls, max_response_size, query, connection_string=None):
36
+ if PGVector is None:
37
+ return "PGVector is not initialized. Web rag is not available."
38
+
39
+ if not connection_string:
40
+ return "Connection string or embedding model is missing. Web rag is not available."
38
41
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
39
42
  docs = text_splitter.split_documents(get_page(urls))
40
43
  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
41
- db = Chroma.from_documents(docs, embedding_function)
44
+ db = PGVector.from_documents(
45
+ documents=docs,
46
+ embedding=embedding_function,
47
+ collection_name="web_rag",
48
+ pre_delete_collection=True,
49
+ connection=connection_string
50
+ )
42
51
  docs = db.search(query, "mmr", k=10)
43
52
  text = ""
44
53
  for doc in docs:
@@ -1,12 +1,14 @@
1
1
  import logging
2
2
  from typing import Dict, List, Optional, Literal
3
3
  from langchain_core.tools import BaseToolkit, BaseTool
4
- from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
4
+ from pydantic import create_model, BaseModel, ConfigDict, Field
5
5
  from functools import lru_cache
6
6
 
7
7
  from .api_wrapper import CarrierAPIWrapper
8
8
  from .tools import __all__
9
- from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
9
+ from ..elitea_base import filter_missconfigured_index_tools
10
+ from ..utils import clean_string, get_max_toolkit_length
11
+ from ...configurations.carrier import CarrierConfiguration
10
12
 
11
13
  logger = logging.getLogger(__name__)
12
14
 
@@ -15,7 +17,6 @@ name = 'carrier'
15
17
 
16
18
  class AlitaCarrierToolkit(BaseToolkit):
17
19
  tools: List[BaseTool] = []
18
- toolkit_max_length: int = 100
19
20
 
20
21
  @classmethod
21
22
  @lru_cache(maxsize=32)
@@ -24,15 +25,10 @@ class AlitaCarrierToolkit(BaseToolkit):
24
25
  for t in __all__:
25
26
  default = t['tool'].__pydantic_fields__['args_schema'].default
26
27
  selected_tools[t['name']] = default.schema() if default else default
27
- cls.toolkit_max_length = get_max_toolkit_length(selected_tools)
28
28
  return create_model(
29
29
  name,
30
- url=(str, Field(description="Carrier Platform Base URL")),
31
- organization=(str, Field(description="Carrier Organization Name", json_schema_extra={'toolkit_name': True,
32
- 'max_toolkit_length': cls.toolkit_max_length})),
33
- private_token=(
34
- SecretStr, Field(description="Carrier Platform Authentication Token", json_schema_extra={'secret': True})),
35
30
  project_id=(Optional[str], Field(None, description="Optional project ID for scoped operations")),
31
+ carrier_configuration=(CarrierConfiguration, Field(description="Carrier Configuration", json_schema_extra={'configuration_types': ['carrier']})),
36
32
  selected_tools=(
37
33
  List[Literal[tuple(selected_tools)]],
38
34
  Field(default=[], json_schema_extra={"args_schemas": selected_tools}),
@@ -49,6 +45,7 @@ class AlitaCarrierToolkit(BaseToolkit):
49
45
  )
50
46
 
51
47
  @classmethod
48
+ @filter_missconfigured_index_tools
52
49
  def get_toolkit(
53
50
  cls,
54
51
  selected_tools: Optional[List[str]] = None,
@@ -58,23 +55,29 @@ class AlitaCarrierToolkit(BaseToolkit):
58
55
  selected_tools = selected_tools or []
59
56
  logger.info(f"[AlitaCarrierToolkit] Initializing toolkit with selected tools: {selected_tools}")
60
57
 
58
+ wrapper_payload = {
59
+ **kwargs,
60
+ **kwargs.get('carrier_configuration', {}),
61
+ }
62
+
61
63
  try:
62
- carrier_api_wrapper = CarrierAPIWrapper(**kwargs)
64
+ carrier_api_wrapper = CarrierAPIWrapper(**wrapper_payload)
63
65
  logger.info(
64
- f"[AlitaCarrierToolkit] CarrierAPIWrapper initialized successfully with URL: {kwargs.get('url')}")
66
+ f"[AlitaCarrierToolkit] CarrierAPIWrapper initialized successfully with URL: {wrapper_payload.get('url')}")
65
67
  except Exception as e:
66
68
  logger.exception(f"[AlitaCarrierToolkit] Error initializing CarrierAPIWrapper: {e}")
67
69
  raise ValueError(f"CarrierAPIWrapper initialization error: {e}")
68
70
 
69
- prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
70
-
71
71
  tools = []
72
72
  for tool_def in __all__:
73
73
  if selected_tools and tool_def['name'] not in selected_tools:
74
74
  continue
75
75
  try:
76
76
  tool_instance = tool_def['tool'](api_wrapper=carrier_api_wrapper)
77
- tool_instance.name = prefix + tool_instance.name
77
+ if toolkit_name:
78
+ tool_instance.description = f"{tool_instance.description}\nToolkit: {toolkit_name}"
79
+ tool_instance.description = tool_instance.description[:1000]
80
+ tool_instance.metadata = {"toolkit_name": toolkit_name}
78
81
  tools.append(tool_instance)
79
82
  logger.info(f"[AlitaCarrierToolkit] Successfully initialized tool '{tool_instance.name}'")
80
83
  except Exception as e:
@@ -92,9 +95,7 @@ class AlitaCarrierToolkit(BaseToolkit):
92
95
  def get_tools(tool_config: Dict) -> List[BaseTool]:
93
96
  return AlitaCarrierToolkit.get_toolkit(
94
97
  selected_tools=tool_config.get('selected_tools', []),
95
- url=tool_config['settings']['url'],
96
98
  project_id=tool_config['settings'].get('project_id'),
97
- organization=tool_config['settings']['organization'],
98
- private_token=tool_config['settings']['private_token'],
99
+ carrier_configuration=tool_config['settings']['carrier_configuration'],
99
100
  toolkit_name=tool_config.get('toolkit_name')
100
101
  ).get_tools()
@@ -154,6 +154,7 @@ class CreateExcelReportTool(BaseTool):
154
154
  "tp_threshold": (int, Field(default=None, description="Throughput threshold")),
155
155
  "rt_threshold": (int, Field(default=None, description="Response time threshold")),
156
156
  "er_threshold": (int, Field(default=None, description="Error rate threshold")),
157
+ "include_group_pauses": (bool, Field(default=False, description="Include group pauses in Gatling Excel report")),
157
158
  }
158
159
  )
159
160
 
@@ -200,6 +201,7 @@ class CreateExcelReportTool(BaseTool):
200
201
  "tp_threshold": 10,
201
202
  "rt_threshold": 500,
202
203
  "er_threshold": 5,
204
+ "include_group_pauses": False,
203
205
  }
204
206
 
205
207
  def _request_parameter_confirmation(self, default_parameters):
@@ -217,7 +219,8 @@ class CreateExcelReportTool(BaseTool):
217
219
  excel_report_file_name = f'/tmp/reports_test_results_{report["build_id"]}_excel_report.xlsx'
218
220
  bucket_name = report["name"].replace("_", "").replace(" ", "").lower()
219
221
 
220
- result_stats_j = self._parse_report(test_log_file_path, lg_type, parameters["think_time"], is_absolute_file_path=True)
222
+ result_stats_j = self._parse_report(test_log_file_path, lg_type, parameters["think_time"],
223
+ parameters["include_group_pauses"], is_absolute_file_path=True)
221
224
  calc_thr_j = self._calculate_thresholds(result_stats_j, parameters)
222
225
 
223
226
  return self._generate_and_upload_report(
@@ -233,21 +236,22 @@ class CreateExcelReportTool(BaseTool):
233
236
  excel_report_file_name = f'{file_path}_{current_date}.xlsx'
234
237
  bucket_name = bucket
235
238
 
236
- result_stats_j = self._parse_report(file_path, lg_type, parameters["think_time"], is_absolute_file_path=True)
239
+ result_stats_j = self._parse_report(file_path, lg_type, parameters["think_time"],
240
+ parameters["include_group_pauses"], is_absolute_file_path=True)
237
241
  calc_thr_j = self._calculate_thresholds(result_stats_j, parameters)
238
242
 
239
243
  return self._generate_and_upload_report(
240
244
  result_stats_j, carrier_report, calc_thr_j, parameters, excel_report_file_name, bucket_name, file_path
241
245
  )
242
246
 
243
- def _parse_report(self, file_path, lg_type, think_time, is_absolute_file_path=False):
247
+ def _parse_report(self, file_path, lg_type, think_time, include_group_pauses, is_absolute_file_path=False):
244
248
  """Parse the report based on its type."""
245
249
  if lg_type == "gatling":
246
250
  if is_absolute_file_path:
247
251
  report_file = file_path
248
252
  else:
249
253
  report_file = get_latest_log_file(file_path, "simulation.log")
250
- parser = GatlingReportParser(report_file, think_time)
254
+ parser = GatlingReportParser(report_file, include_group_pauses, think_time)
251
255
  result_stats_j = parser.parse()
252
256
  result_stats_j["requests"].update(result_stats_j["groups"])
253
257
  elif lg_type == "jmeter":
@@ -118,9 +118,10 @@ class JMeterReportParser(PerformanceReportParser):
118
118
 
119
119
  class GatlingReportParser(PerformanceReportParser):
120
120
 
121
- def __init__(self, log_file: str, think_times="5,0-10,0"):
121
+ def __init__(self, log_file: str, include_group_pauses, think_times="5,0-10,0"):
122
122
  self.calculated_think_time = think_times
123
123
  self.log_file = log_file
124
+ self.include_group_pauses = include_group_pauses
124
125
 
125
126
  @staticmethod
126
127
  def convert_timestamp_to_datetime(timestamp: int) -> datetime:
@@ -210,7 +211,7 @@ class GatlingReportParser(PerformanceReportParser):
210
211
  ramp_end = self.convert_timestamp_to_datetime(int(line.split('\t')[3]))
211
212
 
212
213
  elif line.startswith('GROUP'):
213
- self.parse_group_line(groups, line)
214
+ self.parse_group_line(groups, line, self.include_group_pauses)
214
215
  except FileNotFoundError as e:
215
216
  print(f"File not found: {e}")
216
217
  raise
@@ -242,11 +243,14 @@ class GatlingReportParser(PerformanceReportParser):
242
243
  requests[request_name].append((response_time, status))
243
244
 
244
245
  @staticmethod
245
- def parse_group_line(groups, line):
246
+ def parse_group_line(groups, line, include_group_pauses):
246
247
  parts = line.split('\t')
247
248
  if len(parts) >= 6:
248
249
  group_name = parts[1]
249
- response_time = int(parts[4])
250
+ if include_group_pauses:
251
+ response_time = int(parts[3]) - int(parts[2])
252
+ else:
253
+ response_time = int(parts[4])
250
254
  status = parts[5].strip()
251
255
  groups[group_name].append((response_time, status))
252
256
 
@@ -3,6 +3,7 @@ from .sematic.statistical_chunker import statistical_chunker
3
3
  from .sematic.markdown_chunker import markdown_chunker
4
4
  from .sematic.proposal_chunker import proposal_chunker
5
5
  from .sematic.json_chunker import json_chunker
6
+ from .universal_chunker import universal_chunker, chunk_single_document, get_file_type
6
7
  from .models import StatisticalChunkerConfig, MarkdownChunkerConfig, ProposalChunkerConfig
7
8
 
8
9
  __all__ = {
@@ -10,7 +11,8 @@ __all__ = {
10
11
  'statistical': statistical_chunker,
11
12
  'markdown': markdown_chunker,
12
13
  'proposal': proposal_chunker,
13
- 'json': json_chunker
14
+ 'json': json_chunker,
15
+ 'universal': universal_chunker,
14
16
  }
15
17
 
16
18
  __confluence_chunkers__ = {
@@ -79,7 +79,7 @@ def parse_code_files_for_db(file_content_generator: Generator[str, None, None],
79
79
  for splitted_document in splitted_documents:
80
80
  metadata = {
81
81
  "filename": file_name,
82
- "method_name": node.name,
82
+ "method_name": node.name if node.name else 'unknown',
83
83
  "language": programming_language.value,
84
84
  }
85
85
  commit_hash = data.get("commit_hash")
@@ -9,7 +9,7 @@ def json_chunker(file_content_generator: Generator[Document, None, None], config
9
9
  for doc in file_content_generator:
10
10
  try:
11
11
  data_dict = json.loads(doc.page_content)
12
- chunks = RecursiveJsonSplitter(max_chunk_size=max_tokens).split_json(json_data=data_dict)
12
+ chunks = RecursiveJsonSplitter(max_chunk_size=max_tokens).split_json(json_data=data_dict, convert_lists=True)
13
13
  if len(chunks) == 1:
14
14
  yield doc
15
15
  continue
@@ -17,6 +17,7 @@ def json_chunker(file_content_generator: Generator[Document, None, None], config
17
17
  for chunk in chunks:
18
18
  metadata = doc.metadata.copy()
19
19
  metadata['chunk_id'] = chunk_id
20
+ metadata['method_name'] = 'json'
20
21
  chunk_id += 1
21
22
  yield Document(page_content=json.dumps(chunk), metadata=metadata)
22
23
  except Exception as e:
@@ -1,4 +1,4 @@
1
- from typing import Generator
1
+ from typing import Generator, List
2
2
  from langchain_core.documents import Document
3
3
  from langchain_text_splitters import MarkdownHeaderTextSplitter, ExperimentalMarkdownSyntaxTextSplitter
4
4
  from langchain.text_splitter import TokenTextSplitter
@@ -7,34 +7,60 @@ from copy import deepcopy as copy
7
7
 
8
8
 
9
9
  def markdown_chunker(file_content_generator: Generator[Document, None, None], config: dict, *args, **kwargs) -> Generator[Document, None, None]:
10
+ """
11
+ Chunks markdown documents by headers, with support for:
12
+ - Minimum chunk size to avoid tiny fragments
13
+ - Maximum token limit with overflow splitting
14
+ - Header metadata preservation
15
+
16
+ Config options:
17
+ strip_header (bool): Remove headers from content. Default: False
18
+ return_each_line (bool): Split on every line. Default: False
19
+ headers_to_split_on (list): Headers to split on, e.g. [('#', 'H1'), ('##', 'H2')]
20
+ max_tokens (int): Maximum tokens per chunk. Default: 512
21
+ token_overlap (int): Token overlap for large chunk splitting. Default: 10
22
+ min_chunk_chars (int): Minimum characters per chunk. Default: 100
23
+ Chunks smaller than this will be merged with the next chunk.
24
+ """
10
25
  strip_header = config.get("strip_header", False)
11
26
  return_each_line = config.get("return_each_line", False)
12
27
  headers_to_split_on = config.get("headers_to_split_on", [])
13
28
  max_tokens = config.get("max_tokens", 512)
14
29
  tokens_overlapping = config.get("token_overlap", 10)
30
+ min_chunk_chars = config.get("min_chunk_chars", 100) # Minimum characters per chunk
31
+
15
32
  headers_to_split_on = [tuple(header) for header in headers_to_split_on]
33
+
16
34
  for doc in file_content_generator:
17
35
  doc_metadata = doc.metadata
18
36
  doc_content = doc.page_content
19
37
  chunk_id = 0
38
+
20
39
  markdown_splitter = MarkdownHeaderTextSplitter(
21
40
  headers_to_split_on=headers_to_split_on,
22
41
  strip_headers=strip_header,
23
42
  return_each_line=return_each_line
24
43
  )
25
44
  md_header_splits = markdown_splitter.split_text(doc_content)
26
- for chunk in md_header_splits:
45
+
46
+ # Merge small chunks with the next one
47
+ merged_chunks = _merge_small_chunks(md_header_splits, min_chunk_chars)
48
+
49
+ for chunk in merged_chunks:
27
50
  if tiktoken_length(chunk.page_content) > max_tokens:
28
- for subchunk in TokenTextSplitter(encoding_name="cl100k_base",
29
- chunk_size=max_tokens,
30
- chunk_overlap=tokens_overlapping
31
- ).split_text(chunk.page_content):
51
+ # Split large chunks into smaller ones
52
+ for subchunk in TokenTextSplitter(
53
+ encoding_name="cl100k_base",
54
+ chunk_size=max_tokens,
55
+ chunk_overlap=tokens_overlapping
56
+ ).split_text(chunk.page_content):
32
57
  chunk_id += 1
33
58
  headers_meta = list(chunk.metadata.values())
34
59
  docmeta = copy(doc_metadata)
35
60
  docmeta.update({"headers": "; ".join(headers_meta)})
36
61
  docmeta['chunk_id'] = chunk_id
37
62
  docmeta['chunk_type'] = "document"
63
+ docmeta['method_name'] = 'markdown'
38
64
  yield Document(
39
65
  page_content=subchunk,
40
66
  metadata=docmeta
@@ -46,12 +72,77 @@ def markdown_chunker(file_content_generator: Generator[Document, None, None], co
46
72
  docmeta.update({"headers": "; ".join(headers_meta)})
47
73
  docmeta['chunk_id'] = chunk_id
48
74
  docmeta['chunk_type'] = "document"
75
+ docmeta['method_name'] = 'text'
49
76
  yield Document(
50
77
  page_content=chunk.page_content,
51
78
  metadata=docmeta
52
79
  )
53
80
 
54
81
 
82
+ def _merge_small_chunks(chunks: List[Document], min_chars: int) -> List[Document]:
83
+ """
84
+ Merge chunks that are smaller than min_chars with the next chunk.
85
+
86
+ This prevents tiny fragments (like standalone headers or short notes)
87
+ from becoming separate chunks.
88
+
89
+ Args:
90
+ chunks: List of Document chunks from markdown splitter
91
+ min_chars: Minimum character count for a chunk
92
+
93
+ Returns:
94
+ List of merged Document chunks
95
+ """
96
+ if not chunks:
97
+ return chunks
98
+
99
+ merged = []
100
+ pending_content = ""
101
+ pending_metadata = {}
102
+
103
+ for i, chunk in enumerate(chunks):
104
+ content = chunk.page_content.strip()
105
+
106
+ if pending_content:
107
+ # Merge pending content with current chunk
108
+ combined_content = pending_content + "\n\n" + content
109
+ # Use the pending metadata (from the header) but can be extended
110
+ combined_metadata = {**pending_metadata}
111
+ # Add any new header info from current chunk
112
+ for key, value in chunk.metadata.items():
113
+ if key not in combined_metadata or not combined_metadata[key]:
114
+ combined_metadata[key] = value
115
+
116
+ if len(combined_content) >= min_chars:
117
+ # Combined is big enough, emit it
118
+ merged.append(Document(
119
+ page_content=combined_content,
120
+ metadata=combined_metadata
121
+ ))
122
+ pending_content = ""
123
+ pending_metadata = {}
124
+ else:
125
+ # Still too small, keep accumulating
126
+ pending_content = combined_content
127
+ pending_metadata = combined_metadata
128
+ elif len(content) < min_chars:
129
+ # Current chunk is too small, start pending
130
+ pending_content = content
131
+ pending_metadata = dict(chunk.metadata)
132
+ else:
133
+ # Current chunk is big enough
134
+ merged.append(chunk)
135
+
136
+ # Don't forget any remaining pending content
137
+ if pending_content:
138
+ merged.append(Document(
139
+ page_content=pending_content,
140
+ metadata=pending_metadata
141
+ ))
142
+
143
+ return merged
144
+
145
+
55
146
  def markdown_by_headers_chunker(file_content_generator: Generator[Document, None, None], config: dict, *args, **kwargs) -> Generator[Document, None, None]:
56
147
  strip_header = config.get("strip_header", False)
57
148
  return_each_line = config.get("return_each_line", False)
@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
6
6
  from langchain.text_splitter import TokenTextSplitter
7
7
 
8
8
  from typing import Optional, List
9
- from langchain_core.pydantic_v1 import BaseModel
9
+ from pydantic import BaseModel
10
10
  from ..utils import tiktoken_length
11
11
 
12
12
  logger = getLogger(__name__)