alita-sdk 0.3.207__py3-none-any.whl → 0.3.209__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/alita_sdk/runtime/clients/client.py
+++ b/alita_sdk/runtime/clients/client.py
@@ -634,27 +634,112 @@ class AlitaClient:
                 "execution_time_seconds": 0.0
             }
 
-        # Find the specific tool
+        # Find the specific tool with smart name matching
         target_tool = None
+        toolkit_name = toolkit_config.get('toolkit_name', '').lower()
+
+        # Helper function to extract base tool name from full name
+        def extract_base_tool_name(full_name: str) -> str:
+            """Extract base tool name from toolkit___toolname format."""
+            if '___' in full_name:
+                return full_name.split('___', 1)[1]
+            return full_name
+
+        # Helper function to create full tool name
+        def create_full_tool_name(base_name: str, toolkit_name: str) -> str:
+            """Create full tool name in toolkit___toolname format."""
+            return f"{toolkit_name}___{base_name}"
+
+        # Normalize tool_name to handle both formats
+        # If user provides toolkit___toolname, extract just the tool name
+        # If user provides just toolname, keep as is
+        if '___' in tool_name:
+            normalized_tool_name = extract_base_tool_name(tool_name)
+            logger.info(f"Extracted base tool name '{normalized_tool_name}' from full name '{tool_name}'")
+        else:
+            normalized_tool_name = tool_name
+
+        # Try multiple matching strategies
         for tool in tools:
-            if hasattr(tool, 'name') and tool.name == tool_name:
-                target_tool = tool
-                break
-            elif hasattr(tool, 'func') and hasattr(tool.func, '__name__') and tool.func.__name__ == tool_name:
-                target_tool = tool
-                break
+            tool_name_attr = None
+            if hasattr(tool, 'name'):
+                tool_name_attr = tool.name
+            elif hasattr(tool, 'func') and hasattr(tool.func, '__name__'):
+                tool_name_attr = tool.func.__name__
+
+            if tool_name_attr:
+                # Strategy 1: Exact match with provided name (handles both formats)
+                if tool_name_attr == tool_name:
+                    target_tool = tool
+                    logger.info(f"Found tool using exact match: '{tool_name_attr}'")
+                    break
+
+                # Strategy 2: Match normalized name with toolkit prefix
+                expected_full_name = create_full_tool_name(normalized_tool_name, toolkit_name)
+                if tool_name_attr == expected_full_name:
+                    target_tool = tool
+                    logger.info(f"Found tool using toolkit prefix mapping: '{tool_name_attr}' for normalized name '{normalized_tool_name}'")
+                    break
+
+                # Strategy 3: Match base names (extract from both sides)
+                base_tool_name = extract_base_tool_name(tool_name_attr)
+                if base_tool_name == normalized_tool_name:
+                    target_tool = tool
+                    logger.info(f"Found tool using base name mapping: '{tool_name_attr}' -> '{base_tool_name}' matches '{normalized_tool_name}'")
+                    break
+
+                # Strategy 4: Match provided name with base tool name (reverse lookup)
+                if tool_name_attr == normalized_tool_name:
+                    target_tool = tool
+                    logger.info(f"Found tool using direct name match: '{tool_name_attr}' matches normalized '{normalized_tool_name}'")
+                    break
 
         if target_tool is None:
             available_tools = []
+            base_available_tools = []
+            full_available_tools = []
+
             for tool in tools:
+                tool_name_attr = None
                 if hasattr(tool, 'name'):
-                    available_tools.append(tool.name)
+                    tool_name_attr = tool.name
                 elif hasattr(tool, 'func') and hasattr(tool.func, '__name__'):
-                    available_tools.append(tool.func.__name__)
+                    tool_name_attr = tool.func.__name__
+
+                if tool_name_attr:
+                    available_tools.append(tool_name_attr)
+
+                    # Extract base name for user-friendly error
+                    base_name = extract_base_tool_name(tool_name_attr)
+                    if base_name not in base_available_tools:
+                        base_available_tools.append(base_name)
+
+                    # Track full names separately
+                    if '___' in tool_name_attr:
+                        full_available_tools.append(tool_name_attr)
+
+            # Create comprehensive error message
+            error_msg = f"Tool '{tool_name}' not found in toolkit '{toolkit_config.get('toolkit_name')}'."
+
+            if base_available_tools and full_available_tools:
+                error_msg += f" Available tools: {base_available_tools} (base names) or {full_available_tools} (full names)"
+            elif base_available_tools:
+                error_msg += f" Available tools: {base_available_tools}"
+            elif available_tools:
+                error_msg += f" Available tools: {available_tools}"
+            else:
+                error_msg += " No tools found in the toolkit."
+
+            # Add helpful hint about naming conventions
+            if '___' in tool_name:
+                error_msg += f" Note: You provided a full name '{tool_name}'. Try using just the base name '{extract_base_tool_name(tool_name)}'."
+            elif full_available_tools:
+                possible_full_name = create_full_tool_name(tool_name, toolkit_name)
+                error_msg += f" Note: You provided a base name '{tool_name}'. The full name might be '{possible_full_name}'."
 
             return {
                 "success": False,
-                "error": f"Tool '{tool_name}' not found. Available tools: {available_tools}",
+                "error": error_msg,
                 "tool_name": tool_name,
                 "toolkit_config": toolkit_config,
                 "llm_model": llm_model,
@@ -664,7 +749,22 @@ class AlitaClient:
 
         # Execute the tool with callback support
         try:
-            logger.info(f"Executing tool '{tool_name}' with parameters: {tool_params}")
+            # Log which tool was found and how
+            actual_tool_name = getattr(target_tool, 'name', None) or getattr(target_tool.func, '__name__', 'unknown')
+
+            # Determine which matching strategy was used
+            if actual_tool_name == tool_name:
+                logger.info(f"Found tool '{tool_name}' using exact match")
+            elif actual_tool_name == create_full_tool_name(normalized_tool_name, toolkit_name):
+                logger.info(f"Found tool '{tool_name}' using toolkit prefix mapping ('{actual_tool_name}' for normalized '{normalized_tool_name}')")
+            elif extract_base_tool_name(actual_tool_name) == normalized_tool_name:
+                logger.info(f"Found tool '{tool_name}' using base name mapping ('{actual_tool_name}' -> '{extract_base_tool_name(actual_tool_name)}')")
+            elif actual_tool_name == normalized_tool_name:
+                logger.info(f"Found tool '{tool_name}' using direct normalized name match ('{actual_tool_name}')")
+            else:
+                logger.info(f"Found tool '{tool_name}' using fallback matching ('{actual_tool_name}')")
+
+            logger.info(f"Executing tool '{tool_name}' (internal name: '{actual_tool_name}') with parameters: {tool_params}")
 
             # Start timing the tool execution
             start_time = time.time()
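
The two helpers above define the toolkit___toolname convention that all four matching strategies revolve around. For reference, a minimal standalone sketch: the helper bodies are copied from the hunk, while the registered name and the loop harness are illustrative.

def extract_base_tool_name(full_name: str) -> str:
    """Extract base tool name from toolkit___toolname format."""
    if '___' in full_name:
        return full_name.split('___', 1)[1]
    return full_name

def create_full_tool_name(base_name: str, toolkit_name: str) -> str:
    """Create full tool name in toolkit___toolname format."""
    return f"{toolkit_name}___{base_name}"

# Both user-facing spellings now resolve to the same registered tool:
registered = "jira___create_issue"  # hypothetical internal registration
for user_input in ("create_issue", "jira___create_issue"):
    normalized = extract_base_tool_name(user_input)
    assert (registered == user_input                                     # strategy 1: exact
            or registered == create_full_tool_name(normalized, "jira")  # strategy 2: prefix
            or extract_base_tool_name(registered) == normalized)        # strategies 3/4: base name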

--- a/alita_sdk/runtime/langchain/langraph_agent.py
+++ b/alita_sdk/runtime/langchain/langraph_agent.py
@@ -9,7 +9,7 @@ from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, BaseMessage
 from langchain_core.runnables import Runnable
 from langchain_core.runnables import RunnableConfig
-from langchain_core.tools import BaseTool
+from langchain_core.tools import BaseTool, ToolException
 from langgraph.channels.ephemeral_value import EphemeralValue
 from langgraph.graph import StateGraph
 from langgraph.graph.graph import END, START
@@ -506,10 +506,10 @@ def create_graph(
         for toolkit, selected_tools in connected_tools.items():
             for tool in selected_tools:
                 tool_names.append(f"{toolkit}___{tool}")
+    elif isinstance(connected_tools, list):
+        # for cases when tools are provided as a list of names with already bound toolkit_name
+        tool_names = connected_tools
 
-    # Filter tools if specific tool names are provided
-    available_tools = []
-
     if tool_names:
         # Filter tools by name
         tool_dict = {tool.name: tool for tool in tools if isinstance(tool, BaseTool)}
@@ -580,7 +580,10 @@ def create_graph(
                                              default_output=node['condition'].get('default_output', 'END')))
 
     # set default value for state variable at START
-    entry_point = clean_string(schema['entry_point'])
+    try:
+        entry_point = clean_string(schema['entry_point'])
+    except KeyError:
+        raise ToolException("Entry point is not defined in the schema. Please define 'entry_point' in the schema.")
     for key, value in state.items():
         if 'type' in value and 'value' in value:
             # set default value for state variable if it is defined in the schema
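
The guard turns a bare KeyError into a ToolException with an actionable message. A sketch of the difference in behavior, assuming a minimal schema dict (real schemas also carry node and state definitions):

good_schema = {"entry_point": "input_node"}  # hypothetical node name
bad_schema = {}                              # previously surfaced as a bare KeyError

for schema in (good_schema, bad_schema):
    try:
        entry_point = schema['entry_point']
        print(f"graph starts at {entry_point}")
    except KeyError:
        # 0.3.209 re-raises this as ToolException with the message shown in the hunk
        print("Entry point is not defined in the schema. Please define 'entry_point' in the schema.")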

--- a/alita_sdk/runtime/tools/vectorstore.py
+++ b/alita_sdk/runtime/tools/vectorstore.py
@@ -199,13 +199,12 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         data = store.get(include=['documents', 'metadatas'])
         # re-structure data to be more usable
         for doc_str, meta, db_id in zip(data['documents'], data['metadatas'], data['ids']):
-            doc = json.loads(doc_str)
             doc_id = str(meta['id'])
             dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
             parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
             result[doc_id] = {
                 'metadata': meta,
-                'document': doc,
+                'document': doc_str,
                 'id': db_id,
                 IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
                 IndexerKeywords.PARENT.value: parent_id
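
After this change the stored document payload is passed through verbatim instead of being json.loads-ed. An illustrative shape of one entry in the returned mapping; all literal values are placeholders, and the dependent-docs/parent key strings actually come from the IndexerKeywords enum:

result = {
    "42": {                                             # str(meta['id'])
        "metadata": {"id": 42},                         # placeholder metadata
        "document": '{"title": "raw stored payload"}',  # raw string, no json.loads
        "id": "db-uuid-1",                              # placeholder vector-store id
        # plus IndexerKeywords.DEPENDENT_DOCS.value: [] and
        # IndexerKeywords.PARENT.value: -1, keyed by the enum values
    }
}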

--- a/alita_sdk/tools/ado/test_plan/test_plan_wrapper.py
+++ b/alita_sdk/tools/ado/test_plan/test_plan_wrapper.py
@@ -1,7 +1,10 @@
 import json
 import logging
-from typing import Optional
+from typing import Any, Dict, Generator, List, Optional
 
+from langchain_core.documents import Document
+
+from alita_sdk.tools.elitea_base import BaseIndexParams
 from azure.devops.connection import Connection
 from azure.devops.v7_0.test_plan.models import TestPlanCreateParams, TestSuiteCreateParams, \
     SuiteTestCaseCreateUpdateParameters
@@ -13,7 +16,11 @@ from pydantic.fields import FieldInfo as Field
 import xml.etree.ElementTree as ET
 
 from ..work_item import AzureDevOpsApiWrapper
-from ...elitea_base import BaseToolApiWrapper
+from ...elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
+try:
+    from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
+except ImportError:
+    from alita_sdk.langchain.interfaces.llm_processor import get_embeddings
 
 logger = logging.getLogger(__name__)
 
@@ -96,7 +103,6 @@ TestCaseAddModel = create_model(
     suite_id=(int, Field(description="ID of the test suite to which test cases are to be added"))
 )
 
-
 test_steps_description = """Json or XML array string with test steps.
 Json example: [{"stepNumber": 1, "action": "Some action", "expectedResult": "Some expectation"},...]
 XML example:
@@ -158,7 +164,19 @@ TestCasesGetModel = create_model(
     suite_id=(int, Field(description="ID of the test suite for which test cases are requested"))
 )
 
-class TestPlanApiWrapper(BaseToolApiWrapper):
+# Schema for indexing ADO Wiki pages into vector store
+indexData = create_model(
+    "indexData",
+    __base__=BaseIndexParams,
+    plan_id=(int, Field(description="ID of the test plan for which test cases are requested")),
+    suite_ids=(list[int], Field(description="List of test suite IDs for which test cases are requested (can be empty)")),
+    progress_step=(Optional[int], Field(default=None, ge=0, le=100,
+                                        description="Optional step size for progress reporting during indexing")),
+    clean_index=(Optional[bool], Field(default=False,
+                                       description="Optional flag to enforce clean existing index before indexing new data")),
+)
+
+class TestPlanApiWrapper(BaseVectorStoreToolApiWrapper):
     __test__ = False
     organization_url: str
     project: str
@@ -166,6 +184,13 @@ class TestPlanApiWrapper(BaseToolApiWrapper):
     limit: Optional[int] = 5
     _client: Optional[TestPlanClient] = PrivateAttr()
 
+    llm: Any = None
+    connection_string: Optional[SecretStr] = None
+    collection_name: Optional[str] = None
+    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
+    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
+    vectorstore_type: Optional[str] = "PGVector"
+
     class Config:
         arbitrary_types_allowed = True
 
@@ -250,8 +275,10 @@ class TestPlanApiWrapper(BaseToolApiWrapper):
         try:
             if isinstance(suite_test_case_create_update_parameters, str):
                 suite_test_case_create_update_parameters = json.loads(suite_test_case_create_update_parameters)
-            suite_test_case_create_update_params_obj = [SuiteTestCaseCreateUpdateParameters(**param) for param in suite_test_case_create_update_parameters]
-            test_cases = self._client.add_test_cases_to_suite(suite_test_case_create_update_params_obj, self.project, plan_id, suite_id)
+            suite_test_case_create_update_params_obj = [SuiteTestCaseCreateUpdateParameters(**param) for param in
+                                                        suite_test_case_create_update_parameters]
+            test_cases = self._client.add_test_cases_to_suite(suite_test_case_create_update_params_obj, self.project,
+                                                              plan_id, suite_id)
             return [test_case.as_dict() for test_case in test_cases]
         except Exception as e:
             logger.error(f"Error adding test case: {e}")
@@ -268,10 +295,11 @@ class TestPlanApiWrapper(BaseToolApiWrapper):
                                      test_steps=test_case['test_steps'],
                                      test_steps_format=test_case['test_steps_format']) for test_case in test_cases]
 
-
-    def create_test_case(self, plan_id: int, suite_id: int, title: str, description: str, test_steps: str, test_steps_format: str = 'json'):
+    def create_test_case(self, plan_id: int, suite_id: int, title: str, description: str, test_steps: str,
+                         test_steps_format: str = 'json'):
         """Creates a new test case in specified suite in Azure DevOps."""
-        work_item_wrapper = AzureDevOpsApiWrapper(organization_url=self.organization_url, token=self.token.get_secret_value(), project=self.project)
+        work_item_wrapper = AzureDevOpsApiWrapper(organization_url=self.organization_url,
+                                                  token=self.token.get_secret_value(), project=self.project)
         if test_steps_format == 'json':
             steps_xml = self.get_test_steps_xml(json.loads(test_steps))
         elif test_steps_format == 'xml':
@@ -279,8 +307,9 @@ class TestPlanApiWrapper(BaseToolApiWrapper):
         else:
             return ToolException("Unknown test steps format: " + test_steps_format)
         work_item_json = self.build_ado_test_case(title, description, steps_xml)
-        created_work_item_id = work_item_wrapper.create_work_item(work_item_json=json.dumps(work_item_json), wi_type="Test Case")['id']
-        return self.add_test_case([{"work_item":{"id":created_work_item_id}}], plan_id, suite_id)
+        created_work_item_id = \
+            work_item_wrapper.create_work_item(work_item_json=json.dumps(work_item_json), wi_type="Test Case")['id']
+        return self.add_test_case([{"work_item": {"id": created_work_item_id}}], plan_id, suite_id)
 
     def build_ado_test_case(self, title, description, steps_xml):
         """
@@ -355,6 +384,42 @@ class TestPlanApiWrapper(BaseToolApiWrapper):
             logger.error(f"Error getting test cases: {e}")
             return ToolException(f"Error getting test cases: {e}")
 
+    def index_data(self,
+                   plan_id: str,
+                   suite_ids: list[str] = [],
+                   collection_suffix: str = '',
+                   progress_step: int = None,
+                   clean_index: bool = False
+                   ):
+        """Load ADO TestCases into the vector store."""
+        docs = self._base_loader(plan_id, suite_ids)
+        embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+
+    def _base_loader(self, plan_id: str, suite_ids: Optional[list[str]] = []) -> Generator[Document, None, None]:
+        cases = []
+        for sid in suite_ids:
+            cases.extend(self.get_test_cases(plan_id, sid))
+        #
+        for case in cases:
+            field_dicts = case.get('work_item', {}).get('work_item_fields', [])
+            data = {k: v for d in field_dicts for k, v in d.items()}
+            yield Document(
+                page_content=data.get('Microsoft.VSTS.TCM.Steps', ''),
+                metadata={
+                    'id': case.get('work_item', {}).get('id', ''),
+                    'title': case.get('work_item', {}).get('name', ''),
+                    'plan_id': case.get('test_plan', {}).get('id', ''),
+                    'suite_id': case.get('test_suite', {}).get('id', ''),
+                    'description': data.get('System.Description', ''),
+                    'updated_on': data.get('System.Rev', ''),
+                })
+
+    def _process_document(self, document: Document) -> Generator[Document, None, None]:
+        yield document
+
+    @extend_with_vector_tools
     def get_available_tools(self):
         """Return a list of available tools."""
         return [
@@ -423,5 +488,11 @@ class TestPlanApiWrapper(BaseToolApiWrapper):
                 "description": self.get_test_cases.__doc__,
                 "args_schema": TestCasesGetModel,
                 "ref": self.get_test_cases,
+            },
+            {
+                "name": "index_data",
+                "ref": self.index_data,
+                "description": self.index_data.__doc__,
+                "args_schema": indexData,
             }
         ]
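
A hedged usage sketch for the new test-plan indexing tool: the constructor fields come from the class definition above, while the URL, token, and connection string are placeholders, not working credentials.

from alita_sdk.tools.ado.test_plan.test_plan_wrapper import TestPlanApiWrapper

wrapper = TestPlanApiWrapper(
    organization_url="https://dev.azure.com/my-org",  # placeholder
    project="MyProject",                              # placeholder
    token="<ado-personal-access-token>",              # placeholder
    connection_string="postgresql+psycopg://...",     # PGVector DSN, elided
    collection_name="ado_test_cases",
    # embedding_model, embedding_model_params and vectorstore_type keep the
    # defaults declared above (HuggingFaceEmbeddings / all-MiniLM-L6-v2 / PGVector)
)

# Index all test cases from suites 10 and 11 of plan 123, rebuilding the index:
wrapper.index_data(plan_id=123, suite_ids=[10, 11], clean_index=True)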

--- a/alita_sdk/tools/ado/wiki/ado_wrapper.py
+++ b/alita_sdk/tools/ado/wiki/ado_wrapper.py
@@ -1,6 +1,10 @@
 import logging
-from typing import Optional
+from typing import Any, Dict, Generator, List, Optional
 
+from alita_sdk.tools.elitea_base import BaseIndexParams
+from langchain_core.documents import Document
+
+from ...elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
 from azure.devops.connection import Connection
 from azure.devops.exceptions import AzureDevOpsServiceError
 from azure.devops.v7_0.core import CoreClient
@@ -12,6 +16,10 @@ from msrest.authentication import BasicAuthentication
 from pydantic import create_model, PrivateAttr, SecretStr
 from pydantic import model_validator
 from pydantic.fields import Field
+try:
+    from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
+except ImportError:
+    from alita_sdk.langchain.interfaces.llm_processor import get_embeddings
 
 from ...elitea_base import BaseToolApiWrapper
 
@@ -52,14 +60,32 @@ RenamePageInput = create_model(
     version_type=(Optional[str], Field(description="Version type (branch, tag, or commit). Determines how Id is interpreted", default="branch"))
 )
 
+# Schema for indexing ADO Wiki pages into vector store
+indexData = create_model(
+    "indexData",
+    __base__=BaseIndexParams,
+    wiki_identifier=(str, Field(description="Wiki identifier to index, e.g., 'ABCProject.wiki'")),
+    progress_step=(Optional[int], Field(default=None, ge=0, le=100,
+                                        description="Optional step size for progress reporting during indexing")),
+    clean_index=(Optional[bool], Field(default=False,
+                                       description="Optional flag to enforce clean existing index before indexing new data")),
+)
+
 
-class AzureDevOpsApiWrapper(BaseToolApiWrapper):
+class AzureDevOpsApiWrapper(BaseVectorStoreToolApiWrapper):
     organization_url: str
     project: str
     token: SecretStr
     _client: Optional[WikiClient] = PrivateAttr()  # Private attribute for the wiki client
     _core_client: Optional[CoreClient] = PrivateAttr()  # Private attribute for the CoreClient client
 
+    llm: Any = None
+    connection_string: Optional[SecretStr] = None
+    collection_name: Optional[str] = None
+    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
+    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
+    vectorstore_type: Optional[str] = "PGVector"
+
     class Config:
         arbitrary_types_allowed = True  # Allow arbitrary types (e.g., WorkItemTrackingClient)
 
@@ -216,6 +242,34 @@ class AzureDevOpsApiWrapper(BaseToolApiWrapper):
             logger.error(f"Unable to modify wiki page: {str(e)}")
             return ToolException(f"Unable to modify wiki page: {str(e)}")
 
+    def index_data(
+            self,
+            wiki_identifier: str,
+            collection_suffix: str = '',
+            progress_step: int = None,
+            clean_index: bool = False
+    ):
+        """Load ADO Wiki pages into the vector store."""
+        docs = self._base_loader(wiki_identifier)
+        embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+
+    def _base_loader(self, wiki_identifier: str) -> Generator[Document, None, None]:
+        pages = self._client.get_pages_batch(pages_batch_request={}, project=self.project, wiki_identifier=wiki_identifier)
+        #
+        for page in pages:
+            content = self._client.get_page_by_id(project=self.project, wiki_identifier=wiki_identifier, id=page.id, include_content=True).page.content
+            yield Document(page_content=content, metadata={
+                'id': page.id,
+                'path': page.path,
+                'updated_on': ''
+            })
+
+    def _process_document(self, document: Document) -> Generator[Document, None, None]:
+        yield document
+
+    @extend_with_vector_tools
     def get_available_tools(self):
         """Return a list of available tools."""
         return [
@@ -260,5 +314,11 @@ class AzureDevOpsApiWrapper(BaseToolApiWrapper):
                 "description": self.rename_wiki_page.__doc__,
                 "args_schema": RenamePageInput,
                 "ref": self.rename_wiki_page,
+            },
+            {
+                "name": "index_data",
+                "ref": self.index_data,
+                "description": self.index_data.__doc__,
+                "args_schema": indexData,
             }
         ]
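
A similar hedged sketch for the wiki counterpart; again, all concrete values are placeholders.

from alita_sdk.tools.ado.wiki.ado_wrapper import AzureDevOpsApiWrapper

wiki = AzureDevOpsApiWrapper(
    organization_url="https://dev.azure.com/my-org",  # placeholder
    project="MyProject",                              # placeholder
    token="<ado-personal-access-token>",              # placeholder
    connection_string="postgresql+psycopg://...",     # PGVector DSN, elided
    collection_name="ado_wiki",
)

# Fetch every page of the given wiki and index its content, reporting every 10%:
wiki.index_data(wiki_identifier="MyProject.wiki", progress_step=10)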

--- a/alita_sdk/tools/chunkers/sematic/markdown_chunker.py
+++ b/alita_sdk/tools/chunkers/sematic/markdown_chunker.py
@@ -1,12 +1,13 @@
 from typing import Generator
 from langchain.schema import Document
+from langchain_core.documents import Document
 from langchain_text_splitters import MarkdownHeaderTextSplitter
 from langchain.text_splitter import TokenTextSplitter
 from ..utils import tiktoken_length
 from copy import deepcopy as copy
 
 
-def markdown_chunker(file_content_generator: Generator[Document, None, None], config: dict, *args, **kwargs) -> Generator[str, None, None]:
+def markdown_chunker(file_content_generator: Generator[Document, None, None], config: dict, *args, **kwargs) -> Generator[Document, None, None]:
     strip_header = config.get("strip_header", False)
     return_each_line = config.get("return_each_line", False)
     headers_to_split_on = config.get("headers_to_split_on", [])
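
The signature fix documents what the function already produced: Document chunks, not plain strings. A sketch of that generator-in/generator-out contract, assuming the config keys read in the hunk above (the header tuples follow langchain's MarkdownHeaderTextSplitter format):

from langchain_core.documents import Document
from alita_sdk.tools.chunkers.sematic.markdown_chunker import markdown_chunker

def docs():
    yield Document(page_content="# Title\n\nSome markdown body.", metadata={"id": 1})

config = {
    "strip_header": False,
    "return_each_line": False,
    "headers_to_split_on": [("#", "Header 1"), ("##", "Header 2")],
}
for chunk in markdown_chunker(file_content_generator=docs(), config=config):
    print(type(chunk).__name__, chunk.metadata)  # Document, as of 0.3.209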

--- a/alita_sdk/tools/sharepoint/api_wrapper.py
+++ b/alita_sdk/tools/sharepoint/api_wrapper.py
@@ -1,7 +1,8 @@
 import json
 import logging
-from typing import Optional, List, Dict, Any
+from typing import Optional, List, Dict, Any, Generator
 
+from ..chunkers import markdown_chunker
 from ..utils.content_parser import parse_file_content
 from langchain_core.tools import ToolException
 from office365.runtime.auth.client_credential import ClientCredential
@@ -184,11 +185,19 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
         return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
 
-    def _process_document(self, document: Document) -> Document:
-        page_content = self.read_file(document.metadata['Path'], is_capture_image=True)
+    def _process_document(self, document: Document) -> Generator[Document, None, None]:
+        config = {
+            "max_tokens": self.llm.model_config.get('max_tokens', 512),
+            "token_overlap": self.llm.model_config.get('token_overlap',
+                                                       int(self.llm.model_config.get('max_tokens', 512) * 0.05))
+        }
+        chunks = markdown_chunker(file_content_generator=self._generate_file_content(document), config=config)
+        yield from chunks
 
+    def _generate_file_content(self, document: Document) -> Generator[Document, None, None]:
+        page_content = self.read_file(document.metadata['Path'], is_capture_image=True)
         document.page_content = json.dumps(str(page_content))
-        return document
+        yield document
 
     def get_available_tools(self):
         return [
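
The token_overlap fallback above is derived from max_tokens; for example, with the default max_tokens of 512:

max_tokens = 512
token_overlap = int(max_tokens * 0.05)  # -> 25 tokens shared between adjacent chunks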

--- a/alita_sdk/tools/testrail/api_wrapper.py
+++ b/alita_sdk/tools/testrail/api_wrapper.py
@@ -11,6 +11,7 @@ from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
 from langchain_core.documents import Document
 
 from ...runtime.utils.utils import IndexerKeywords
+from ..utils.content_parser import parse_file_content
 
 try:
     from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
@@ -658,8 +659,12 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
         page_content = "This filetype is not supported."
         if attachment['filetype'] == 'txt' :
             page_content = self._client.get(endpoint=f"get_attachment/{attachment['id']}")
-        # TODO: add support for other file types
-        # use utility to handle different types (tools/utils)
+        else:
+            try:
+                attachment_path = self._client.attachments.get_attachment(attachment_id=attachment['id'], path=f"./{attachment['filename']}")
+                page_content = parse_file_content(file_name=attachment['filename'], file_content=attachment_path.read_bytes(), llm=self.llm, is_capture_image=True)
+            except Exception as e:
+                logger.error(f"Unable to parse page's content with type: {attachment['filetype']}: {e}")
         return page_content
 
     def _to_markup(self, data: List[Dict], output_format: str) -> str:
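
The new else-branch follows a download-then-parse pattern. A standalone sketch using only the calls visible in the hunk; client stands in for self._client, and the return value of get_attachment is treated as path-like since the wrapper calls .read_bytes() on it:

from alita_sdk.tools.utils.content_parser import parse_file_content

def attachment_to_text(client, attachment, llm):
    if attachment['filetype'] == 'txt':
        return client.get(endpoint=f"get_attachment/{attachment['id']}")
    # Any other filetype: download to disk, then delegate to the shared parser.
    saved = client.attachments.get_attachment(attachment_id=attachment['id'],
                                              path=f"./{attachment['filename']}")
    return parse_file_content(file_name=attachment['filename'],
                              file_content=saved.read_bytes(),
                              llm=llm, is_capture_image=True)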

--- a/alita_sdk/tools/utils/content_parser.py
+++ b/alita_sdk/tools/utils/content_parser.py
@@ -11,9 +11,12 @@ import pymupdf
 from langchain_core.tools import ToolException
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from langchain_core.messages import HumanMessage
+from logging import getLogger
 
 from ...runtime.langchain.tools.utils import bytes_to_base64
 
+logger = getLogger(__name__)
+
 image_processing_prompt='''
 You are an AI model designed for analyzing images. Your task is to accurately describe the content of the given image. Depending on the type of image, follow these specific instructions:
 
@@ -56,7 +59,33 @@ Be as precise and thorough as possible in your responses. If something is unclea
 
 IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg']
 
-def parse_file_content(file_name, file_content, is_capture_image: bool = False, page_number: int = None, sheet_name: str = None, llm=None):
+
+def parse_file_content(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
+                       sheet_name: str = None, llm=None, file_path: str = None):
+    """Parse the content of a file based on its type and return the parsed content.
+
+    Args:
+        file_name (str): The name of the file to parse.
+        file_content (bytes): The content of the file as bytes.
+        is_capture_image (bool): Whether to capture images from the file.
+        page_number (int, optional): The specific page number to parse for PDF or PPTX files.
+        sheet_name (str, optional): The specific sheet name to parse for Excel files.
+        llm: The language model to use for image processing.
+        file_path (str, optional): The path to the file if it needs to be read from disk.
+    Returns:
+        str: The parsed content of the file.
+    Raises:
+        ToolException: If the file type is not supported or if there is an error reading the file.
+    """
+
+    if (file_path and (file_name or file_content)) or (not file_path and (not file_name or file_content is None)):
+        raise ToolException("Either (file_name and file_content) or file_path must be provided, but not both.")
+
+    if file_path:
+        file_content = file_to_bytes(file_path)
+        if file_content is None:
+            return ToolException(f"File not found or could not be read: {file_path}")
+        file_name = file_path.split('/')[-1]  # Extract file name from path
     if file_name.endswith('.txt'):
         return parse_txt(file_content)
     elif file_name.endswith('.docx'):
@@ -176,4 +205,25 @@ def __perform_llm_prediction_for_image(llm, image: bytes, image_format='png', pr
             },
         ])
     ])
-    return f"\n[Image description: {result.content}]\n"
+    return f"\n[Image description: {result.content}]\n"
+
+def file_to_bytes(filepath):
+    """
+    Reads a file and returns its content as a bytes object.
+
+    Args:
+        filepath (str): The path to the file.
+
+    Returns:
+        bytes: The content of the file as a bytes object.
+    """
+    try:
+        with open(filepath, "rb") as f:
+            file_content_bytes = f.read()
+        return file_content_bytes
+    except FileNotFoundError:
+        logger.error(f"File not found: {filepath}")
+        return None
+    except Exception as e:
+        logger.error(f"Error reading file {filepath}: {e}")
+        return None
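
A sketch of the two input modes the reworked signature enforces (sample.docx is a placeholder path):

from alita_sdk.tools.utils.content_parser import parse_file_content, file_to_bytes

# Mode 1: pass a path and let the parser read and name the file itself.
text = parse_file_content(file_path="./sample.docx")

# Mode 2: pass name + bytes explicitly (the TestRail wrapper above does this).
raw = file_to_bytes("./sample.docx")  # returns None if the file cannot be read
if raw is not None:
    text = parse_file_content(file_name="sample.docx", file_content=raw)

# Supplying both modes at once, or neither, raises ToolException.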

--- a/alita_sdk-0.3.207.dist-info/METADATA
+++ b/alita_sdk-0.3.209.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.207
+Version: 0.3.209
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

--- a/alita_sdk-0.3.207.dist-info/RECORD
+++ b/alita_sdk-0.3.209.dist-info/RECORD
@@ -4,7 +4,7 @@ alita_sdk/community/utils.py,sha256=lvuCJaNqVPHOORJV6kIPcXJcdprVW_TJvERtYAEgpjM,
 alita_sdk/runtime/__init__.py,sha256=4W0UF-nl3QF2bvET5lnah4o24CoTwSoKXhuN0YnwvEE,828
 alita_sdk/runtime/clients/__init__.py,sha256=BdehU5GBztN1Qi1Wul0cqlU46FxUfMnI6Vq2Zd_oq1M,296
 alita_sdk/runtime/clients/artifact.py,sha256=4N2t5x3GibyXLq3Fvrv2o_VA7Z000yNfc-UN4eGsHZg,2679
-alita_sdk/runtime/clients/client.py,sha256=R2ISXLCi7ODQaw6juPlknCtWkcSeZw-lmq3VOc6V-yM,35783
+alita_sdk/runtime/clients/client.py,sha256=xTmrgKWVMfLPZG_BKkMheTMlJUK-a7URCIk6YiFyxVY,41868
 alita_sdk/runtime/clients/datasource.py,sha256=HAZovoQN9jBg0_-lIlGBQzb4FJdczPhkHehAiVG3Wx0,1020
 alita_sdk/runtime/clients/prompt.py,sha256=li1RG9eBwgNK_Qf0qUaZ8QNTmsncFrAL2pv3kbxZRZg,1447
 alita_sdk/runtime/langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,7 +12,7 @@ alita_sdk/runtime/langchain/assistant.py,sha256=Bn9vUyZlFAP-D9Bh3zc2G1ZQkh5rr2c2
 alita_sdk/runtime/langchain/chat_message_template.py,sha256=kPz8W2BG6IMyITFDA5oeb5BxVRkHEVZhuiGl4MBZKdc,2176
 alita_sdk/runtime/langchain/constants.py,sha256=eHVJ_beJNTf1WJo4yq7KMK64fxsRvs3lKc34QCXSbpk,3319
 alita_sdk/runtime/langchain/indexer.py,sha256=0ENHy5EOhThnAiYFc7QAsaTNp9rr8hDV_hTK8ahbatk,37592
-alita_sdk/runtime/langchain/langraph_agent.py,sha256=QwD9NZ74Hp4rZvP7nirzOmjFJhfJ7eiuAvsq7aAW4Uw,43563
+alita_sdk/runtime/langchain/langraph_agent.py,sha256=csK5vNXULMImBsfNzb6B2HgXuCWgCNYf3DIVvnmk5uQ,43835
 alita_sdk/runtime/langchain/mixedAgentParser.py,sha256=M256lvtsL3YtYflBCEp-rWKrKtcY1dJIyRGVv7KW9ME,2611
 alita_sdk/runtime/langchain/mixedAgentRenderes.py,sha256=asBtKqm88QhZRILditjYICwFVKF5KfO38hu2O-WrSWE,5964
 alita_sdk/runtime/langchain/store_manager.py,sha256=i8Fl11IXJhrBXq1F1ukEVln57B1IBe-tqSUvfUmBV4A,2218
@@ -79,7 +79,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
 alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
 alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
-alita_sdk/runtime/tools/vectorstore.py,sha256=RhGg2gGY5PFfllouuwB5uLkM_lAlr_SqpsziLKgXq1U,30672
+alita_sdk/runtime/tools/vectorstore.py,sha256=o818tabxkG-o1Opv7zQy9lIL4sW2lfEHNCJrNiU7uPU,30634
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -97,9 +97,9 @@ alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,
 alita_sdk/tools/ado/repos/__init__.py,sha256=guYY95Gtyb0S4Jj0V1qO0x2jlRoH0H1cKjHXNwmShow,6388
 alita_sdk/tools/ado/repos/repos_wrapper.py,sha256=spBq-0QdRRNctz_Qbl4IIDpnjitzQLhvJzRIW_6jKNA,48583
 alita_sdk/tools/ado/test_plan/__init__.py,sha256=bVywTYTvdm1rUeP2krVVMRN-xDCY--ze7NFdTxJP9ow,4708
-alita_sdk/tools/ado/test_plan/test_plan_wrapper.py,sha256=p1Mptd_1J6bmkyrvf2M-FB79s8THzEesBlfgaOnRXb8,18152
+alita_sdk/tools/ado/test_plan/test_plan_wrapper.py,sha256=p1S5vQJSSfXXXZ3MH8qaqKZiH9MeT_pCHGU56Xn5UdI,21639
 alita_sdk/tools/ado/wiki/__init__.py,sha256=WCIKOisU2h3E4SNDvGfWCMZ3nRMxfH_ZhIffmSHH3XI,4576
-alita_sdk/tools/ado/wiki/ado_wrapper.py,sha256=l4bc2QoKSUXg9UqNcx0ylv7YL9JPPQd35Ti5MXyEgC4,12690
+alita_sdk/tools/ado/wiki/ado_wrapper.py,sha256=2mIuvYbVnX1eYSBzr4PQTrOZa_s0btONCT-e8MWNUbU,15482
 alita_sdk/tools/ado/work_item/__init__.py,sha256=k6gZ6pEE7gvNWvCDoDV05jltzbqxC_NPm06CEr5Wwcs,4726
 alita_sdk/tools/ado/work_item/ado_wrapper.py,sha256=aLB-aSNQST0FCwP7I01OXanCpZHKVarZZB1u9j2H1LA,26253
 alita_sdk/tools/advanced_jira_mining/__init__.py,sha256=pUTzECqGvYaR5qWY3JPUhrImrZgc7pCXuqSe5eWIE80,4604
@@ -164,7 +164,7 @@ alita_sdk/tools/chunkers/code/treesitter/treesitter_rs.py,sha256=LgKyNffBy30gIr8
 alita_sdk/tools/chunkers/code/treesitter/treesitter_ts.py,sha256=Qs1a_BBN296iZc5hh8UNF9sc0G0-A_XZVhP3Na1ZNDg,387
 alita_sdk/tools/chunkers/sematic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/tools/chunkers/sematic/base.py,sha256=bRHpCFbOy-KPe4HBGpegrvIhvOsd7sDRfmb06T8tSuU,349
-alita_sdk/tools/chunkers/sematic/markdown_chunker.py,sha256=clmhRgewfBD-Ho1v0bSWKkCg_Fc9RSNfiwky1QM7AdU,2523
+alita_sdk/tools/chunkers/sematic/markdown_chunker.py,sha256=NZCZi0Xzi58Bm7-9LzwGoAhdNZhvUERb_sK1bNQpqCQ,2574
 alita_sdk/tools/chunkers/sematic/proposal_chunker.py,sha256=t8JjX9TH6yHXXaemiDK1E6000tlES2Kl8XfyezmlIoo,5116
 alita_sdk/tools/chunkers/sematic/statistical_chunker.py,sha256=VDQcMC-ky72GqdWJiHMmcRmfJTTU5XglBF1IWg2Qews,13403
 alita_sdk/tools/cloud/__init__.py,sha256=ekqANTJAyuURqpjNTn6MmSn2q6qEKwENxEXBUFGkkck,512
@@ -269,7 +269,7 @@ alita_sdk/tools/servicenow/__init__.py,sha256=VHH3qpUbEJ0tdtrIiWakohCmbifUOPgCVX
 alita_sdk/tools/servicenow/api_wrapper.py,sha256=WpH-bBLGFdhehs4g-K-WAkNuaD1CSrwsDpdgB3RG53s,6120
 alita_sdk/tools/servicenow/servicenow_client.py,sha256=Rdqfu-ll-qbnclMzChLZBsfXRDzgoX_FdeI2WLApWxc,3269
 alita_sdk/tools/sharepoint/__init__.py,sha256=CiaOmzPl-9WNWZU9AtP-Y-Mg_uBnxeKFTnUjJ5aQbmA,3036
-alita_sdk/tools/sharepoint/api_wrapper.py,sha256=YNtXmathHN46FCD4M9zDs2li0USdKW35lRXckYU8XdU,10011
+alita_sdk/tools/sharepoint/api_wrapper.py,sha256=o9z8-_d3F4qfgvonJIed9obVPvTYC06zmkwk2XZ1UfI,10600
 alita_sdk/tools/sharepoint/authorization_helper.py,sha256=n-nL5dlBoLMK70nHu7P2RYCb8C6c9HMA_gEaw8LxuhE,2007
 alita_sdk/tools/sharepoint/utils.py,sha256=fZ1YzAu5CTjKSZeslowpOPH974902S8vCp1Wu7L44LM,446
 alita_sdk/tools/slack/__init__.py,sha256=mbP2JiHybGSAH0ay8pxvPCqeU2eb9CK_NaCKG1uhPE4,3894
@@ -280,9 +280,9 @@ alita_sdk/tools/sql/models.py,sha256=AKJgSl_kEEz4fZfw3kbvdGHXaRZ-yiaqfJOB6YOj3i0
 alita_sdk/tools/testio/__init__.py,sha256=qi12wyJXN02hrUXg08CbijcCL5pi30JMbJfiXjn1Zr0,2646
 alita_sdk/tools/testio/api_wrapper.py,sha256=BvmL5h634BzG6p7ajnQLmj-uoAw1gjWnd4FHHu1h--Q,21638
 alita_sdk/tools/testrail/__init__.py,sha256=YILz5ZjkHfBg1tQ-FKFBP_s0uo2WDY110Qgsg0kBntM,4157
-alita_sdk/tools/testrail/api_wrapper.py,sha256=Sfe_5sJk-cIAYRlpO7DcCN117UAPbBIzf_HI6fVrOQ8,31999
+alita_sdk/tools/testrail/api_wrapper.py,sha256=5T5Mowo2xW-s2k4mB9IL7kSHok8I7C2UMcz1Uq25_NY,32419
 alita_sdk/tools/utils/__init__.py,sha256=155xepXPr4OEzs2Mz5YnjXcBpxSv1X2eznRUVoPtyK0,3268
-alita_sdk/tools/utils/content_parser.py,sha256=Ou967dO3JnnL9kAidzofwV6TVe2_ul86ZMjcBOK-VnA,7811
+alita_sdk/tools/utils/content_parser.py,sha256=JiL3zXCadDR0DVt6Zgq03LxWXwxuYv8us84bYMeqYa4,9788
 alita_sdk/tools/xray/__init__.py,sha256=dn-Ine9mHF8c_yZ-pWkn-gvSvSmGwdrqxPJOz6Cmqc4,3297
 alita_sdk/tools/xray/api_wrapper.py,sha256=l7Cwvh_5bEaH0IM3yLo1PSClqV1E20wH_sEHaJntM3s,8517
 alita_sdk/tools/yagmail/__init__.py,sha256=c4Qn3em0tLxzRmFKpzbBgY9W2EnOoKf0azoDJHng5CY,2208
@@ -299,8 +299,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=VDsSFUTnBne1mFNssX2eLFxThXAhX
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.207.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.207.dist-info/METADATA,sha256=5lVazejNDrCxd8_C-cvo3UuvhtgU7mVpW6WZAUqR_6Q,18917
-alita_sdk-0.3.207.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.207.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.207.dist-info/RECORD,,
+alita_sdk-0.3.209.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.209.dist-info/METADATA,sha256=mnXZpgZ6MJkT3kLO1byDa9SR9lTbzhkEaTZb803EPoo,18917
+alita_sdk-0.3.209.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.209.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.209.dist-info/RECORD,,