alita-sdk 0.3.211__py3-none-any.whl → 0.3.213__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. alita_sdk/runtime/clients/client.py +2 -2
  2. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +48 -24
  3. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +47 -1
  4. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +103 -49
  5. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +63 -0
  6. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +54 -0
  7. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +66 -0
  8. alita_sdk/runtime/langchain/document_loaders/constants.py +13 -19
  9. alita_sdk/runtime/langchain/document_loaders/utils.py +30 -1
  10. alita_sdk/runtime/toolkits/artifact.py +5 -0
  11. alita_sdk/runtime/tools/artifact.py +2 -4
  12. alita_sdk/runtime/tools/vectorstore.py +2 -1
  13. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +13 -37
  14. alita_sdk/tools/ado/wiki/ado_wrapper.py +10 -39
  15. alita_sdk/tools/confluence/api_wrapper.py +2 -0
  16. alita_sdk/tools/elitea_base.py +24 -3
  17. alita_sdk/tools/gitlab/__init__.py +3 -2
  18. alita_sdk/tools/gitlab/api_wrapper.py +45 -18
  19. alita_sdk/tools/gitlab_org/api_wrapper.py +44 -25
  20. alita_sdk/tools/sharepoint/api_wrapper.py +13 -13
  21. alita_sdk/tools/testrail/api_wrapper.py +20 -0
  22. alita_sdk/tools/utils/content_parser.py +37 -162
  23. {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.213.dist-info}/METADATA +1 -1
  24. {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.213.dist-info}/RECORD +27 -24
  25. {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.213.dist-info}/WHEEL +0 -0
  26. {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.213.dist-info}/licenses/LICENSE +0 -0
  27. {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.213.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
 import logging
 from datetime import datetime
 from typing import Optional, Any, List, Dict
+import fnmatch
 
 from gitlab import GitlabGetError
 from langchain_core.tools import ToolException
@@ -22,7 +23,9 @@ GitLabCreateBranch = create_model(
 
 GitLabListBranches = create_model(
     "GitLabListBranchesModel",
-    repository=(Optional[str], Field(description="Name of the repository", default=None))
+    repository=(Optional[str], Field(description="Name of the repository", default=None)),
+    limit=(Optional[int], Field(description="Maximum number of branches to return. If not provided, all branches will be returned.", default=20)),
+    branch_wildcard=(Optional[str], Field(description="Wildcard pattern to filter branches by name. If not provided, all branches will be returned.", default=None))
 )
 
 GitlabSetActiveBranch = create_model(
@@ -209,16 +212,32 @@ class GitLabWorkspaceAPIWrapper(BaseToolApiWrapper):
         self._active_branch = branch
         return f"Active branch set to {branch}"
 
-    def list_branches_in_repo(self, repository: Optional[str] = None) -> List[str]:
-        """List all branches in the repository."""
+    def list_branches_in_repo(self, repository: Optional[str] = None, limit: Optional[int] = 20, branch_wildcard: Optional[str] = None) -> List[str]:
+        """
+        Lists branches in the repository with optional limit and wildcard filtering.
+
+        Parameters:
+            repository (Optional[str]): Name of the repository. If None, uses the active repository.
+            limit (Optional[int]): Maximum number of branches to return
+            branch_wildcard (Optional[str]): Wildcard pattern to filter branches (e.g., '*dev')
+
+        Returns:
+            List[str]: List containing names of branches
+        """
         try:
             repo_instance = self._get_repo(repository)
-            branches = repo_instance.branches.list()
-            return [branch.name for branch in branches]
+            branches = repo_instance.branches.list(get_all=True)
+
+            if branch_wildcard:
+                branches = [branch for branch in branches if fnmatch.fnmatch(branch.name, branch_wildcard)]
+
+            if limit:
+                branches = branches[:limit]
+
+            branch_names = [branch.name for branch in branches]
+            return branch_names
         except Exception as e:
-            return ToolException(f"Unable to list branches due to error: {str(e)}")
-
-
+            return f"Failed to list branches: {str(e)}"
 
     def create_branch(self, branch_name: str, repository: Optional[str] = None) -> str:
         """Create a new branch in the repository."""
@@ -568,104 +587,104 @@ class GitLabWorkspaceAPIWrapper(BaseToolApiWrapper):
         return [
             {
                 "name": "create_branch",
-                "description": self.create_branch.__doc__,
+                "description": self.create_branch.__doc__ or "Create a new branch in the repository.",
                 "args_schema": GitLabCreateBranch,
                 "ref": self.create_branch,
             },
             {
                 "name": "set_active_branch",
-                "description": self.set_active_branch.__doc__,
+                "description": self.set_active_branch.__doc__ or "Set the active branch for the bot.",
                 "args_schema": GitlabSetActiveBranch,
                 "ref": self.set_active_branch,
             },
             {
                 "name": "list_branches_in_repo",
-                "description": self.list_branches_in_repo.__doc__,
+                "description": self.list_branches_in_repo.__doc__ or "List branches in the repository with optional limit and wildcard filtering.",
                 "args_schema": GitLabListBranches,
                 "ref": self.list_branches_in_repo,
             },
             {
                 "name": "get_issues",
-                "description": self.get_issues.__doc__,
+                "description": self.get_issues.__doc__ or "Fetches all open issues from the repository.",
                 "args_schema": GitLabGetIssues,
                 "ref": self.get_issues,
             },
             {
                 "name": "get_issue",
-                "description": self.get_issue.__doc__,
+                "description": self.get_issue.__doc__ or "Fetches a specific issue and its first 10 comments.",
                 "args_schema": GitLabGetIssue,
                 "ref": self.get_issue,
             },
             {
                 "name": "create_pull_request",
-                "description": self.create_pull_request.__doc__,
+                "description": self.create_pull_request.__doc__ or "Creates a pull request in the repository.",
                 "args_schema": GitLabCreatePullRequest,
                 "ref": self.create_pull_request,
             },
             {
                 "name": "comment_on_issue",
-                "description": self.comment_on_issue.__doc__,
+                "description": self.comment_on_issue.__doc__ or "Adds a comment to a GitLab issue.",
                 "args_schema": GitLabCommentOnIssue,
                 "ref": self.comment_on_issue,
             },
             {
                 "name": "create_file",
-                "description": self.create_file.__doc__,
+                "description": self.create_file.__doc__ or "Creates a new file in the GitLab repository.",
                 "args_schema": GitLabCreateFile,
                 "ref": self.create_file,
             },
             {
                 "name": "read_file",
-                "description": self.read_file.__doc__,
+                "description": self.read_file.__doc__ or "Reads a file from the GitLab repository.",
                 "args_schema": GitLabReadFile,
                 "ref": self.read_file,
             },
             {
                 "name": "update_file",
-                "description": self.update_file.__doc__,
+                "description": self.update_file.__doc__ or "Updates a file in the GitLab repository.",
                 "args_schema": GitLabUpdateFile,
                 "ref": self.update_file,
             },
             {
                 "name": "delete_file",
-                "description": self.delete_file.__doc__,
+                "description": self.delete_file.__doc__ or "Deletes a file from the GitLab repository.",
                 "args_schema": GitLabDeleteFile,
                 "ref": self.delete_file,
             },
             {
                 "name": "get_pr_changes",
-                "description": self.get_pr_changes.__doc__,
+                "description": self.get_pr_changes.__doc__ or "Get pull request changes from the specified PR number and repository.",
                 "args_schema": GitLabGetPRChanges,
                 "ref": self.get_pr_changes,
             },
             {
                 "name": "create_pr_change_comment",
-                "description": self.create_pr_change_comment.__doc__,
+                "description": self.create_pr_change_comment.__doc__ or "Create a comment on a pull request change in GitLab.",
                 "args_schema": GitLabCreatePullRequestChangeCommentInput,
                 "ref": self.create_pr_change_comment,
             },
             {
                 "name": "list_files",
-                "description": self.list_files.__doc__,
+                "description": self.list_files.__doc__ or "List files by defined path.",
                 "args_schema": ListFilesModel,
                 "ref": self.list_files,
             },
             {
                 "name": "list_folders",
-                "description": self.list_folders.__doc__,
+                "description": self.list_folders.__doc__ or "List folders by defined path.",
                 "args_schema": ListFilesModel,
                 "ref": self.list_folders,
             },
             {
                 "name": "append_file",
-                "description": self.append_file.__doc__,
+                "description": self.append_file.__doc__ or "Appends new content to the end of a file.",
                 "args_schema": AppendFileInput,
                 "ref": self.append_file,
             },
             {
                 "ref": self.get_commits,
                 "name": "get_commits",
-                "description": self.get_commits.__doc__,
+                "description": self.get_commits.__doc__ or "Retrieves a list of commits from the repository.",
                 "args_schema": GetCommits,
             }
         ]
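
The repeated `__doc__ or "..."` change above guards against `__doc__` being `None`, which is what Python returns for a method without a docstring (and for all functions under `python -OO`); the string literal then serves as a fallback tool description. A minimal sketch of the pattern:

```python
def documented():
    """Returns a greeting."""

def undocumented():
    pass

# __doc__ is None for undocumented, so the fallback string is used instead
for fn, fallback in [(documented, "fallback A"), (undocumented, "fallback B")]:
    print(fn.__doc__ or fallback)
# Returns a greeting.
# fallback B
```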
@@ -1,6 +1,6 @@
 import json
 import logging
-from typing import Optional, List, Generator
+from typing import Optional, List, Generator, Any
 
 from langchain_core.documents import Document
 from langchain_core.tools import ToolException
@@ -129,7 +129,7 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                   is_capture_image: bool = False,
                   page_number: int = None,
                   sheet_name: str = None,
-                  excel_by_sheets: bool = False):
+                  excel_by_sheets: bool = False) -> str | dict | ToolException:
         """ Reads file located at the specified server-relative path. """
         try:
             file = self._client.web.get_file_by_server_relative_path(path)
@@ -148,30 +148,30 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                                   excel_by_sheets=excel_by_sheets,
                                   llm=self.llm)
 
-    def _base_loader(self, **kwargs) -> List[Document]:
+    def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
         try:
             all_files = self.get_files_list()
         except Exception as e:
             raise ToolException(f"Unable to extract files: {e}")
 
-        docs: List[Document] = []
         for file in all_files:
             metadata = {
                 ("updated_on" if k == "Modified" else k): str(v)
                 for k, v in file.items()
             }
-            docs.append(Document(page_content="", metadata=metadata))
-        return docs
+            yield Document(page_content="", metadata=metadata)
 
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
-        page_content = self.read_file(document.metadata['Path'], is_capture_image=True, excel_by_sheets=True)
-        if isinstance(page_content, dict):
-            for key, value in page_content.items():
-                metadata = document.metadata
-                metadata['page'] = key
-                yield Document(page_content=str(value), metadata=metadata)
+        doc_content = self.read_file(document.metadata['Path'],
+                                     is_capture_image=True,
+                                     excel_by_sheets=True)
+        if isinstance(doc_content, dict):
+            for page, content in doc_content:
+                new_metadata = document.metadata
+                new_metadata['page'] = page
+                yield Document(page_content=str(content), metadata=new_metadata)
         else:
-            document.page_content = json.dumps(str(page_content))
+            document.page_content = str(doc_content)
             yield document
 
     @extend_with_vector_tools
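
`_base_loader` now yields `Document` stubs instead of materializing a `List[Document]`, so SharePoint file metadata flows lazily into `_process_document`. A minimal sketch of the pattern, with illustrative names rather than the SDK's actual API:

```python
from typing import Generator, Iterable

def base_loader(files: Iterable[dict]) -> Generator[dict, None, None]:
    # Yield one lightweight record per file instead of accumulating a list,
    # so downstream processing can start before the listing completes
    for file in files:
        yield {"page_content": "", "metadata": {k: str(v) for k, v in file.items()}}

for doc in base_loader([{"Name": "report.docx", "Modified": "2024-01-01"}]):
    print(doc["metadata"])
```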
@@ -8,6 +8,8 @@ from openai import BadRequestError
 from pydantic import SecretStr, create_model, model_validator
 from pydantic.fields import Field, PrivateAttr
 from testrail_api import StatusCodeError, TestRailAPI
+
+from ..chunkers.code.constants import get_file_extension
 from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
 from langchain_core.documents import Document
 
@@ -537,6 +539,9 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
             title_keyword: Optional[str] = None,
             **kwargs: Any
     ) -> Generator[Document, None, None]:
+        self._include_attachments = kwargs.get('include_attachments', False)
+        self._skip_attachment_extensions = kwargs.get('skip_attachment_extensions', [])
+
         try:
             if suite_id:
                 resp = self._client.cases.get_cases(project_id=project_id, suite_id=int(suite_id))
@@ -582,6 +587,11 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
             Generator[Document, None, None]: A generator yielding processed Document objects with metadata.
         """
         try:
+            if not self._include_attachments:
+                # If attachments are not included, return the document as is
+                yield document
+                return
+
             # get base data from the document required to extract attachments and other metadata
             base_data = json.loads(document.page_content)
             case_id = base_data.get("id")
@@ -591,6 +601,10 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
 
             # process each attachment to extract its content
             for attachment in attachments:
+                if get_file_extension(attachment['filename']) in self._skip_attachment_extensions:
+                    logger.info(f"Skipping attachment {attachment['filename']} with unsupported extension.")
+                    continue
+
                 attachment_id = f"attach_{attachment['id']}"
                 # add attachment id to metadata of parent
                 document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
@@ -639,6 +653,12 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
             'suite_id': (Optional[str],
                          Field(default=None, description="Optional TestRail suite ID to filter test cases")),
             'section_id': (Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+            'include_attachments': (Optional[bool],
+                                    Field(description="Whether to include attachment content in indexing",
+                                          default=False)),
+            'skip_attachment_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to skip when processing attachments: i.e. ['.png', '.jpg']",
+                default=[])),
         }
 
     def _to_markup(self, data: List[Dict], output_format: str) -> str:
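
The two new indexing kwargs above work together: `include_attachments` short-circuits `_process_document`, and `skip_attachment_extensions` drops individual attachments by suffix. A hedged sketch of the combined flow; `get_file_extension` here is a stand-in, as the real helper lives in `..chunkers.code.constants` and its exact behavior is not shown in this diff:

```python
from pathlib import Path

def get_file_extension(filename: str) -> str:
    # Stand-in for the imported helper; a suffix-based lookup is assumed
    return Path(filename).suffix.lower()

include_attachments = True
skip_attachment_extensions = [".png", ".jpg"]
attachments = [{"filename": "trace.log"}, {"filename": "screenshot.png"}]

if include_attachments:
    for attachment in attachments:
        if get_file_extension(attachment["filename"]) in skip_attachment_extensions:
            continue  # mirrors the logger.info(...) skip in the wrapper
        print("processing", attachment["filename"])  # only trace.log survives
```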
@@ -1,19 +1,11 @@
-import re
-
-from docx import Document
-from io import BytesIO
-import pandas as pd
-from PIL import Image
-from pptx import Presentation
-from pptx.enum.shapes import MSO_SHAPE_TYPE
-import io
-import pymupdf
+from pathlib import Path
+
 from langchain_core.tools import ToolException
-from transformers import BlipProcessor, BlipForConditionalGeneration
-from langchain_core.messages import HumanMessage
 from logging import getLogger
+from alita_sdk.runtime.langchain.document_loaders.constants import loaders_map
+from langchain_core.documents import Document
 
-from ...runtime.langchain.tools.utils import bytes_to_base64
+from ...runtime.langchain.document_loaders.utils import create_temp_file
 
 logger = getLogger(__name__)
 
@@ -61,7 +53,7 @@ IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg']
 
 
 def parse_file_content(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
-                       sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False):
+                       sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False) -> str | ToolException:
     """Parse the content of a file based on its type and return the parsed content.
 
     Args:
@@ -72,6 +64,7 @@ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool
         sheet_name (str, optional): The specific sheet name to parse for Excel files.
         llm: The language model to use for image processing.
         file_path (str, optional): The path to the file if it needs to be read from disk.
+        return_type (str, optional): Tipe of returned result. Possible values are 'str', 'docs'.
     Returns:
         str: The parsed content of the file.
     Raises:
@@ -81,142 +74,39 @@
     if (file_path and (file_name or file_content)) or (not file_path and (not file_name or file_content is None)):
         raise ToolException("Either (file_name and file_content) or file_path must be provided, but not both.")
 
-    if file_path:
-        file_content = file_to_bytes(file_path)
-        if file_content is None:
-            return ToolException(f"File not found or could not be read: {file_path}")
-        file_name = file_path.split('/')[-1]  # Extract file name from path
-    if file_name.endswith('.txt'):
-        return parse_txt(file_content)
-    elif file_name.endswith('.docx'):
-        return read_docx_from_bytes(file_content)
-    elif file_name.endswith('.xlsx') or file_name.endswith('.xls'):
-        return parse_excel(file_content, sheet_name, excel_by_sheets)
-    elif file_name.endswith('.pdf'):
-        return parse_pdf(file_content, page_number, is_capture_image, llm)
-    elif file_name.endswith('.pptx'):
-        return parse_pptx(file_content, page_number, is_capture_image, llm)
-    elif any(file_name.lower().endswith(f".{ext}") for ext in IMAGE_EXTENSIONS):
-        match = re.search(r'\.([a-zA-Z0-9]+)$', file_name)
-        return __perform_llm_prediction_for_image(llm, file_content, match.group(1), image_processing_prompt)
-    else:
+    extension = Path(file_path if file_path else file_name).suffix
+
+    loader_object = loaders_map.get(extension)
+    loader_kwargs = loader_object['kwargs']
+    loader_kwargs.update({
+        "file_path": file_path,
+        "file_content": file_content,
+        "file_name": file_name,
+        "extract_images": is_capture_image,
+        "llm": llm,
+        "page_number": page_number,
+        "sheet_name": sheet_name,
+        "excel_by_sheets": excel_by_sheets
+    })
+    loader = loader_object['class'](**loader_kwargs)
+
+    if not loader:
         return ToolException(
             "Not supported type of files entered. Supported types are TXT, DOCX, PDF, PPTX, XLSX and XLS only.")
 
-def parse_txt(file_content):
-    try:
-        return file_content.decode('utf-8')
-    except Exception as e:
-        return ToolException(f"Error decoding file content: {e}")
-
-def parse_excel(file_content, sheet_name = None, return_by_sheets: bool = False):
-    try:
-        excel_file = io.BytesIO(file_content)
-        if sheet_name:
-            return parse_sheet(excel_file, sheet_name)
-        dfs = pd.read_excel(excel_file, sheet_name=sheet_name)
-
-        if return_by_sheets:
-            result = {}
-            for sheet_name, df in dfs.items():
-                df.fillna('', inplace=True)
-                result[sheet_name] = df.to_dict(orient='records')
-            return result
-        else:
-            result = []
-            for sheet_name, df in dfs.items():
-                df.fillna('', inplace=True)
-                string_content = df.to_string(index=False)
-                result.append(f"====== Sheet name: {sheet_name} ======\n{string_content}")
-            return "\n\n".join(result)
-    except Exception as e:
-        return ToolException(f"Error reading Excel file: {e}")
-
-def parse_sheet(excel_file, sheet_name):
-    df = pd.read_excel(excel_file, sheet_name=sheet_name)
-    df.fillna('', inplace=True)
-    return df.to_string()
-
-def parse_pdf(file_content, page_number, is_capture_image, llm):
-    with pymupdf.open(stream=file_content, filetype="pdf") as report:
-        text_content = ''
-        if page_number is not None:
-            page = report.load_page(page_number - 1)
-            text_content += read_pdf_page(report, page, page_number, is_capture_image, llm)
-        else:
-            for index, page in enumerate(report, start=1):
-                text_content += read_pdf_page(report, page, index, is_capture_image, llm)
-        return text_content
-
-def parse_pptx(file_content, page_number, is_capture_image, llm=None):
-    prs = Presentation(io.BytesIO(file_content))
-    text_content = ''
-    if page_number is not None:
-        text_content += read_pptx_slide(prs.slides[page_number - 1], page_number, is_capture_image, llm)
+    if hasattr(loader, 'get_content'):
+        return loader.get_content()
     else:
-        for index, slide in enumerate(prs.slides, start=1):
-            text_content += read_pptx_slide(slide, index, is_capture_image, llm)
-    return text_content
-
-def read_pdf_page(report, page, index, is_capture_images, llm=None):
-    text_content = f'Page: {index}\n'
-    text_content += page.get_text()
-    if is_capture_images:
-        images = page.get_images(full=True)
-        for i, img in enumerate(images):
-            xref = img[0]
-            base_image = report.extract_image(xref)
-            img_bytes = base_image["image"]
-            text_content += __perform_llm_prediction_for_image(llm, img_bytes)
-    return text_content
-
-def read_docx_from_bytes(file_content):
-    """Read and return content from a .docx file using a byte stream."""
-    try:
-        doc = Document(BytesIO(file_content))
-        text = []
-        for paragraph in doc.paragraphs:
-            text.append(paragraph.text)
-        return '\n'.join(text)
-    except Exception as e:
-        print(f"Error reading .docx from bytes: {e}")
-        return ""
-
-def read_pptx_slide(slide, index, is_capture_image, llm):
-    text_content = f'Slide: {index}\n'
-    for shape in slide.shapes:
-        if hasattr(shape, "text"):
-            text_content += shape.text + "\n"
-        elif is_capture_image and shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
-            try:
-                caption = __perform_llm_prediction_for_image(llm, shape.image.blob)
-            except:
-                caption = "\n[Picture: unknown]\n"
-            text_content += caption
-    return text_content
-
-def describe_image(image):
-    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-    inputs = processor(image, return_tensors="pt")
-    out = model.generate(**inputs)
-    return "\n[Picture: " + processor.decode(out[0], skip_special_tokens=True) + "]\n"
-
-def __perform_llm_prediction_for_image(llm, image: bytes, image_format='png', prompt=image_processing_prompt) -> str:
-    if not llm:
-        raise ToolException("LLM is not provided for image processing.")
-    base64_string = bytes_to_base64(image)
-    result = llm.invoke([
-        HumanMessage(
-            content=[
-                {"type": "text", "text": prompt},
-                {
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/{image_format};base64,{base64_string}"},
-                },
-            ])
-    ])
-    return f"\n[Image description: {result.content}]\n"
+        if file_content:
+            return load_content_from_bytes(file_content=file_content,
+                                           extension=extension,
+                                           loader_extra_config=loader_kwargs,
+                                           llm=llm)
+        else:
+            return load_content(file_path=file_path,
+                                extension=extension,
+                                loader_extra_config=loader_kwargs,
+                                llm=llm)
 
 # TODO: review usage of this function alongside with functions above
 def load_content(file_path: str, extension: str = None, loader_extra_config: dict = None, llm = None) -> str:
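
`parse_file_content` now dispatches through `loaders_map`, a table keyed by file suffix whose entries carry a loader class plus default kwargs, replacing the old `if/elif` chain over extensions. A minimal sketch of the dispatch pattern with a stand-in loader class (the map's shape is inferred from the diff; here the unsupported-extension check runs before instantiation):

```python
from pathlib import Path

class TextLoader:
    # Stand-in for the SDK's loader classes
    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def get_content(self) -> str:
        return "text content"

# Assumed shape: suffix -> {'class': loader class, 'kwargs': default config}
loaders_map = {".txt": {"class": TextLoader, "kwargs": {}}}

def parse(file_name: str) -> str:
    extension = Path(file_name).suffix
    loader_object = loaders_map.get(extension)
    if loader_object is None:
        raise ValueError(f"Unsupported extension: {extension}")
    loader = loader_object["class"](**loader_object["kwargs"])
    return loader.get_content()

print(parse("notes.txt"))  # text content
```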
@@ -254,22 +144,7 @@
 
 def load_content_from_bytes(file_content: bytes, extension: str = None, loader_extra_config: dict = None, llm = None) -> str:
     """Loads the content of a file from bytes based on its extension using a configured loader."""
-
-    import tempfile
-
-    # Automatic cleanup with context manager
-    with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as temp_file:
-        # Write data to temp file
-        temp_file.write(file_content)
-        temp_file.flush()  # Ensure data is written
-
-        # Get the file path for operations
-        temp_path = temp_file.name
-
-        # Perform your operations
-        return load_content(temp_path, extension, loader_extra_config, llm)
-
-
+    return load_content(create_temp_file(file_content), extension, loader_extra_config, llm)
 
 def file_to_bytes(filepath):
     """
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.211
+Version: 0.3.213
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0