alita-sdk 0.3.465__py3-none-any.whl → 0.3.497__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of alita-sdk might be problematic.
Files changed (103)
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +83 -1
  3. alita_sdk/cli/agent_loader.py +22 -4
  4. alita_sdk/cli/agent_ui.py +13 -3
  5. alita_sdk/cli/agents.py +1876 -186
  6. alita_sdk/cli/callbacks.py +96 -25
  7. alita_sdk/cli/cli.py +10 -1
  8. alita_sdk/cli/config.py +151 -9
  9. alita_sdk/cli/context/__init__.py +30 -0
  10. alita_sdk/cli/context/cleanup.py +198 -0
  11. alita_sdk/cli/context/manager.py +731 -0
  12. alita_sdk/cli/context/message.py +285 -0
  13. alita_sdk/cli/context/strategies.py +289 -0
  14. alita_sdk/cli/context/token_estimation.py +127 -0
  15. alita_sdk/cli/input_handler.py +167 -4
  16. alita_sdk/cli/inventory.py +1256 -0
  17. alita_sdk/cli/toolkit.py +14 -17
  18. alita_sdk/cli/toolkit_loader.py +35 -5
  19. alita_sdk/cli/tools/__init__.py +8 -1
  20. alita_sdk/cli/tools/filesystem.py +910 -64
  21. alita_sdk/cli/tools/planning.py +143 -157
  22. alita_sdk/cli/tools/terminal.py +154 -20
  23. alita_sdk/community/__init__.py +64 -8
  24. alita_sdk/community/inventory/__init__.py +224 -0
  25. alita_sdk/community/inventory/config.py +257 -0
  26. alita_sdk/community/inventory/enrichment.py +2137 -0
  27. alita_sdk/community/inventory/extractors.py +1469 -0
  28. alita_sdk/community/inventory/ingestion.py +3172 -0
  29. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  30. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  31. alita_sdk/community/inventory/parsers/base.py +295 -0
  32. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  33. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  34. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  35. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  36. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  37. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  38. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  39. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  40. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  41. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  42. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  43. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  44. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  45. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  46. alita_sdk/community/inventory/patterns/loader.py +348 -0
  47. alita_sdk/community/inventory/patterns/registry.py +198 -0
  48. alita_sdk/community/inventory/presets.py +535 -0
  49. alita_sdk/community/inventory/retrieval.py +1403 -0
  50. alita_sdk/community/inventory/toolkit.py +169 -0
  51. alita_sdk/community/inventory/visualize.py +1370 -0
  52. alita_sdk/configurations/bitbucket.py +0 -3
  53. alita_sdk/runtime/clients/client.py +108 -31
  54. alita_sdk/runtime/langchain/assistant.py +4 -2
  55. alita_sdk/runtime/langchain/constants.py +3 -1
  56. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  57. alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
  58. alita_sdk/runtime/langchain/langraph_agent.py +123 -31
  59. alita_sdk/runtime/llms/preloaded.py +2 -6
  60. alita_sdk/runtime/toolkits/__init__.py +2 -0
  61. alita_sdk/runtime/toolkits/application.py +1 -1
  62. alita_sdk/runtime/toolkits/mcp.py +107 -91
  63. alita_sdk/runtime/toolkits/planning.py +173 -0
  64. alita_sdk/runtime/toolkits/tools.py +59 -7
  65. alita_sdk/runtime/tools/artifact.py +46 -17
  66. alita_sdk/runtime/tools/function.py +2 -1
  67. alita_sdk/runtime/tools/llm.py +320 -32
  68. alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
  69. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  70. alita_sdk/runtime/tools/planning/models.py +246 -0
  71. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  72. alita_sdk/runtime/tools/vectorstore_base.py +44 -9
  73. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  74. alita_sdk/runtime/utils/mcp_client.py +465 -0
  75. alita_sdk/runtime/utils/mcp_oauth.py +80 -0
  76. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  77. alita_sdk/runtime/utils/streamlit.py +6 -10
  78. alita_sdk/runtime/utils/toolkit_utils.py +14 -5
  79. alita_sdk/tools/__init__.py +54 -27
  80. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
  81. alita_sdk/tools/base_indexer_toolkit.py +99 -20
  82. alita_sdk/tools/bitbucket/__init__.py +2 -2
  83. alita_sdk/tools/chunkers/__init__.py +3 -1
  84. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  85. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  86. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  87. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  88. alita_sdk/tools/code_indexer_toolkit.py +55 -22
  89. alita_sdk/tools/confluence/api_wrapper.py +63 -14
  90. alita_sdk/tools/elitea_base.py +86 -21
  91. alita_sdk/tools/jira/__init__.py +1 -1
  92. alita_sdk/tools/jira/api_wrapper.py +91 -40
  93. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  94. alita_sdk/tools/qtest/__init__.py +1 -1
  95. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  96. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  97. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  98. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +2 -1
  99. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +103 -61
  100. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
  101. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
  102. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
  103. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
@@ -128,12 +128,37 @@ BaseIndexDataParams = create_model(
 
 
 class BaseToolApiWrapper(BaseModel):
-
+
+    # Optional RunnableConfig for CLI/standalone usage (allows dispatch_custom_event to work)
+    _runnable_config: Optional[Dict[str, Any]] = None
+    # toolkit id propagated from backend
+    toolkit_id: int = 0
 
     def get_available_tools(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def _log_tool_event(self, message: str, tool_name: str = None):
-        """Log data and dispatch custom event for the tool"""
+    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
+        """
+        Set the RunnableConfig for dispatching custom events.
+
+        This is required when running outside of a LangChain agent context
+        (e.g., from CLI). Without a config containing a run_id,
+        dispatch_custom_event will fail with "Unable to dispatch an adhoc event
+        without a parent run id".
+
+        Args:
+            config: A RunnableConfig dict with at least {'run_id': uuid}
+        """
+        self._runnable_config = config
+
+    def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
+        """Log data and dispatch custom event for the tool.
+
+        Args:
+            message: The message to log
+            tool_name: Name of the tool (defaults to 'tool_progress')
+            config: Optional RunnableConfig. If not provided, uses self._runnable_config.
+                Required when running outside a LangChain agent context.
+        """
 
         try:
             from langchain_core.callbacks import dispatch_custom_event
@@ -142,6 +167,10 @@ class BaseToolApiWrapper(BaseModel):
                 tool_name = 'tool_progress'
 
             logger.info(message)
+
+            # Use provided config, fall back to instance config
+            effective_config = config or self._runnable_config
+
             dispatch_custom_event(
                 name="thinking_step",
                 data={
@@ -149,6 +178,7 @@ class BaseToolApiWrapper(BaseModel):
                     "tool_name": tool_name,
                     "toolkit": self.__class__.__name__,
                 },
+                config=effective_config,
             )
         except Exception as e:
             logger.warning(f"Failed to dispatch progress event: {str(e)}")
@@ -165,6 +195,11 @@ class BaseToolApiWrapper(BaseModel):
             # execution = str(execution)
             return execution
         except Exception as e:
+            # Re-raise McpAuthorizationRequired directly without wrapping
+            from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
+            if isinstance(e, McpAuthorizationRequired):
+                raise
+
             # Catch all tool execution exceptions and provide user-friendly error messages
             error_type = type(e).__name__
             error_message = str(e)
@@ -589,27 +624,37 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
     def loader(self,
                branch: Optional[str] = None,
               whitelist: Optional[List[str]] = None,
-               blacklist: Optional[List[str]] = None) -> str:
+               blacklist: Optional[List[str]] = None,
+               chunked: bool = True) -> Generator[Document, None, None]:
         """
-        Generates file content from a branch, respecting whitelist and blacklist patterns.
+        Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
 
         Parameters:
         - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
         - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
         - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+        - chunked (bool): If True (default), applies universal chunker based on file type.
+          If False, returns raw Documents without chunking.
 
         Returns:
-        - generator: Yields content from files matching the whitelist but not the blacklist.
+        - generator: Yields Documents from files matching the whitelist but not the blacklist.
 
         Example:
         # Use 'feature-branch', include '.py' files, exclude 'test_' files
-        file_generator = loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*'])
+        for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+            print(doc.page_content)
 
         Notes:
         - Whitelist and blacklist use Unix shell-style wildcards.
         - Files must match the whitelist and not the blacklist to be included.
+        - When chunked=True:
+          - .md files → markdown chunker (header-based splitting)
+          - .py/.js/.ts/etc → code parser (TreeSitter-based)
+          - .json files → JSON chunker
+          - other files → default text chunker
         """
-        from .chunkers.code.codeparser import parse_code_files_for_db
+        from langchain_core.documents import Document
+        import hashlib
 
         _files = self.__handle_get_files("", self.__get_branch(branch))
         self._log_tool_event(message="Listing files in branch", tool_name="loader")
@@ -627,32 +672,52 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
                     or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
             return False
 
-        def file_content_generator():
+        def raw_document_generator() -> Generator[Document, None, None]:
+            """Yields raw Documents without chunking."""
             self._log_tool_event(message="Reading the files", tool_name="loader")
-            # log the progress of file reading
             total_files = len(_files)
+            processed = 0
+
             for idx, file in enumerate(_files, 1):
                 if is_whitelisted(file) and not is_blacklisted(file):
-                    # read file ONLY if it matches whitelist and does not match blacklist
                     try:
                         file_content = self._read_file(file, self.__get_branch(branch))
                     except Exception as e:
                         logger.error(f"Failed to read file {file}: {e}")
-                        file_content = ""
+                        continue
+
                     if not file_content:
-                        # empty file, skip
                         continue
-                    # hash the file content to ensure uniqueness
-                    import hashlib
+
+                    # Hash the file content for uniqueness tracking
                     file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
-                    yield {"file_name": file,
-                           "file_content": file_content,
-                           "commit_hash": file_hash}
+                    processed += 1
+
+                    yield Document(
+                        page_content=file_content,
+                        metadata={
+                            'file_path': file,
+                            'file_name': file,
+                            'source': file,
+                            'commit_hash': file_hash,
+                        }
+                    )
+
                 if idx % 10 == 0 or idx == total_files:
-                    self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
-            self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
+                    self._log_tool_event(
+                        message=f"{idx} out of {total_files} files checked, {processed} matched",
+                        tool_name="loader"
+                    )
+
+            self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
 
-        return parse_code_files_for_db(file_content_generator())
+        if not chunked:
+            # Return raw documents without chunking
+            return raw_document_generator()
+
+        # Apply universal chunker based on file type
+        from .chunkers.universal_chunker import universal_chunker
+        return universal_chunker(raw_document_generator())
 
     def index_data(self,
                    index_name: str,
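Taken together, loader now yields langchain Document objects with chunking on by default. A short usage sketch (the wrapper name is illustrative; any BaseCodeToolApiWrapper subclass applies):

    # Default: per-file-type chunking (markdown/code/JSON/text chunkers).
    for chunk in wrapper.loader(branch="main", whitelist=["*.py"], blacklist=["*test_*"]):
        print(chunk.metadata.get("file_path"), len(chunk.page_content))

    # chunked=False: one raw Document per matching file, carrying a sha256
    # of the content as metadata['commit_hash'] for change tracking.
    raw_docs = list(wrapper.loader(branch="main", chunked=False))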
@@ -68,7 +68,7 @@ class JiraToolkit(BaseToolkit):
         name,
         cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
         limit=(int, Field(description="Limit issues. Default is 5", gt=0, default=5)),
-        api_version=(Optional[str], Field(description="Rest API version: optional. Default is 2", default="2")),
+        api_version=(Literal['2', '3'], Field(description="Rest API version: optional. Default is 2", default="3")),
         labels=(Optional[str], Field(
             description="List of comma separated labels used for labeling of agent's created or updated entities",
             default=None,
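Narrowing api_version from Optional[str] to Literal['2', '3'] means pydantic now rejects any other value at configuration time (note the new default is "3" while the description still says 2). A minimal sketch of the same validation behavior, reduced to one field:

    from typing import Literal
    from pydantic import Field, ValidationError, create_model

    Cfg = create_model(
        "Cfg",
        api_version=(Literal['2', '3'], Field(default="3", description="Rest API version")),
    )

    print(Cfg().api_version)       # '3'
    print(Cfg(api_version='2'))    # accepted

    try:
        Cfg(api_version='latest')  # rejected: not one of the Literal values
    except ValidationError as e:
        print(e.errors()[0]['type'])  # 'literal_error' (pydantic v2)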
@@ -453,41 +453,63 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         return super().validate_toolkit(values)
 
     def _parse_issues(self, issues: Dict) -> List[dict]:
-        parsed = []
-        for issue in issues["issues"]:
-            if len(parsed) >= self.limit:
+        parsed: List[dict] = []
+        issues_list = issues.get("issues") if isinstance(issues, dict) else None
+        if not isinstance(issues_list, list):
+            return parsed
+
+        for issue in issues_list:
+            if self.limit and len(parsed) >= self.limit:
                 break
-            issue_fields = issue["fields"]
-            key = issue["key"]
-            id = issue["id"]
-            summary = issue_fields["summary"]
-            description = issue_fields["description"]
-            created = issue_fields["created"][0:10]
-            updated = issue_fields["updated"]
-            duedate = issue_fields["duedate"]
-            priority = issue_fields["priority"]["name"]
-            status = issue_fields["status"]["name"]
-            project_id = issue_fields["project"]["id"]
-            issue_url = f"{self._client.url}browse/{key}"
-            try:
-                assignee = issue_fields["assignee"]["displayName"]
-            except Exception:
-                assignee = "None"
+
+            issue_fields = issue.get("fields") or {}
+            key = issue.get("key", "")
+            issue_id = issue.get("id", "")
+
+            summary = issue_fields.get("summary") or ""
+            description = issue_fields.get("description") or ""
+            created_raw = issue_fields.get("created") or ""
+            created = created_raw[:10] if created_raw else ""
+            updated = issue_fields.get("updated") or ""
+            duedate = issue_fields.get("duedate")
+
+            priority_info = issue_fields.get("priority") or {}
+            priority = priority_info.get("name") or "None"
+
+            status_info = issue_fields.get("status") or {}
+            status = status_info.get("name") or "Unknown"
+
+            project_info = issue_fields.get("project") or {}
+            project_id = project_info.get("id") or ""
+
+            issue_url = f"{self._client.url}browse/{key}" if key else self._client.url
+
+            assignee_info = issue_fields.get("assignee") or {}
+            assignee = assignee_info.get("displayName") or "None"
+
             rel_issues = {}
-            for related_issue in issue_fields["issuelinks"]:
-                if "inwardIssue" in related_issue.keys():
-                    rel_type = related_issue["type"]["inward"]
-                    rel_key = related_issue["inwardIssue"]["key"]
+            for related_issue in issue_fields.get("issuelinks") or []:
+                rel_type = None
+                rel_key = None
+                if related_issue.get("inwardIssue"):
+                    rel_type = related_issue.get("type", {}).get("inward")
+                    rel_key = related_issue["inwardIssue"].get("key")
                     # rel_summary = related_issue["inwardIssue"]["fields"]["summary"]
-                if "outwardIssue" in related_issue.keys():
-                    rel_type = related_issue["type"]["outward"]
-                    rel_key = related_issue["outwardIssue"]["key"]
+                elif related_issue.get("outwardIssue"):
+                    rel_type = related_issue.get("type", {}).get("outward")
+                    rel_key = related_issue["outwardIssue"].get("key")
                     # rel_summary = related_issue["outwardIssue"]["fields"]["summary"]
-                rel_issues = {"type": rel_type, "key": rel_key, "url": f"{self._client.url}browse/{rel_key}"}
+
+                if rel_type and rel_key:
+                    rel_issues = {
+                        "type": rel_type,
+                        "key": rel_key,
+                        "url": f"{self._client.url}browse/{rel_key}",
+                    }
 
             parsed_issue = {
                 "key": key,
-                "id": id,
+                "id": issue_id,
                 "projectId": project_id,
                 "summary": summary,
                 "description": description,
@@ -500,10 +522,13 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
                 "url": issue_url,
                 "related_issues": rel_issues,
             }
-            for field in self.additional_fields:
-                field_value = issue_fields.get(field, None)
+
+            for field in (self.additional_fields or []):
+                field_value = issue_fields.get(field)
                 parsed_issue[field] = field_value
+
             parsed.append(parsed_issue)
+
         return parsed
 
     @staticmethod
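The rewrite consistently prefers issue_fields.get(key) or {} over issue_fields.get(key, {}) because Jira routinely returns fields that are present but explicitly null (an unassigned issue being the classic case), and a dict default on .get only applies when the key is missing. A small illustration:

    # Present-but-null field, as Jira returns for an unassigned issue:
    issue_fields = {"assignee": None, "priority": {"name": "High"}}

    # .get with a default does NOT guard against None values:
    # issue_fields.get("assignee", {})["displayName"]  -> TypeError

    # 'or {}' normalizes both missing and null to an empty dict:
    assignee = (issue_fields.get("assignee") or {}).get("displayName") or "None"
    print(assignee)  # 'None'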
@@ -749,13 +774,24 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
 
         attachment_data = []
         attachments = self._client.get_attachments_ids_from_issue(issue=jira_issue_key)
+        api_version = str(getattr(self._client, "api_version", "2"))
         for attachment in attachments:
             if attachment_pattern and not re.search(attachment_pattern, attachment['filename']):
                 logger.info(f"Skipping attachment {attachment['filename']} as it does not match pattern {attachment_pattern}")
                 continue
             logger.info(f"Processing attachment {attachment['filename']} with ID {attachment['attachment_id']}")
             try:
-                attachment_content = self._client.get_attachment_content(attachment['attachment_id'])
+                attachment_content = None
+
+                # Cloud (REST v3) attachments require signed URLs returned from metadata
+                if api_version in {"3", "latest"} or self.cloud:
+                    attachment_content = self._download_attachment_v3(
+                        attachment['attachment_id'],
+                        attachment['filename']
+                    )
+
+                if attachment_content is None:
+                    attachment_content = self._client.get_attachment_content(attachment['attachment_id'])
             except Exception as e:
                 logger.error(
                     f"Failed to download attachment {attachment['filename']} for issue {jira_issue_key}: {str(e)}")
@@ -797,15 +833,6 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         logger.debug(response_string)
         return response_string
 
-    def _extract_attachment_content(self, attachment):
-        """Extract attachment's content if possible (used for api v.2)"""
-
-        try:
-            content = self._client.get(attachment['content'].replace(self.base_url, ''))
-        except Exception as e:
-            content = f"Unable to parse content of '{attachment['filename']}' due to: {str(e)}"
-        return f"filename: {attachment['filename']}\ncontent: {content}"
-
     # Helper functions for image processing
     @staticmethod
     def _collect_context_for_image(content: str, image_marker: str, context_radius: int = 500) -> str:
@@ -1038,6 +1065,30 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
             logger.error(f"Error downloading attachment: {str(e)}")
             return None
 
+    def _download_attachment_v3(self, attachment_id: str, filename: str | None = None) -> Optional[bytes]:
+        """Download Jira attachment using metadata content URL (required for REST v3 / Cloud)."""
+        try:
+            metadata = self._client.get_attachment(attachment_id)
+        except Exception as e:
+            logger.error(f"Failed to retrieve metadata for attachment {attachment_id}: {str(e)}")
+            return None
+
+        download_url = metadata.get('content') or metadata.get('_links', {}).get('content')
+
+        if not download_url:
+            logger.warning(
+                f"Attachment {attachment_id} ({filename}) metadata does not include a content URL; falling back.")
+            return None
+
+        logger.info(f"Downloading attachment {attachment_id} via metadata content URL (v3).")
+        content = self._download_attachment(download_url)
+
+        if content is None:
+            logger.error(
+                f"Failed to download attachment {attachment_id} ({filename}) from v3 content URL: {download_url}")
+
+        return content
+
     def _extract_image_data(self, field_data):
         """
         Extracts image data from general JSON response.
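Note that the v3 path is strictly best-effort: any failure returns None and the caller falls back to the generic get_attachment_content call. Outside the SDK, the same two-step Cloud download can be reproduced directly against the REST v3 API: fetch the attachment metadata, then follow its content URL. A hedged sketch with placeholder site, credentials, and attachment id:

    import requests

    BASE_URL = "https://your-site.atlassian.net"  # placeholder
    AUTH = ("you@example.com", "<api-token>")     # placeholder

    # Step 1: v3 attachment metadata carries the downloadable 'content' URL.
    meta = requests.get(f"{BASE_URL}/rest/api/3/attachment/10001", auth=AUTH).json()
    download_url = meta.get("content") or meta.get("_links", {}).get("content")

    # Step 2: follow that URL for the raw bytes.
    if download_url:
        data = requests.get(download_url, auth=AUTH).content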
@@ -7,6 +7,7 @@ from alita_sdk.tools.base_indexer_toolkit import BaseIndexerToolkit
 
 class NonCodeIndexerToolkit(BaseIndexerToolkit):
     def _get_indexed_data(self, index_name: str):
+        self._ensure_vectorstore_initialized()
         if not self.vector_adapter:
             raise ToolException("Vector adapter is not initialized. "
                                 "Check your configuration: embedding_model and vectorstore_type.")
@@ -37,7 +37,7 @@ class QtestToolkit(BaseToolkit):
         name,
         qtest_configuration=(QtestConfiguration, Field(description="QTest API token", json_schema_extra={
            'configuration_types': ['qtest']})),
-        qtest_project_id=(int, Field(default=None, description="QTest project id")),
+        qtest_project_id=(int, Field(description="QTest project id")),
         no_of_tests_shown_in_dql_search=(Optional[int], Field(description="Max number of items returned by dql search",
                                                               default=10)),
 
@@ -271,13 +271,13 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
                 file_name = file.get('Name', '')
 
                 # Check if file should be skipped based on skip_extensions
-                if any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
+                if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
                        for pattern in skip_extensions):
                     continue
 
                 # Check if file should be included based on include_extensions
                 # If include_extensions is empty, process all files (that weren't skipped)
-                if include_extensions and not (any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
+                if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
                                                    for pattern in include_extensions)):
                     continue
 
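The re.escape change matters because the old conversion treated the whole glob as a regex: in a pattern like *.docx the unescaped dot matched any character, so names such as budgetxdocx passed the filter, and a stray ( or + in a pattern would make re.match raise. A quick before/after:

    import re

    # Old: glob '*' -> '.*', but '.' left as a regex wildcard
    old = re.compile('*.docx'.replace('*', '.*') + '$', re.IGNORECASE)
    # New: escape everything first, then restore '*' as '.*'
    new = re.compile(re.escape('*.docx').replace(r'\*', '.*') + '$', re.IGNORECASE)

    print(bool(old.match('budgetxdocx')))   # True  -- false positive
    print(bool(new.match('budgetxdocx')))   # False -- dot is now literal
    print(bool(new.match('report.docx')))   # True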
@@ -31,8 +31,8 @@ class VectorStoreAdapter(ABC):
         pass
 
     @abstractmethod
-    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. If including_index_meta is True, skip the index_meta records."""
         pass
 
     @abstractmethod
@@ -132,18 +132,22 @@ class PGVectorAdapter(VectorStoreAdapter):
             logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
             return []
 
-    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
-        # This logic deletes all data from the vectorstore collection without removal of collection.
-        # Collection itself remains available for future indexing.
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. If including_index_meta is True, skip the index_meta records."""
         from sqlalchemy.orm import Session
-        from sqlalchemy import func
-
+        from sqlalchemy import func, or_
         store = vectorstore_wrapper.vectorstore
         with Session(store.session_maker.bind) as session:
-            session.query(store.EmbeddingStore).filter(
-                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
-            ).delete(synchronize_session=False)
+            if including_index_meta:
+                session.query(store.EmbeddingStore).filter(
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
+                ).delete(synchronize_session=False)
+            else:
+                session.query(store.EmbeddingStore).filter(
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name,
+                    or_(func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type').is_(None),
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value)
+                ).delete(synchronize_session=False)
             session.commit()
 
     def is_vectorstore_type(self, vectorstore) -> bool:
@@ -334,8 +338,8 @@ class ChromaAdapter(VectorStoreAdapter):
             logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
             return []
 
-    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. including_index_meta is ignored."""
         vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, index_name))
 
     def get_indexed_data(self, vectorstore_wrapper):
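Read against the PGVector branching above, the new flag works as follows: including_index_meta=True deletes every row whose cmetadata collection matches, index_meta bookkeeping rows included, while the default False keeps rows whose cmetadata type equals IndexerKeywords.INDEX_META_TYPE (the docstring's "skip" wording describes this False branch; Chroma ignores the flag entirely). A usage sketch with illustrative adapter/wrapper names:

    # Re-index from scratch but keep index_meta bookkeeping rows (default):
    adapter.clean_collection(wrapper, index_name="my_index")

    # Wipe the collection entirely, index_meta rows included:
    adapter.clean_collection(wrapper, index_name="my_index", including_index_meta=True)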
@@ -1,3 +1,4 @@
+import hashlib
 import json
 import logging
 from typing import Optional, Generator, Literal
@@ -284,22 +285,20 @@ class ZephyrEssentialApiWrapper(NonCodeIndexerToolkit):
                 if isinstance(v, (str, int, float, bool, list, dict))
             }
             metadata['type'] = "TEST_CASE"
-
-            yield Document(page_content="", metadata=metadata)
-
-    def _extend_data(self, documents: Generator[Document, None, None]) -> Generator[Document, None, None]:
-        for document in documents:
+            #
             try:
-                if 'type' in document.metadata and document.metadata['type'] == "TEST_CASE":
-                    additional_content = self._process_test_case(document.metadata['key'])
-                    for steps_type, content in additional_content.items():
-                        if content:
-                            page_content = json.dumps(content)
-                            document.metadata[IndexerKeywords.CONTENT_IN_BYTES.value] = page_content.encode('utf-8')
-                            document.metadata["steps_type"] = steps_type
+                additional_content = self._process_test_case(metadata['key'])
+                for steps_type, content in additional_content.items():
+                    if content:
+                        page_content = json.dumps(content)
+                        content_hash = hashlib.sha256(page_content.encode('utf-8')).hexdigest()
+                        metadata[IndexerKeywords.UPDATED_ON.value] = content_hash
+                        metadata[IndexerKeywords.CONTENT_IN_BYTES.value] = page_content.encode('utf-8')
+                        metadata["steps_type"] = steps_type
             except Exception as e:
                 logging.error(f"Failed to process document: {e}")
-            yield document
+            #
+            yield Document(page_content="", metadata=metadata)
 
     def _process_test_case(self, key) -> dict:
         steps = self.get_test_case_test_steps(key)
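Storing a sha256 of the serialized steps under IndexerKeywords.UPDATED_ON effectively turns the indexer's update check into content-based change detection: a test case reads as modified exactly when its steps serialize differently. A minimal sketch of that idea, with a hypothetical previously_indexed map from test-case key to last stored hash:

    import hashlib
    import json

    def needs_reindex(key: str, steps: dict, previously_indexed: dict) -> bool:
        # Same hashing scheme as the diff above: sha256 over the JSON dump.
        content_hash = hashlib.sha256(json.dumps(steps).encode('utf-8')).hexdigest()
        return previously_indexed.get(key) != content_hash

    print(needs_reindex("TC-1", {"steps": ["open app"]}, {}))  # True: never seen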
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.465
+Version: 0.3.497
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0
@@ -134,6 +134,7 @@ Provides-Extra: community
 Requires-Dist: retry-extended==0.2.3; extra == "community"
 Requires-Dist: pyobjtojson==0.3; extra == "community"
 Requires-Dist: elitea-analyse==0.1.2; extra == "community"
+Requires-Dist: networkx>=3.0; extra == "community"
 Provides-Extra: all
 Requires-Dist: alita-sdk[runtime]; extra == "all"
 Requires-Dist: alita-sdk[tools]; extra == "all"