alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3601 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1256 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +64 -8
- alita_sdk/community/inventory/__init__.py +224 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +1 -1
- alita_sdk/runtime/clients/client.py +214 -42
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +118 -30
- alita_sdk/runtime/langchain/constants.py +8 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +41 -12
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +116 -99
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +2 -2
- alita_sdk/runtime/langchain/langraph_agent.py +307 -71
- alita_sdk/runtime/langchain/utils.py +48 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +26 -0
- alita_sdk/runtime/toolkits/application.py +9 -2
- alita_sdk/runtime/toolkits/artifact.py +18 -6
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +780 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/tools.py +205 -55
- alita_sdk/runtime/toolkits/vectorstore.py +9 -4
- alita_sdk/runtime/tools/__init__.py +11 -3
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/artifact.py +225 -12
- alita_sdk/runtime/tools/function.py +95 -5
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +212 -0
- alita_sdk/runtime/tools/llm.py +494 -102
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +4 -4
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +180 -79
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +125 -52
- alita_sdk/runtime/utils/AlitaCallback.py +106 -20
- alita_sdk/runtime/utils/mcp_client.py +465 -0
- alita_sdk/runtime/utils/mcp_oauth.py +244 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +40 -13
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +12 -0
- alita_sdk/tools/__init__.py +77 -33
- alita_sdk/tools/ado/repos/__init__.py +7 -6
- alita_sdk/tools/ado/repos/repos_wrapper.py +11 -11
- alita_sdk/tools/ado/test_plan/__init__.py +7 -7
- alita_sdk/tools/ado/wiki/__init__.py +7 -11
- alita_sdk/tools/ado/wiki/ado_wrapper.py +89 -15
- alita_sdk/tools/ado/work_item/__init__.py +7 -11
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/advanced_jira_mining/__init__.py +8 -7
- alita_sdk/tools/aws/delta_lake/__init__.py +11 -9
- alita_sdk/tools/azure_ai/search/__init__.py +7 -6
- alita_sdk/tools/base_indexer_toolkit.py +345 -70
- alita_sdk/tools/bitbucket/__init__.py +9 -8
- alita_sdk/tools/bitbucket/api_wrapper.py +50 -6
- alita_sdk/tools/browser/__init__.py +4 -4
- alita_sdk/tools/carrier/__init__.py +4 -6
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +7 -6
- alita_sdk/tools/cloud/azure/__init__.py +7 -6
- alita_sdk/tools/cloud/gcp/__init__.py +7 -6
- alita_sdk/tools/cloud/k8s/__init__.py +7 -6
- alita_sdk/tools/code/linter/__init__.py +7 -7
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +8 -7
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +9 -8
- alita_sdk/tools/confluence/api_wrapper.py +171 -75
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/custom_open_api/__init__.py +9 -4
- alita_sdk/tools/elastic/__init__.py +8 -7
- alita_sdk/tools/elitea_base.py +492 -52
- alita_sdk/tools/figma/__init__.py +7 -7
- alita_sdk/tools/figma/api_wrapper.py +2 -1
- alita_sdk/tools/github/__init__.py +9 -9
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +62 -2
- alita_sdk/tools/gitlab/__init__.py +8 -8
- alita_sdk/tools/gitlab/api_wrapper.py +135 -33
- alita_sdk/tools/gitlab_org/__init__.py +7 -8
- alita_sdk/tools/google/bigquery/__init__.py +11 -12
- alita_sdk/tools/google_places/__init__.py +8 -7
- alita_sdk/tools/jira/__init__.py +9 -7
- alita_sdk/tools/jira/api_wrapper.py +100 -52
- alita_sdk/tools/keycloak/__init__.py +8 -7
- alita_sdk/tools/localgit/local_git.py +56 -54
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +3 -2
- alita_sdk/tools/ocr/__init__.py +8 -7
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +8 -7
- alita_sdk/tools/postman/__init__.py +7 -8
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +8 -9
- alita_sdk/tools/qtest/__init__.py +16 -11
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +7 -8
- alita_sdk/tools/report_portal/__init__.py +9 -7
- alita_sdk/tools/salesforce/__init__.py +7 -7
- alita_sdk/tools/servicenow/__init__.py +10 -10
- alita_sdk/tools/sharepoint/__init__.py +7 -6
- alita_sdk/tools/sharepoint/api_wrapper.py +127 -36
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +7 -6
- alita_sdk/tools/sql/__init__.py +8 -7
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +7 -6
- alita_sdk/tools/testrail/__init__.py +8 -9
- alita_sdk/tools/utils/__init__.py +26 -4
- alita_sdk/tools/utils/content_parser.py +88 -60
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +76 -26
- alita_sdk/tools/xray/__init__.py +9 -7
- alita_sdk/tools/zephyr/__init__.py +7 -6
- alita_sdk/tools/zephyr_enterprise/__init__.py +8 -6
- alita_sdk/tools/zephyr_essential/__init__.py +7 -6
- alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
- alita_sdk/tools/zephyr_scale/__init__.py +7 -6
- alita_sdk/tools/zephyr_squad/__init__.py +7 -6
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +147 -2
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/RECORD +206 -130
- alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
alita_sdk/tools/confluence/api_wrapper.py:

@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal
 
 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log
 
 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)
 
     @model_validator(mode='before')
@@ -477,28 +480,78 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         """Gets pages with specific label in the Confluence space."""
 
         start = 0
-        pages_info = []
-
-
-
+        pages_info: List[Dict[str, Any]] = []
+        seen_ids: set[str] = set()
+
+        # Use a while-loop driven by unique pages collected and
+        # presence of additional results instead of a fixed number
+        # of iterations based purely on max_pages/limit.
+        while len(pages_info) < (self.max_pages or 0):
+            pages = self.client.get_all_pages_by_label(
+                label,
+                start=start,
+                limit=self.limit,
+            )  # , expand="body.view.value"
             if not pages:
                 break
 
-
-
-
-
-
-
+            # Collect only ids we haven't processed yet to avoid
+            # calling get_page_by_id multiple times for the same
+            # Confluence page.
+            new_ids: List[str] = []
+            for p in pages:
+                page_id = p["id"] if isinstance(p, dict) else getattr(p, "id", None)
+                if page_id is None:
+                    continue
+                if page_id in seen_ids:
+                    continue
+                seen_ids.add(page_id)
+                new_ids.append(page_id)
+
+            if new_ids:
+                for page in self.get_pages_by_id(new_ids):
+                    meta = getattr(page, "metadata", {}) or {}
+                    page_id = meta.get("id")
+                    page_title = meta.get("title")
+                    page_url = meta.get("source")
+                    content = getattr(page, "page_content", None)
+
+                    if page_id is None:
+                        continue
+
+                    pages_info.append(
+                        {
+                            "page_id": page_id,
+                            "page_title": page_title,
+                            "page_url": page_url,
+                            "content": content,
+                        }
+                    )
+
+                    # Respect max_pages on unique pages collected.
+                    if len(pages_info) >= (self.max_pages or 0):
+                        break
+
+            # Advance the offset by the requested page size.
             start += self.limit
-
+
+            # Defensive break: if the API returns fewer items than
+            # requested, there are likely no more pages to fetch.
+            if len(pages) < self.limit:
+                break
+
+        # Slice as an extra safety net in case of any race conditions
+        # around the max_pages guard in the loop above.
+        return pages_info[: (self.max_pages or len(pages_info))]
 
     def is_public_page(self, page: dict) -> bool:
         """Check if a page is publicly accessible."""
         restrictions = self.client.get_all_restrictions_for_content(page["id"])
 
         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
            and not restrictions["read"]["restrictions"]["user"]["results"]
            and not restrictions["read"]["restrictions"]["group"]["results"]
        )
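The rewritten label lookup above pages through `get_all_pages_by_label`, deduplicates page ids with a `seen_ids` set, caps the result at `max_pages`, and stops early when the API returns fewer items than `limit`. A minimal standalone sketch of that pagination pattern, with a stubbed `fetch` callable standing in for the Confluence client (the helper name and the fake data are illustrative only):

```python
from typing import Any, Callable, Dict, List

def paginate_unique(fetch: Callable[[int, int], List[Dict[str, Any]]],
                    limit: int, max_pages: int) -> List[Dict[str, Any]]:
    """Collect up to max_pages unique items from a paged API.

    `fetch(start, limit)` returns one page of results; items are
    deduplicated by their "id" key, mirroring the seen_ids set in
    the wrapper code above.
    """
    collected: List[Dict[str, Any]] = []
    seen_ids: set = set()
    start = 0
    while len(collected) < max_pages:
        page = fetch(start, limit)
        if not page:
            break
        for item in page:
            item_id = item.get("id")
            if item_id is None or item_id in seen_ids:
                continue
            seen_ids.add(item_id)
            collected.append(item)
            if len(collected) >= max_pages:
                break
        start += limit
        # A short page means the server has no more results.
        if len(page) < limit:
            break
    return collected[:max_pages]

# Example with a fake two-page result set (purely illustrative data):
fake = [{"id": i} for i in range(7)]
print(paginate_unique(lambda s, l: fake[s:s + l], limit=5, max_pages=6))
```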
@@ -518,18 +571,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 ),
                 before_sleep=before_sleep_log(logger, logging.WARNING),
             )(self.client.get_page_by_id)
-
-
-
-
-
+            try:
+                page = get_page(
+                    page_id=page_id, expand=f"{self.content_format.value},version"
+                )
+            except (ApiError, HTTPError) as e:
+                logger.error(f"Error fetching page with ID {page_id}: {e}")
+                page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+                # store errors
+                if self._errors is None:
+                    self._errors = []
+                self._errors.append(page_content_temp)
+                return Document(page_content=page_content_temp,
+                                metadata={})
+            # TODO: update on toolkit advanced settings level as a separate feature
+            # if not self.include_restricted_content and not self.is_public_page(page):
+            #     continue
             yield self.process_page(page, skip_images)
 
+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content
 
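With the new `try/except (ApiError, HTTPError)` block, a single failing page no longer aborts the whole `get_pages_by_id` run: the error text is appended to `_errors`, a placeholder `Document` is produced, and `read_page_by_id` reports the collected messages when nothing could be loaded. A rough sketch of that collect-and-continue pattern in isolation (the `PageFetcher` class and its data are hypothetical, not SDK code):

```python
from typing import Dict, Iterable, Iterator, List, Optional

class PageFetcher:
    """Illustrative collect-errors-and-continue fetcher, not the SDK class."""

    def __init__(self, pages: Dict[str, str]):
        self._pages = pages          # page_id -> content
        self._errors: Optional[List[str]] = None

    def fetch_page(self, page_id: str) -> str:
        try:
            return self._pages[page_id]
        except KeyError as e:
            raise RuntimeError(f"page {page_id} not found") from e

    def get_pages_by_id(self, page_ids: Iterable[str]) -> Iterator[str]:
        for page_id in page_ids:
            try:
                yield self.fetch_page(page_id)
            except RuntimeError as e:
                message = f"cannot fetch page {page_id}: {e}"
                if self._errors is None:
                    self._errors = []
                self._errors.append(message)
                yield message   # placeholder instead of aborting the batch

fetcher = PageFetcher({"1": "hello"})
print(list(fetcher.get_pages_by_id(["1", "2"])))
print(fetcher._errors)
```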
@@ -815,6 +885,10 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         from .loader import AlitaConfluenceLoader
         from copy import copy
         content_format = kwargs.get('content_format', 'view').lower()
+
+        self._index_include_attachments = kwargs.get('include_attachments', False)
+        self._include_extensions = kwargs.get('include_extensions', [])
+        self._skip_extensions = kwargs.get('skip_extensions', [])
         base_params = {
             'url': self.base_url,
             'space_key': self.space,
@@ -847,65 +921,79 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
 
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
         try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if self._index_include_attachments:
+                page_id = document.metadata.get('id')
+                attachments = self.client.get_attachments_from_content(page_id)
+                if not attachments or not attachments.get('results'):
+                    return f"No attachments found for page ID {page_id}."
+
+                # Get attachment history for created/updated info
+                history_map = {}
+                for attachment in attachments['results']:
+                    try:
+                        hist = self.client.history(attachment['id'])
+                        history_map[attachment['id']] = hist
+                    except Exception as e:
+                        logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
+                        history_map[attachment['id']] = None
+
+                import re
+                for attachment in attachments['results']:
+                    title = attachment.get('title', '')
+                    file_ext = title.lower().split('.')[-1] if '.' in title else ''
+
+                    # Re-verify extension filters
+                    # Check if file should be skipped based on skip_extensions
+                    if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', title, re.IGNORECASE)
+                           for pattern in self._skip_extensions):
+                        continue
+
+                    # Check if file should be included based on include_extensions
+                    # If include_extensions is empty, process all files (that weren't skipped)
+                    if self._include_extensions and not (
+                            any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', title, re.IGNORECASE)
+                                for pattern in self._include_extensions)):
+                        continue
+
+                    media_type = attachment.get('metadata', {}).get('mediaType', '')
+                    # Core metadata extraction with history
+                    hist = history_map.get(attachment['id']) or {}
+                    created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
+                    created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
+                    last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+
+                    metadata = {
+                        'name': title,
+                        'size': attachment.get('extensions', {}).get('fileSize', None),
+                        'creator': created_by,
+                        'created': created_date,
+                        'updated': last_updated,
+                        'media_type': media_type,
+                        'labels': [label['name'] for label in
+                                   attachment.get('metadata', {}).get('labels', {}).get('results', [])],
+                        'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
+                            '_links', {}).get('download') else None
+                    }
 
-
-                        'name': title,
-                        'size': attachment.get('extensions', {}).get('fileSize', None),
-                        'creator': created_by,
-                        'created': created_date,
-                        'updated': last_updated,
-                        'media_type': media_type,
-                        'labels': [label['name'] for label in
-                                   attachment.get('metadata', {}).get('labels', {}).get('results', [])],
-                        'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
-                            '_links', {}).get('download') else None
-                    }
+                    download_url = self.base_url.rstrip('/') + attachment['_links']['download']
 
-
+                    try:
+                        resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
+                        if resp.status_code == 200:
+                            content = resp.content
+                        else:
+                            content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
+                    except Exception as e:
+                        content = f"[Error downloading content: {str(e)}]"
 
-
-
-                    if resp.status_code == 200:
-                        content = resp.content
+                    if isinstance(content, str):
+                        yield Document(page_content=content, metadata=metadata)
                     else:
-
-
-
-
-
-                        yield Document(page_content=content, metadata=metadata)
-                    else:
-                        yield Document(page_content="", metadata={
-                            **metadata,
-                            IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
-                            IndexerKeywords.CONTENT_IN_BYTES.value: content
-                        })
+                        yield Document(page_content="", metadata={
+                            **metadata,
+                            IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
+                            IndexerKeywords.CONTENT_IN_BYTES.value: content
+                        })
         except Exception as e:
             yield from ()
 
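The attachment loop above filters files with glob-style patterns such as `*.png`, converting each one to a regex via `re.escape(pattern).replace(r'\*', '.*') + '$'` and matching case-insensitively; skip patterns win over include patterns, and an empty include list means "process everything". A small self-contained check of that matching rule (the helper names and file names are made up for illustration):

```python
import re
from typing import List

def matches_any(title: str, patterns: List[str]) -> bool:
    """True if title matches any glob-style pattern, e.g. '*.png'."""
    return any(
        re.match(re.escape(p).replace(r'\*', '.*') + '$', title, re.IGNORECASE)
        for p in patterns
    )

def keep_attachment(title: str, include: List[str], skip: List[str]) -> bool:
    """Mirror of the diff's filter order: skip wins, then include (empty = all)."""
    if matches_any(title, skip):
        return False
    if include and not matches_any(title, include):
        return False
    return True

# Hypothetical file names:
print(keep_attachment("diagram.PNG", include=[], skip=["*.png"]))         # False
print(keep_attachment("notes.docx", include=["*.docx"], skip=["*.png"]))  # True
print(keep_attachment("report.pdf", include=["*.docx"], skip=[]))         # False
```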
@@ -1648,8 +1736,15 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             "include_restricted_content": (Optional[bool], Field(description="Include restricted content.", default=False)),
             "include_archived_content": (Optional[bool], Field(description="Include archived content.", default=False)),
             "include_attachments": (Optional[bool], Field(description="Include attachments.", default=False)),
+            'include_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to include when processing attachments: i.e. ['*.png', '*.jpg']. "
+                            "If empty, all files will be processed (except skip_extensions).",
+                default=[])),
+            'skip_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to skip when processing attachments: i.e. ['*.png', '*.jpg']",
+                default=[])),
             "include_comments": (Optional[bool], Field(description="Include comments.", default=False)),
-            "include_labels": (Optional[bool], Field(description="Include labels.", default=
+            "include_labels": (Optional[bool], Field(description="Include labels.", default=False)),
             "ocr_languages": (Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),
             "keep_markdown_format": (Optional[bool], Field(description="Keep the markdown format.", default=True)),
             "keep_newlines": (Optional[bool], Field(description="Keep newlines in the content.", default=True)),
@@ -1773,4 +1868,5 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 "description": self.get_page_attachments.__doc__,
                 "args_schema": GetPageAttachmentsInput,
             }
-        ]
+        ]
+
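For reference, the new schema fields line up with the `kwargs.get(...)` reads added to the indexing setup earlier in this diff (`include_attachments`, `include_extensions`, `skip_extensions`). A hedged example of what such a configuration might look like as plain keyword-argument values; the call site that consumes them is not shown here:

```python
# Illustrative values only; keys follow the schema fields above.
confluence_index_kwargs = {
    "content_format": "view",
    "include_attachments": True,
    "include_extensions": ["*.docx", "*.pdf"],   # empty list => all non-skipped files
    "skip_extensions": ["*.png", "*.jpg"],
    "include_labels": False,
    "ocr_languages": "eng",
    "keep_markdown_format": True,
    "keep_newlines": True,
}
```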
alita_sdk/tools/confluence/loader.py:

@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 
 import requests
+from langchain_core.documents import Document
 
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
 
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
alita_sdk/tools/openapi/__init__.py:

@@ -5,7 +5,7 @@ from pydantic import create_model, BaseModel, ConfigDict, Field
 
 from .api_wrapper import OpenApiWrapper
 from ..base.tool import BaseAction
-from ..utils import clean_string
+from ..utils import clean_string
 
 name = "openapi"
 
@@ -43,14 +43,19 @@ class OpenApiToolkit(BaseToolkit):
         openapi_api_wrapper = OpenApiWrapper(**kwargs)
         available_tools = openapi_api_wrapper.get_available_tools()
         tools = []
-
+        # Use clean toolkit name for context (max 1000 chars in description)
+        toolkit_context = f" [Toolkit: {clean_string(toolkit_name)}]" if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            # Add toolkit context to description with character limit
+            description = tool["description"]
+            if toolkit_context and len(description + toolkit_context) <= 1000:
+                description = description + toolkit_context
             tools.append(BaseAction(
                 api_wrapper=openapi_api_wrapper,
-                name=
-                description=
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
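The OpenAPI toolkit now appends a `[Toolkit: <name>]` suffix to each tool description, but only when the combined string still fits the 1000-character description limit. A quick sketch of that rule in isolation (the function name is invented; the real code also runs the toolkit name through `clean_string`):

```python
from typing import Optional

def with_toolkit_context(description: str, toolkit_name: Optional[str],
                         max_len: int = 1000) -> str:
    """Append ' [Toolkit: <name>]' only if the result stays within max_len."""
    if not toolkit_name:
        return description
    context = f" [Toolkit: {toolkit_name}]"
    if len(description + context) <= max_len:
        return description + context
    return description

print(with_toolkit_context("List pet store endpoints", "petstore"))
print(len(with_toolkit_context("x" * 995, "petstore")))  # suffix skipped: would exceed 1000
```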
alita_sdk/tools/elastic/__init__.py:

@@ -5,7 +5,7 @@ from pydantic import BaseModel, ConfigDict, create_model, Field, SecretStr
 
 from .api_wrapper import ELITEAElasticApiWrapper
 from ..base.tool import BaseAction
-from ..utils import clean_string,
+from ..utils import clean_string, get_max_toolkit_length
 
 name = "elastic"
 
@@ -19,15 +19,13 @@ def get_tools(tool):
 
 class ElasticToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0
 
     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in ELITEAElasticApiWrapper.model_construct().get_available_tools()}
-        ElasticToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            url=(str, Field(default=None, title="Elasticsearch URL", description="Elasticsearch URL", json_schema_extra={'toolkit_name': True
+            url=(str, Field(default=None, title="Elasticsearch URL", description="Elasticsearch URL", json_schema_extra={'toolkit_name': True})),
             api_key=(
                 Optional[SecretStr],
                 Field(
@@ -48,14 +46,17 @@ class ElasticToolkit(BaseToolkit):
         elastic_api_wrapper = ELITEAElasticApiWrapper(**kwargs)
         available_tools = elastic_api_wrapper.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name, ElasticToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=elastic_api_wrapper,
-                name=
-                description=
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
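The Elastic toolkit takes a slightly different approach from the OpenAPI one: it prefixes the description with `Toolkit: <name>` on its own line and then hard-truncates the whole string to 1000 characters. A minimal sketch, assuming the same ordering as in the diff (prefix first, then clamp):

```python
from typing import Optional

def elastic_style_description(description: str, toolkit_name: Optional[str]) -> str:
    """Prefix with the toolkit name, then clamp the result to 1000 characters."""
    if toolkit_name:
        description = f"Toolkit: {toolkit_name}\n{description}"
    return description[:1000]

print(elastic_style_description("Search documents in an Elasticsearch index", "my_elastic"))
```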