alita-sdk 0.3.392__py3-none-any.whl → 0.3.409__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/runtime/clients/client.py +3 -2
- alita_sdk/runtime/langchain/assistant.py +11 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/langraph_agent.py +40 -12
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/tools.py +72 -62
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/llm.py +3 -2
- alita_sdk/tools/__init__.py +41 -31
- alita_sdk/tools/base_indexer_toolkit.py +26 -1
- alita_sdk/tools/code_indexer_toolkit.py +13 -3
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/sharepoint/api_wrapper.py +55 -11
- alita_sdk/tools/sharepoint/authorization_helper.py +131 -1
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +9 -1
- {alita_sdk-0.3.392.dist-info → alita_sdk-0.3.409.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.392.dist-info → alita_sdk-0.3.409.dist-info}/RECORD +20 -20
- {alita_sdk-0.3.392.dist-info → alita_sdk-0.3.409.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.392.dist-info → alita_sdk-0.3.409.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.392.dist-info → alita_sdk-0.3.409.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/toolkits/tools.py
CHANGED

@@ -1,4 +1,5 @@
 import logging
+from typing import Optional
 
 from langchain_core.tools import ToolException
 from langgraph.store.base import BaseStore
@@ -34,74 +35,83 @@ def get_toolkits():
     return core_toolkits + community_toolkits() + alita_toolkits()
 
 
-def get_tools(tools_list: list, alita_client, llm, memory_store: BaseStore = None) -> list:
+def get_tools(tools_list: list, alita_client, llm, memory_store: BaseStore = None, debug_mode: Optional[bool] = False) -> list:
     prompts = []
     tools = []
 
     for tool in tools_list:
-        … (30 removed lines not shown)
-        # TODO: update configuration of internal tools
-        elif tool['type'] == 'internal_tool':
-            if tool['name'] == 'pyodide':
-                tools += SandboxToolkit.get_toolkit(
-                    stateful=False,
-                    allow_net=True,
-                    alita_client=alita_client,
+        try:
+            if tool['type'] == 'datasource':
+                tools.extend(DatasourcesToolkit.get_toolkit(
+                    alita_client,
+                    datasource_ids=[int(tool['settings']['datasource_id'])],
+                    selected_tools=tool['settings']['selected_tools'],
+                    toolkit_name=tool.get('toolkit_name', '') or tool.get('name', '')
+                ).get_tools())
+            elif tool['type'] == 'application' and tool.get('agent_type', '') != 'pipeline' :
+                tools.extend(ApplicationToolkit.get_toolkit(
+                    alita_client,
+                    application_id=int(tool['settings']['application_id']),
+                    application_version_id=int(tool['settings']['application_version_id']),
+                    selected_tools=[]
+                ).get_tools())
+            elif tool['type'] == 'application' and tool.get('agent_type', '') == 'pipeline':
+                # static get_toolkit returns a list of CompiledStateGraph stubs
+                tools.extend(SubgraphToolkit.get_toolkit(
+                    alita_client,
+                    application_id=int(tool['settings']['application_id']),
+                    application_version_id=int(tool['settings']['application_version_id']),
+                    app_api_key=alita_client.auth_token,
+                    selected_tools=[],
+                    llm=llm
+                ))
+            elif tool['type'] == 'memory':
+                tools += MemoryToolkit.get_toolkit(
+                    namespace=tool['settings'].get('namespace', str(tool['id'])),
+                    pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
+                    store=memory_store,
                 ).get_tools()
-            … (4 removed lines not shown)
+            # TODO: update configuration of internal tools
+            elif tool['type'] == 'internal_tool':
+                if tool['name'] == 'pyodide':
+                    tools += SandboxToolkit.get_toolkit(
+                        stateful=False,
+                        allow_net=True,
+                        alita_client=alita_client,
                     ).get_tools()
-            … (20 removed lines not shown)
+                elif tool['name'] == 'image_generation':
+                    if alita_client and alita_client.model_image_generation:
+                        tools += ImageGenerationToolkit.get_toolkit(
+                            client=alita_client,
+                        ).get_tools()
+                    else:
+                        logger.warning("Image generation internal tool requested "
+                                       "but no image generation model configured")
+            elif tool['type'] == 'artifact':
+                tools.extend(ArtifactToolkit.get_toolkit(
+                    client=alita_client,
+                    bucket=tool['settings']['bucket'],
+                    toolkit_name=tool.get('toolkit_name', ''),
+                    selected_tools=tool['settings'].get('selected_tools', []),
+                    llm=llm,
+                    # indexer settings
+                    pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
+                    embedding_model=tool['settings'].get('embedding_model'),
+                    collection_name=f"{tool.get('toolkit_name')}",
+                    collection_schema = str(tool['id'])
+                ).get_tools())
+            elif tool['type'] == 'vectorstore':
+                tools.extend(VectorStoreToolkit.get_toolkit(
+                    llm=llm,
+                    toolkit_name=tool.get('toolkit_name', ''),
+                    **tool['settings']).get_tools())
+        except Exception as e:
+            logger.error(f"Error initializing toolkit for tool '{tool.get('name', 'unknown')}': {e}", exc_info=True)
+            if debug_mode:
+                logger.info("Skipping tool initialization error due to debug mode.")
+                continue
+            else:
+                raise ToolException(f"Error initializing toolkit for tool '{tool.get('name', 'unknown')}': {e}")
 
     if len(prompts) > 0:
         tools += PromptToolkit.get_toolkit(alita_client, prompts).get_tools()
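The new debug_mode flag changes how get_tools reacts to a toolkit that fails to initialize: the error is always logged, but with debug_mode=True the offending tool is skipped and the loop continues, while the default still raises a ToolException. A minimal, self-contained sketch of that control flow; the toolkit factories and tool configs below are made up for illustration and are not part of alita-sdk:

import logging

logger = logging.getLogger(__name__)

class ToolInitError(Exception):
    """Stand-in for langchain's ToolException in this sketch."""

def build_tools(tool_configs, factories, debug_mode=False):
    tools = []
    for cfg in tool_configs:
        try:
            # look up a factory for this tool type and collect its tools
            tools.extend(factories[cfg["type"]](cfg))
        except Exception as e:
            logger.error("Error initializing toolkit for tool '%s': %s", cfg.get("name", "unknown"), e)
            if debug_mode:
                continue  # debug mode: skip the broken toolkit and keep building the rest
            raise ToolInitError(f"Error initializing toolkit for tool '{cfg.get('name', 'unknown')}': {e}")
    return tools

# With debug_mode=True the broken toolkit is skipped; with the default False the whole build aborts.
tools = build_tools(
    [{"type": "memory", "name": "notes"}, {"type": "broken", "name": "bad"}],
    {"memory": lambda cfg: [f"memory-tool:{cfg['name']}"]},
    debug_mode=True,
)
print(tools)  # ['memory-tool:notes']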
alita_sdk/runtime/tools/application.py
CHANGED

@@ -50,6 +50,8 @@ class Application(BaseTool):
     application: Any
     args_schema: Type[BaseModel] = applicationToolSchema
     return_type: str = "str"
+    client: Any
+    args_runnable: dict = {}
 
     @field_validator('name', mode='before')
     @classmethod
@@ -66,6 +68,11 @@ class Application(BaseTool):
         return self._run(*config, **all_kwargs)
 
     def _run(self, *args, **kwargs):
+        if self.client and self.args_runnable:
+            # Recreate new LanggraphAgentRunnable in order to reflect the current input_mapping (it can be dynamic for pipelines).
+            # Actually, for pipelines agent toolkits LanggraphAgentRunnable is created (for LLMNode) before pipeline's schema parsing.
+            application_variables = {k: {"name": k, "value": v} for k, v in kwargs.items()}
+            self.application = self.client.application(**self.args_runnable, application_variables=application_variables)
        response = self.application.invoke(formulate_query(kwargs))
        if self.return_type == "str":
            return response["output"]
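The new branch in Application._run rebuilds the runnable so that pipeline toolkits pick up the current input mapping. The key step is repackaging the incoming kwargs as application_variables records before calling client.application(...); a small sketch of just that transformation, with made-up sample kwargs:

# Pipeline inputs arrive as plain kwargs and are repackaged as {"name": ..., "value": ...}
# records, as in the new _run branch. The sample values are illustrative only.
kwargs = {"system": "You are a helpful assistant", "task": "Summarize the report"}

application_variables = {k: {"name": k, "value": v} for k, v in kwargs.items()}
print(application_variables["task"])  # {'name': 'task', 'value': 'Summarize the report'}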
alita_sdk/runtime/tools/llm.py
CHANGED

@@ -88,8 +88,7 @@ class LLMNode(BaseTool):
             raise ToolException(f"LLMNode requires 'system' and 'task' parameters in input mapping. "
                                 f"Actual params: {func_args}")
         # cast to str in case user passes variable different from str
-        messages = [SystemMessage(content=str(func_args.get('system'))), HumanMessage(content=str(func_args.get('task')))]
-        messages.extend(func_args.get('chat_history', []))
+        messages = [SystemMessage(content=str(func_args.get('system'))), *func_args.get('chat_history', []), HumanMessage(content=str(func_args.get('task')))]
     else:
         # Flow for chat-based LLM node w/o prompt/task from pipeline but with messages in state
         # verify messages structure
@@ -150,6 +149,8 @@ class LLMNode(BaseTool):
 
         output_msgs = {"messages": new_messages}
         if self.output_variables:
+            if self.output_variables[0] == 'messages':
+                return output_msgs
             output_msgs[self.output_variables[0]] = current_completion.content if current_completion else None
 
         return output_msgs
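The rewritten message construction in LLMNode places the chat history between the system prompt and the current task, instead of appending it after the task, so the model sees the conversation in chronological order. A small sketch using langchain_core message classes as in the diff; the history content is made up:

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

func_args = {
    "system": "You are a terse assistant.",
    "task": "And in Python?",
    "chat_history": [HumanMessage(content="How do I read a file in Bash?"),
                     AIMessage(content="Use `cat file.txt`.")],
}

# New ordering: system prompt, then prior turns, then the current task.
messages = [SystemMessage(content=str(func_args.get("system"))),
            *func_args.get("chat_history", []),
            HumanMessage(content=str(func_args.get("task")))]

for m in messages:
    print(type(m).__name__, "->", m.content)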
alita_sdk/tools/__init__.py
CHANGED

@@ -90,64 +90,74 @@ available_count = len(AVAILABLE_TOOLS)
 total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
 logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
 
+
 def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
     tools = []
+
     for tool in tools_list:
-        … (4 removed lines not shown)
-        if not tool.get('settings'):
+        settings = tool.get('settings')
+
+        # Skip tools without settings early
+        if not settings:
             logger.warning(f"Tool '{tool.get('type', '')}' has no settings, skipping...")
             continue
-        …
-        tool…
-        …
+
+        # Validate tool names once
+        selected_tools = settings.get('selected_tools', [])
+        invalid_tools = [name for name in selected_tools if isinstance(name, str) and name.startswith('_')]
+        if invalid_tools:
+            raise ValueError(f"Tool names {invalid_tools} from toolkit '{tool.get('type', '')}' cannot start with '_'")
+
+        # Cache tool type and add common settings
         tool_type = tool['type']
+        settings['alita'] = alita
+        settings['llm'] = llm
+        settings['store'] = store
 
-        #…
+        # Set pgvector collection schema if present
+        if settings.get('pgvector_configuration'):
+            settings['pgvector_configuration']['collection_schema'] = str(tool['id'])
+
+        # Handle ADO special cases
         if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
             tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
+            continue
 
-        #…
-        …
+        # Handle ADO repos aliases
+        if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
            try:
-                …
-                tools.extend(get_tools_func(tool))
-                …
+                tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
            except Exception as e:
-                logger.error(f"Error getting …
-                …
+                logger.error(f"Error getting ADO repos tools: {e}")
+            continue
 
-        # Handle …
-        …
+        # Handle standard tools
+        if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
            try:
-                …
-                tools.extend(get_tools_func(tool))
+                tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
            except Exception as e:
-                logger.error(f"Error getting …
+                logger.error(f"Error getting tools for {tool_type}: {e}")
+                raise ToolException(f"Error getting tools for {tool_type}: {e}")
+            continue
 
         # Handle custom modules
-        …
+        if settings.get("module"):
            try:
-                settings = tool.get("settings", {})
                mod = import_module(settings.pop("module"))
                tkitclass = getattr(mod, settings.pop("class"))
-                …
-                get_toolkit_params = tool["settings"].copy()
+                get_toolkit_params = settings.copy()
                get_toolkit_params["name"] = tool.get("name")
-                #…
                toolkit = tkitclass.get_toolkit(**get_toolkit_params)
                tools.extend(toolkit.get_tools())
            except Exception as e:
                logger.error(f"Error in getting custom toolkit: {e}")
+            continue
 
+        # Tool not available
+        if tool_type in FAILED_IMPORTS:
+            logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
         else:
-            …
-            if tool_type in FAILED_IMPORTS:
-                logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
-            else:
-                logger.warning(f"Unknown tool type: {tool_type}")
+            logger.warning(f"Unknown tool type: {tool_type}")
 
     return tools
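Among the refactorings in alita_sdk.tools.get_tools, selected tool names are now validated once per toolkit before anything is constructed: any name starting with '_' aborts the build with a ValueError. A standalone sketch of that check; the helper name and sample configs are hypothetical:

def validate_selected_tools(tool: dict) -> None:
    # Reject any selected tool whose name starts with an underscore, as in the new get_tools.
    settings = tool.get("settings") or {}
    selected_tools = settings.get("selected_tools", [])
    invalid_tools = [name for name in selected_tools if isinstance(name, str) and name.startswith("_")]
    if invalid_tools:
        raise ValueError(f"Tool names {invalid_tools} from toolkit '{tool.get('type', '')}' cannot start with '_'")

validate_selected_tools({"type": "github", "settings": {"selected_tools": ["list_files"]}})  # passes silently
try:
    validate_selected_tools({"type": "github", "settings": {"selected_tools": ["_internal_call"]}})
except ValueError as e:
    print(e)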
alita_sdk/tools/base_indexer_toolkit.py
CHANGED

@@ -110,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('…
+        collection_name = kwargs.get('collection_schema')
 
         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -160,6 +160,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         if clean_index:
             self._clean_index(index_name)
         #
+        self.index_meta_init(index_name, kwargs)
+        #
         self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
         self._log_tool_event(f"Loading the documents to index...{kwargs}")
         documents = self._base_loader(**kwargs)
@@ -454,6 +456,29 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             reranking_config=reranking_config,
             extended_search=extended_search
         )
+
+    def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "history": "[]",
+                "task_id": None,
+            }
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
 
     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
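index_meta_init seeds a bookkeeping document for a collection the first time it is indexed. A rough sketch of the record it writes: the literal "index_meta" and "in_progress" strings below stand in for the SDK's IndexerKeywords enum values, and only the field layout follows the diff.

import time
from langchain_core.documents import Document

index_name = "my_collection"
created_on = time.time()
metadata = {
    "collection": index_name,
    "type": "index_meta",            # stand-in for IndexerKeywords.INDEX_META_TYPE.value
    "indexed": 0,
    "state": "in_progress",          # stand-in for IndexerKeywords.INDEX_META_IN_PROGRESS.value
    "index_configuration": {"clean_index": False},
    "created_on": created_on,
    "updated_on": created_on,
    "history": "[]",
    "task_id": None,
}
# The SDK stores this as a vector-store document alongside the indexed content.
index_meta_doc = Document(page_content=f"index_meta_{index_name}", metadata=metadata)
print(index_meta_doc.metadata["state"])  # in_progress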
alita_sdk/tools/code_indexer_toolkit.py
CHANGED

@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator
 
@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def key_fn(self, document: Document):
-        return document.metadata.get(…
+        return document.metadata.get("filename")
 
     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )
 
     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from …
+        yield from documents
 
     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
             self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
         self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
 
-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())
 
     def __handle_get_files(self, path: str, branch: str):
         """
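The loader now normalizes raw file payloads before hashing, since source providers can hand back bytes, parsed JSON, or other objects. A self-contained sketch of the same coercion logic; the helper name and sample files are made up:

import hashlib
import json

def normalize_file_content(file_name: str, file_content):
    # Coerce the payload to str so it can be hashed and indexed uniformly.
    if isinstance(file_content, bytes):
        file_content = file_content.decode("utf-8", errors="ignore")
    elif isinstance(file_content, dict) and file_name.endswith(".json"):
        file_content = json.dumps(file_content)
    elif not isinstance(file_content, str):
        file_content = str(file_content)
    return file_content

for name, raw in [("main.py", b"print('hi')"), ("config.json", {"a": 1}), ("size", 42)]:
    text = normalize_file_content(name, raw)
    print(name, hashlib.sha256(text.encode("utf-8")).hexdigest()[:12])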
alita_sdk/tools/confluence/loader.py
CHANGED

@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 
 import requests
+from langchain_core.documents import Document
 
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
 
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
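The new process_page override only guards against draft pages that arrive without a title when include_restricted_content is enabled. A minimal sketch of the fallback; the page payload below is a made-up fragment of a Confluence API response:

# Draft pages can come back with an empty title, which breaks downstream document naming.
page = {"id": "12345", "title": "", "body": {"storage": {"value": "<p>Draft text</p>"}}}

if not page.get("title"):
    page["title"] = "Untitled"

print(page["title"])  # Untitled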
alita_sdk/tools/sharepoint/api_wrapper.py
CHANGED

@@ -127,8 +127,23 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
                 result.append(temp_props)
             return result if result else ToolException("Can not get files or folder is empty. Please, double check folder name and read permissions.")
         except Exception as e:
-            … (2 removed lines not shown)
+            # attempt to get via graph api
+            try:
+                # attempt to get files via graph api
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value(),
+                    tenant="",  # optional for graph api
+                    scope="",  # optional for graph api
+                    token_json="",  # optional for graph api
+                )
+                files = auth_helper.get_files_list(self.site_url, folder_name, limit_files)
+                return files
+            except Exception as graph_e:
+                logging.error(f"Failed to load files from sharepoint via base api: {e}")
+                logging.error(f"Failed to load files from sharepoint via graph api: {graph_e}")
+                return ToolException(f"Can not get files. Please, double check folder name and read permissions: {e} and {graph_e}")
 
     def read_file(self, path,
                   is_capture_image: bool = False,
@@ -141,11 +156,28 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             self._client.load(file).execute_query()
 
             file_content = file.read()
+            file_name = file.name
             self._client.execute_query()
         except Exception as e:
-            … (3 removed lines not shown)
+            # attempt to get via graph api
+            try:
+                # attempt to get files via graph api
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value(),
+                    tenant="",  # optional for graph api
+                    scope="",  # optional for graph api
+                    token_json="",  # optional for graph api
+                )
+                file_content = auth_helper.get_file_content(self.site_url, path)
+                file_name = path.split('/')[-1]
+            except Exception as graph_e:
+                logging.error(f"Failed to load file from SharePoint via base api: {e}. Path: {path}. Please, double check file name and path.")
+                logging.error(f"Failed to load file from SharePoint via graph api: {graph_e}. Path: {path}. Please, double check file name and path.")
+                return ToolException(f"File not found. Please, check file name and path: {e} and {graph_e}")
+        #
+        return parse_file_content(file_name=file_name,
                                   file_content=file_content,
                                   is_capture_image=is_capture_image,
                                   page_number=page_number,
@@ -219,12 +251,24 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             yield document
 
     def _load_file_content_in_bytes(self, path):
-        … (6 removed lines not shown)
+        try:
+            file = self._client.web.get_file_by_server_relative_path(path)
+            self._client.load(file).execute_query()
+            file_content = file.read()
+            self._client.execute_query()
+            #
+            return file_content
+        except Exception as e:
+            # attempt to get via graph api
+            from .authorization_helper import SharepointAuthorizationHelper
+            auth_helper = SharepointAuthorizationHelper(
+                client_id=self.client_id,
+                client_secret=self.client_secret.get_secret_value(),
+                tenant="",  # optional for graph api
+                scope="",  # optional for graph api
+                token_json="",  # optional for graph api
+            )
+            return auth_helper.get_file_content(self.site_url, path)
 
     def get_available_tools(self):
         return super().get_available_tools() + [