dtSpark 1.0.10__py3-none-any.whl → 1.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtSpark/_version.txt +1 -1
- dtSpark/core/application.py +226 -0
- dtSpark/mcp_integration/tool_selector.py +5 -0
- dtSpark/resources/config.yaml.template +42 -0
- dtSpark/tools/builtin.py +1438 -0
- dtSpark/web/endpoints/chat.py +147 -0
- dtSpark/web/endpoints/main_menu.py +75 -0
- dtSpark/web/templates/chat.html +300 -0
- dtSpark/web/templates/main_menu.html +71 -29
- {dtspark-1.0.10.dist-info → dtspark-1.1.0a1.dist-info}/METADATA +7 -1
- {dtspark-1.0.10.dist-info → dtspark-1.1.0a1.dist-info}/RECORD +15 -15
- {dtspark-1.0.10.dist-info → dtspark-1.1.0a1.dist-info}/WHEEL +0 -0
- {dtspark-1.0.10.dist-info → dtspark-1.1.0a1.dist-info}/entry_points.txt +0 -0
- {dtspark-1.0.10.dist-info → dtspark-1.1.0a1.dist-info}/licenses/LICENSE +0 -0
- {dtspark-1.0.10.dist-info → dtspark-1.1.0a1.dist-info}/top_level.txt +0 -0
dtSpark/tools/builtin.py
CHANGED
|
@@ -63,6 +63,20 @@ def get_builtin_tools(config: Optional[Dict[str, Any]] = None) -> List[Dict[str,
|
|
|
63
63
|
tools.extend(fs_tools)
|
|
64
64
|
logging.info(f"Embedded filesystem tools enabled: {len(fs_tools)} tools added")
|
|
65
65
|
|
|
66
|
+
# Add document tools if enabled
|
|
67
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
68
|
+
if doc_config.get('enabled', False):
|
|
69
|
+
doc_tools = _get_document_tools(doc_config)
|
|
70
|
+
tools.extend(doc_tools)
|
|
71
|
+
logging.info(f"Embedded document tools enabled: {len(doc_tools)} tools added")
|
|
72
|
+
|
|
73
|
+
# Add archive tools if enabled
|
|
74
|
+
archive_config = config.get('embedded_tools', {}).get('archives', {})
|
|
75
|
+
if archive_config.get('enabled', False):
|
|
76
|
+
archive_tools = _get_archive_tools(archive_config)
|
|
77
|
+
tools.extend(archive_tools)
|
|
78
|
+
logging.info(f"Embedded archive tools enabled: {len(archive_tools)} tools added")
|
|
79
|
+
|
|
66
80
|
return tools
|
|
67
81
|
|
|
68
82
|
|
|
@@ -99,6 +113,33 @@ def execute_builtin_tool(tool_name: str, tool_input: Dict[str, Any],
|
|
|
99
113
|
return _execute_write_file(tool_input, config)
|
|
100
114
|
elif tool_name == "create_directories":
|
|
101
115
|
return _execute_create_directories(tool_input, config)
|
|
116
|
+
|
|
117
|
+
# Document tools
|
|
118
|
+
elif tool_name == "get_file_info":
|
|
119
|
+
return _execute_get_file_info(tool_input, config)
|
|
120
|
+
elif tool_name == "read_word_document":
|
|
121
|
+
return _execute_read_word_document(tool_input, config)
|
|
122
|
+
elif tool_name == "read_excel_document":
|
|
123
|
+
return _execute_read_excel_document(tool_input, config)
|
|
124
|
+
elif tool_name == "read_powerpoint_document":
|
|
125
|
+
return _execute_read_powerpoint_document(tool_input, config)
|
|
126
|
+
elif tool_name == "read_pdf_document":
|
|
127
|
+
return _execute_read_pdf_document(tool_input, config)
|
|
128
|
+
elif tool_name == "create_word_document":
|
|
129
|
+
return _execute_create_word_document(tool_input, config)
|
|
130
|
+
elif tool_name == "create_excel_document":
|
|
131
|
+
return _execute_create_excel_document(tool_input, config)
|
|
132
|
+
elif tool_name == "create_powerpoint_document":
|
|
133
|
+
return _execute_create_powerpoint_document(tool_input, config)
|
|
134
|
+
|
|
135
|
+
# Archive tools
|
|
136
|
+
elif tool_name == "list_archive_contents":
|
|
137
|
+
return _execute_list_archive_contents(tool_input, config)
|
|
138
|
+
elif tool_name == "read_archive_file":
|
|
139
|
+
return _execute_read_archive_file(tool_input, config)
|
|
140
|
+
elif tool_name == "extract_archive":
|
|
141
|
+
return _execute_extract_archive(tool_input, config)
|
|
142
|
+
|
|
102
143
|
else:
|
|
103
144
|
return {
|
|
104
145
|
"success": False,
|
|
@@ -831,3 +872,1400 @@ def _execute_create_directories(tool_input: Dict[str, Any],
|
|
|
831
872
|
except Exception as e:
|
|
832
873
|
logging.error(f"Error creating directories {dir_path}: {e}")
|
|
833
874
|
return {"success": False, "error": str(e)}
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
# ============================================================================
|
|
878
|
+
# Document Tools
|
|
879
|
+
# ============================================================================
|
|
880
|
+
|
|
881
|
+
def _get_document_tools(doc_config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
882
|
+
"""
|
|
883
|
+
Get document tool definitions based on configuration.
|
|
884
|
+
|
|
885
|
+
Args:
|
|
886
|
+
doc_config: Document tools configuration dictionary
|
|
887
|
+
|
|
888
|
+
Returns:
|
|
889
|
+
List of document tool definitions
|
|
890
|
+
"""
|
|
891
|
+
access_mode = doc_config.get('access_mode', 'read')
|
|
892
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
893
|
+
|
|
894
|
+
# File info tool (always included)
|
|
895
|
+
tools = [
|
|
896
|
+
{
|
|
897
|
+
"name": "get_file_info",
|
|
898
|
+
"description": f"Get detailed file information including type, size, MIME type, and extension. "
|
|
899
|
+
f"Works for any file within the allowed path ({allowed_path}). "
|
|
900
|
+
"Useful for determining how to process a file before reading it.",
|
|
901
|
+
"input_schema": {
|
|
902
|
+
"type": "object",
|
|
903
|
+
"properties": {
|
|
904
|
+
"path": {
|
|
905
|
+
"type": "string",
|
|
906
|
+
"description": "Path to the file (relative to allowed path or absolute within allowed path)"
|
|
907
|
+
}
|
|
908
|
+
},
|
|
909
|
+
"required": ["path"]
|
|
910
|
+
}
|
|
911
|
+
}
|
|
912
|
+
]
|
|
913
|
+
|
|
914
|
+
# Read-only document tools
|
|
915
|
+
tools.extend([
|
|
916
|
+
{
|
|
917
|
+
"name": "read_word_document",
|
|
918
|
+
"description": f"Extract text content from Microsoft Word documents (.docx) within the allowed path ({allowed_path}). "
|
|
919
|
+
"Returns the document text organised by paragraphs. Also extracts headings and tables if present.",
|
|
920
|
+
"input_schema": {
|
|
921
|
+
"type": "object",
|
|
922
|
+
"properties": {
|
|
923
|
+
"path": {
|
|
924
|
+
"type": "string",
|
|
925
|
+
"description": "Path to the .docx file"
|
|
926
|
+
},
|
|
927
|
+
"include_tables": {
|
|
928
|
+
"type": "boolean",
|
|
929
|
+
"description": "Include table content in the output",
|
|
930
|
+
"default": True
|
|
931
|
+
},
|
|
932
|
+
"include_headers_footers": {
|
|
933
|
+
"type": "boolean",
|
|
934
|
+
"description": "Include header and footer content",
|
|
935
|
+
"default": False
|
|
936
|
+
}
|
|
937
|
+
},
|
|
938
|
+
"required": ["path"]
|
|
939
|
+
}
|
|
940
|
+
},
|
|
941
|
+
{
|
|
942
|
+
"name": "read_excel_document",
|
|
943
|
+
"description": f"Extract data from Microsoft Excel documents (.xlsx) within the allowed path ({allowed_path}). "
|
|
944
|
+
"Returns spreadsheet data as structured JSON. Can read specific sheets or all sheets.",
|
|
945
|
+
"input_schema": {
|
|
946
|
+
"type": "object",
|
|
947
|
+
"properties": {
|
|
948
|
+
"path": {
|
|
949
|
+
"type": "string",
|
|
950
|
+
"description": "Path to the .xlsx file"
|
|
951
|
+
},
|
|
952
|
+
"sheet_name": {
|
|
953
|
+
"type": "string",
|
|
954
|
+
"description": "Specific sheet name to read. If not provided, reads the active sheet."
|
|
955
|
+
},
|
|
956
|
+
"include_all_sheets": {
|
|
957
|
+
"type": "boolean",
|
|
958
|
+
"description": "Read all sheets in the workbook",
|
|
959
|
+
"default": False
|
|
960
|
+
},
|
|
961
|
+
"max_rows": {
|
|
962
|
+
"type": "integer",
|
|
963
|
+
"description": "Maximum number of rows to read (0 = use config default)",
|
|
964
|
+
"default": 0
|
|
965
|
+
}
|
|
966
|
+
},
|
|
967
|
+
"required": ["path"]
|
|
968
|
+
}
|
|
969
|
+
},
|
|
970
|
+
{
|
|
971
|
+
"name": "read_powerpoint_document",
|
|
972
|
+
"description": f"Extract text content from Microsoft PowerPoint documents (.pptx) within the allowed path ({allowed_path}). "
|
|
973
|
+
"Returns text organised by slide, including titles, body text, and notes.",
|
|
974
|
+
"input_schema": {
|
|
975
|
+
"type": "object",
|
|
976
|
+
"properties": {
|
|
977
|
+
"path": {
|
|
978
|
+
"type": "string",
|
|
979
|
+
"description": "Path to the .pptx file"
|
|
980
|
+
},
|
|
981
|
+
"include_notes": {
|
|
982
|
+
"type": "boolean",
|
|
983
|
+
"description": "Include speaker notes in the output",
|
|
984
|
+
"default": True
|
|
985
|
+
}
|
|
986
|
+
},
|
|
987
|
+
"required": ["path"]
|
|
988
|
+
}
|
|
989
|
+
},
|
|
990
|
+
{
|
|
991
|
+
"name": "read_pdf_document",
|
|
992
|
+
"description": f"Extract text content from PDF documents within the allowed path ({allowed_path}). "
|
|
993
|
+
"Returns text organised by page. Can extract metadata and specific pages.",
|
|
994
|
+
"input_schema": {
|
|
995
|
+
"type": "object",
|
|
996
|
+
"properties": {
|
|
997
|
+
"path": {
|
|
998
|
+
"type": "string",
|
|
999
|
+
"description": "Path to the .pdf file"
|
|
1000
|
+
},
|
|
1001
|
+
"page_numbers": {
|
|
1002
|
+
"type": "array",
|
|
1003
|
+
"items": {"type": "integer"},
|
|
1004
|
+
"description": "Specific page numbers to extract (1-indexed). If not provided, extracts all pages."
|
|
1005
|
+
},
|
|
1006
|
+
"include_metadata": {
|
|
1007
|
+
"type": "boolean",
|
|
1008
|
+
"description": "Include document metadata (author, title, etc.)",
|
|
1009
|
+
"default": True
|
|
1010
|
+
}
|
|
1011
|
+
},
|
|
1012
|
+
"required": ["path"]
|
|
1013
|
+
}
|
|
1014
|
+
}
|
|
1015
|
+
])
|
|
1016
|
+
|
|
1017
|
+
# Write/create tools (only if access_mode is read_write)
|
|
1018
|
+
if access_mode == 'read_write':
|
|
1019
|
+
tools.extend([
|
|
1020
|
+
{
|
|
1021
|
+
"name": "create_word_document",
|
|
1022
|
+
"description": f"Create a Microsoft Word document (.docx) within the allowed path ({allowed_path}). "
|
|
1023
|
+
"Supports creating from scratch with structured content, or using a template with placeholder replacement. "
|
|
1024
|
+
"When using a template, placeholders in the format {{{{placeholder_name}}}} will be replaced with provided values.",
|
|
1025
|
+
"input_schema": {
|
|
1026
|
+
"type": "object",
|
|
1027
|
+
"properties": {
|
|
1028
|
+
"path": {
|
|
1029
|
+
"type": "string",
|
|
1030
|
+
"description": "Path for the output .docx file"
|
|
1031
|
+
},
|
|
1032
|
+
"content": {
|
|
1033
|
+
"type": "object",
|
|
1034
|
+
"description": "Document content structure",
|
|
1035
|
+
"properties": {
|
|
1036
|
+
"title": {"type": "string", "description": "Document title"},
|
|
1037
|
+
"paragraphs": {
|
|
1038
|
+
"type": "array",
|
|
1039
|
+
"items": {
|
|
1040
|
+
"type": "object",
|
|
1041
|
+
"properties": {
|
|
1042
|
+
"text": {"type": "string"},
|
|
1043
|
+
"style": {"type": "string", "description": "Style: Normal, Heading 1, Heading 2, Heading 3, Title"}
|
|
1044
|
+
}
|
|
1045
|
+
},
|
|
1046
|
+
"description": "List of paragraphs with optional styles"
|
|
1047
|
+
}
|
|
1048
|
+
}
|
|
1049
|
+
},
|
|
1050
|
+
"template_path": {
|
|
1051
|
+
"type": "string",
|
|
1052
|
+
"description": "Path to a .docx template file. If provided, placeholders will be replaced."
|
|
1053
|
+
},
|
|
1054
|
+
"placeholders": {
|
|
1055
|
+
"type": "object",
|
|
1056
|
+
"description": "Dictionary of placeholder names to values for template replacement",
|
|
1057
|
+
"additionalProperties": {"type": "string"}
|
|
1058
|
+
}
|
|
1059
|
+
},
|
|
1060
|
+
"required": ["path"]
|
|
1061
|
+
}
|
|
1062
|
+
},
|
|
1063
|
+
{
|
|
1064
|
+
"name": "create_excel_document",
|
|
1065
|
+
"description": f"Create a Microsoft Excel document (.xlsx) within the allowed path ({allowed_path}). "
|
|
1066
|
+
"Creates spreadsheets from structured data. Supports multiple sheets.",
|
|
1067
|
+
"input_schema": {
|
|
1068
|
+
"type": "object",
|
|
1069
|
+
"properties": {
|
|
1070
|
+
"path": {
|
|
1071
|
+
"type": "string",
|
|
1072
|
+
"description": "Path for the output .xlsx file"
|
|
1073
|
+
},
|
|
1074
|
+
"sheets": {
|
|
1075
|
+
"type": "array",
|
|
1076
|
+
"items": {
|
|
1077
|
+
"type": "object",
|
|
1078
|
+
"properties": {
|
|
1079
|
+
"name": {"type": "string", "description": "Sheet name"},
|
|
1080
|
+
"headers": {"type": "array", "items": {"type": "string"}, "description": "Column headers"},
|
|
1081
|
+
"data": {
|
|
1082
|
+
"type": "array",
|
|
1083
|
+
"items": {"type": "array"},
|
|
1084
|
+
"description": "2D array of cell values (rows of columns)"
|
|
1085
|
+
}
|
|
1086
|
+
},
|
|
1087
|
+
"required": ["name", "data"]
|
|
1088
|
+
},
|
|
1089
|
+
"description": "List of sheets to create"
|
|
1090
|
+
}
|
|
1091
|
+
},
|
|
1092
|
+
"required": ["path", "sheets"]
|
|
1093
|
+
}
|
|
1094
|
+
},
|
|
1095
|
+
{
|
|
1096
|
+
"name": "create_powerpoint_document",
|
|
1097
|
+
"description": f"Create a Microsoft PowerPoint document (.pptx) within the allowed path ({allowed_path}). "
|
|
1098
|
+
"Creates presentations with title and content slides. Supports templates with placeholder replacement.",
|
|
1099
|
+
"input_schema": {
|
|
1100
|
+
"type": "object",
|
|
1101
|
+
"properties": {
|
|
1102
|
+
"path": {
|
|
1103
|
+
"type": "string",
|
|
1104
|
+
"description": "Path for the output .pptx file"
|
|
1105
|
+
},
|
|
1106
|
+
"slides": {
|
|
1107
|
+
"type": "array",
|
|
1108
|
+
"items": {
|
|
1109
|
+
"type": "object",
|
|
1110
|
+
"properties": {
|
|
1111
|
+
"layout": {
|
|
1112
|
+
"type": "string",
|
|
1113
|
+
"description": "Slide layout: title, title_content, content, blank"
|
|
1114
|
+
},
|
|
1115
|
+
"title": {"type": "string", "description": "Slide title"},
|
|
1116
|
+
"content": {
|
|
1117
|
+
"type": "array",
|
|
1118
|
+
"items": {"type": "string"},
|
|
1119
|
+
"description": "Bullet points or paragraphs"
|
|
1120
|
+
},
|
|
1121
|
+
"notes": {"type": "string", "description": "Speaker notes"}
|
|
1122
|
+
}
|
|
1123
|
+
},
|
|
1124
|
+
"description": "List of slides to create"
|
|
1125
|
+
},
|
|
1126
|
+
"template_path": {
|
|
1127
|
+
"type": "string",
|
|
1128
|
+
"description": "Path to a .pptx template file"
|
|
1129
|
+
},
|
|
1130
|
+
"placeholders": {
|
|
1131
|
+
"type": "object",
|
|
1132
|
+
"description": "Dictionary of placeholder names to values for template replacement",
|
|
1133
|
+
"additionalProperties": {"type": "string"}
|
|
1134
|
+
}
|
|
1135
|
+
},
|
|
1136
|
+
"required": ["path", "slides"]
|
|
1137
|
+
}
|
|
1138
|
+
}
|
|
1139
|
+
])
|
|
1140
|
+
|
|
1141
|
+
return tools
|
|
1142
|
+
|
|
1143
|
+
|
|
1144
|
+
def _execute_get_file_info(tool_input: Dict[str, Any],
|
|
1145
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1146
|
+
"""Execute the get_file_info tool."""
|
|
1147
|
+
if not config.get('embedded_tools'):
|
|
1148
|
+
return {"success": False, "error": "Document tools not configured"}
|
|
1149
|
+
|
|
1150
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
1151
|
+
if not doc_config.get('enabled', False):
|
|
1152
|
+
return {"success": False, "error": "Document tools are not enabled"}
|
|
1153
|
+
|
|
1154
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
1155
|
+
file_path = tool_input.get('path')
|
|
1156
|
+
|
|
1157
|
+
if not file_path:
|
|
1158
|
+
return {"success": False, "error": "File path is required"}
|
|
1159
|
+
|
|
1160
|
+
validation = _validate_path(file_path, allowed_path)
|
|
1161
|
+
if not validation['valid']:
|
|
1162
|
+
return {"success": False, "error": validation['error']}
|
|
1163
|
+
|
|
1164
|
+
full_path = Path(validation['resolved_path'])
|
|
1165
|
+
|
|
1166
|
+
if not full_path.exists():
|
|
1167
|
+
return {"success": False, "error": f"File does not exist: {file_path}"}
|
|
1168
|
+
|
|
1169
|
+
if not full_path.is_file():
|
|
1170
|
+
return {"success": False, "error": f"Path is not a file: {file_path}"}
|
|
1171
|
+
|
|
1172
|
+
try:
|
|
1173
|
+
import mimetypes
|
|
1174
|
+
stat_info = full_path.stat()
|
|
1175
|
+
|
|
1176
|
+
# Try to get MIME type
|
|
1177
|
+
mime_type, _ = mimetypes.guess_type(str(full_path))
|
|
1178
|
+
|
|
1179
|
+
# Try python-magic for more accurate detection
|
|
1180
|
+
try:
|
|
1181
|
+
import magic
|
|
1182
|
+
mime_type_magic = magic.from_file(str(full_path), mime=True)
|
|
1183
|
+
if mime_type_magic:
|
|
1184
|
+
mime_type = mime_type_magic
|
|
1185
|
+
except ImportError:
|
|
1186
|
+
pass
|
|
1187
|
+
except Exception:
|
|
1188
|
+
pass
|
|
1189
|
+
|
|
1190
|
+
result = {
|
|
1191
|
+
"path": file_path,
|
|
1192
|
+
"full_path": str(full_path),
|
|
1193
|
+
"filename": full_path.name,
|
|
1194
|
+
"extension": full_path.suffix.lower(),
|
|
1195
|
+
"mime_type": mime_type or "application/octet-stream",
|
|
1196
|
+
"size_bytes": stat_info.st_size,
|
|
1197
|
+
"size_human": _format_size(stat_info.st_size),
|
|
1198
|
+
"modified": datetime.fromtimestamp(stat_info.st_mtime).isoformat(),
|
|
1199
|
+
"created": datetime.fromtimestamp(stat_info.st_ctime).isoformat()
|
|
1200
|
+
}
|
|
1201
|
+
|
|
1202
|
+
logging.info(f"Got file info: {file_path}")
|
|
1203
|
+
return {"success": True, "result": result}
|
|
1204
|
+
|
|
1205
|
+
except Exception as e:
|
|
1206
|
+
logging.error(f"Error getting file info {file_path}: {e}")
|
|
1207
|
+
return {"success": False, "error": str(e)}
|
|
1208
|
+
|
|
1209
|
+
|
|
1210
|
+
def _format_size(size_bytes: int) -> str:
|
|
1211
|
+
"""Format file size in human-readable format."""
|
|
1212
|
+
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
|
|
1213
|
+
if size_bytes < 1024.0:
|
|
1214
|
+
return f"{size_bytes:.2f} {unit}"
|
|
1215
|
+
size_bytes /= 1024.0
|
|
1216
|
+
return f"{size_bytes:.2f} PB"
|
|
1217
|
+
|
|
1218
|
+
|
|
1219
|
+
def _execute_read_word_document(tool_input: Dict[str, Any],
|
|
1220
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1221
|
+
"""Execute the read_word_document tool."""
|
|
1222
|
+
if not config.get('embedded_tools'):
|
|
1223
|
+
return {"success": False, "error": "Document tools not configured"}
|
|
1224
|
+
|
|
1225
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
1226
|
+
if not doc_config.get('enabled', False):
|
|
1227
|
+
return {"success": False, "error": "Document tools are not enabled"}
|
|
1228
|
+
|
|
1229
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
1230
|
+
max_size_mb = doc_config.get('max_file_size_mb', 50)
|
|
1231
|
+
|
|
1232
|
+
file_path = tool_input.get('path')
|
|
1233
|
+
include_tables = tool_input.get('include_tables', True)
|
|
1234
|
+
include_headers_footers = tool_input.get('include_headers_footers', False)
|
|
1235
|
+
|
|
1236
|
+
if not file_path:
|
|
1237
|
+
return {"success": False, "error": "File path is required"}
|
|
1238
|
+
|
|
1239
|
+
validation = _validate_path(file_path, allowed_path)
|
|
1240
|
+
if not validation['valid']:
|
|
1241
|
+
return {"success": False, "error": validation['error']}
|
|
1242
|
+
|
|
1243
|
+
full_path = Path(validation['resolved_path'])
|
|
1244
|
+
|
|
1245
|
+
if not full_path.exists():
|
|
1246
|
+
return {"success": False, "error": f"File does not exist: {file_path}"}
|
|
1247
|
+
|
|
1248
|
+
if full_path.suffix.lower() != '.docx':
|
|
1249
|
+
return {"success": False, "error": f"File is not a Word document (.docx): {file_path}"}
|
|
1250
|
+
|
|
1251
|
+
if full_path.stat().st_size > max_size_mb * 1024 * 1024:
|
|
1252
|
+
return {"success": False, "error": f"File exceeds maximum size of {max_size_mb} MB"}
|
|
1253
|
+
|
|
1254
|
+
try:
|
|
1255
|
+
from docx import Document
|
|
1256
|
+
doc = Document(str(full_path))
|
|
1257
|
+
|
|
1258
|
+
paragraphs = []
|
|
1259
|
+
for para in doc.paragraphs:
|
|
1260
|
+
if para.text.strip():
|
|
1261
|
+
paragraphs.append({
|
|
1262
|
+
"text": para.text,
|
|
1263
|
+
"style": para.style.name if para.style else "Normal"
|
|
1264
|
+
})
|
|
1265
|
+
|
|
1266
|
+
tables = []
|
|
1267
|
+
if include_tables:
|
|
1268
|
+
for table in doc.tables:
|
|
1269
|
+
table_data = []
|
|
1270
|
+
for row in table.rows:
|
|
1271
|
+
row_data = [cell.text for cell in row.cells]
|
|
1272
|
+
table_data.append(row_data)
|
|
1273
|
+
if table_data:
|
|
1274
|
+
tables.append(table_data)
|
|
1275
|
+
|
|
1276
|
+
headers_footers = []
|
|
1277
|
+
if include_headers_footers:
|
|
1278
|
+
for section in doc.sections:
|
|
1279
|
+
if section.header and section.header.paragraphs:
|
|
1280
|
+
for para in section.header.paragraphs:
|
|
1281
|
+
if para.text.strip():
|
|
1282
|
+
headers_footers.append({"type": "header", "text": para.text})
|
|
1283
|
+
if section.footer and section.footer.paragraphs:
|
|
1284
|
+
for para in section.footer.paragraphs:
|
|
1285
|
+
if para.text.strip():
|
|
1286
|
+
headers_footers.append({"type": "footer", "text": para.text})
|
|
1287
|
+
|
|
1288
|
+
result = {
|
|
1289
|
+
"path": file_path,
|
|
1290
|
+
"full_path": str(full_path),
|
|
1291
|
+
"paragraph_count": len(paragraphs),
|
|
1292
|
+
"paragraphs": paragraphs,
|
|
1293
|
+
"table_count": len(tables),
|
|
1294
|
+
"tables": tables if tables else None,
|
|
1295
|
+
"headers_footers": headers_footers if headers_footers else None
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
logging.info(f"Read Word document: {file_path} ({len(paragraphs)} paragraphs, {len(tables)} tables)")
|
|
1299
|
+
return {"success": True, "result": result}
|
|
1300
|
+
|
|
1301
|
+
except Exception as e:
|
|
1302
|
+
logging.error(f"Error reading Word document {file_path}: {e}")
|
|
1303
|
+
return {"success": False, "error": str(e)}
|
|
1304
|
+
|
|
1305
|
+
|
|
1306
|
+
def _execute_read_excel_document(tool_input: Dict[str, Any],
|
|
1307
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1308
|
+
"""Execute the read_excel_document tool."""
|
|
1309
|
+
if not config.get('embedded_tools'):
|
|
1310
|
+
return {"success": False, "error": "Document tools not configured"}
|
|
1311
|
+
|
|
1312
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
1313
|
+
if not doc_config.get('enabled', False):
|
|
1314
|
+
return {"success": False, "error": "Document tools are not enabled"}
|
|
1315
|
+
|
|
1316
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
1317
|
+
max_size_mb = doc_config.get('max_file_size_mb', 50)
|
|
1318
|
+
default_max_rows = doc_config.get('reading', {}).get('max_excel_rows', 10000)
|
|
1319
|
+
|
|
1320
|
+
file_path = tool_input.get('path')
|
|
1321
|
+
sheet_name = tool_input.get('sheet_name')
|
|
1322
|
+
include_all_sheets = tool_input.get('include_all_sheets', False)
|
|
1323
|
+
max_rows = tool_input.get('max_rows', 0) or default_max_rows
|
|
1324
|
+
|
|
1325
|
+
if not file_path:
|
|
1326
|
+
return {"success": False, "error": "File path is required"}
|
|
1327
|
+
|
|
1328
|
+
validation = _validate_path(file_path, allowed_path)
|
|
1329
|
+
if not validation['valid']:
|
|
1330
|
+
return {"success": False, "error": validation['error']}
|
|
1331
|
+
|
|
1332
|
+
full_path = Path(validation['resolved_path'])
|
|
1333
|
+
|
|
1334
|
+
if not full_path.exists():
|
|
1335
|
+
return {"success": False, "error": f"File does not exist: {file_path}"}
|
|
1336
|
+
|
|
1337
|
+
if full_path.suffix.lower() != '.xlsx':
|
|
1338
|
+
return {"success": False, "error": f"File is not an Excel document (.xlsx): {file_path}"}
|
|
1339
|
+
|
|
1340
|
+
if full_path.stat().st_size > max_size_mb * 1024 * 1024:
|
|
1341
|
+
return {"success": False, "error": f"File exceeds maximum size of {max_size_mb} MB"}
|
|
1342
|
+
|
|
1343
|
+
try:
|
|
1344
|
+
from openpyxl import load_workbook
|
|
1345
|
+
wb = load_workbook(str(full_path), read_only=True, data_only=True)
|
|
1346
|
+
|
|
1347
|
+
sheets_data = {}
|
|
1348
|
+
sheet_names = wb.sheetnames
|
|
1349
|
+
|
|
1350
|
+
if include_all_sheets:
|
|
1351
|
+
sheets_to_read = sheet_names
|
|
1352
|
+
elif sheet_name:
|
|
1353
|
+
if sheet_name not in sheet_names:
|
|
1354
|
+
return {"success": False, "error": f"Sheet '{sheet_name}' not found. Available: {sheet_names}"}
|
|
1355
|
+
sheets_to_read = [sheet_name]
|
|
1356
|
+
else:
|
|
1357
|
+
sheets_to_read = [wb.active.title] if wb.active else sheet_names[:1]
|
|
1358
|
+
|
|
1359
|
+
for sname in sheets_to_read:
|
|
1360
|
+
ws = wb[sname]
|
|
1361
|
+
rows = []
|
|
1362
|
+
row_count = 0
|
|
1363
|
+
for row in ws.iter_rows(values_only=True):
|
|
1364
|
+
if row_count >= max_rows:
|
|
1365
|
+
break
|
|
1366
|
+
rows.append(list(row))
|
|
1367
|
+
row_count += 1
|
|
1368
|
+
|
|
1369
|
+
sheets_data[sname] = {
|
|
1370
|
+
"rows": rows,
|
|
1371
|
+
"row_count": len(rows),
|
|
1372
|
+
"truncated": row_count >= max_rows
|
|
1373
|
+
}
|
|
1374
|
+
|
|
1375
|
+
wb.close()
|
|
1376
|
+
|
|
1377
|
+
result = {
|
|
1378
|
+
"path": file_path,
|
|
1379
|
+
"full_path": str(full_path),
|
|
1380
|
+
"sheet_names": sheet_names,
|
|
1381
|
+
"sheets_read": list(sheets_data.keys()),
|
|
1382
|
+
"data": sheets_data,
|
|
1383
|
+
"max_rows_limit": max_rows
|
|
1384
|
+
}
|
|
1385
|
+
|
|
1386
|
+
logging.info(f"Read Excel document: {file_path} ({len(sheets_data)} sheets)")
|
|
1387
|
+
return {"success": True, "result": result}
|
|
1388
|
+
|
|
1389
|
+
except Exception as e:
|
|
1390
|
+
logging.error(f"Error reading Excel document {file_path}: {e}")
|
|
1391
|
+
return {"success": False, "error": str(e)}
|
|
1392
|
+
|
|
1393
|
+
|
|
1394
|
+
def _execute_read_powerpoint_document(tool_input: Dict[str, Any],
|
|
1395
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1396
|
+
"""Execute the read_powerpoint_document tool."""
|
|
1397
|
+
if not config.get('embedded_tools'):
|
|
1398
|
+
return {"success": False, "error": "Document tools not configured"}
|
|
1399
|
+
|
|
1400
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
1401
|
+
if not doc_config.get('enabled', False):
|
|
1402
|
+
return {"success": False, "error": "Document tools are not enabled"}
|
|
1403
|
+
|
|
1404
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
1405
|
+
max_size_mb = doc_config.get('max_file_size_mb', 50)
|
|
1406
|
+
|
|
1407
|
+
file_path = tool_input.get('path')
|
|
1408
|
+
include_notes = tool_input.get('include_notes', True)
|
|
1409
|
+
|
|
1410
|
+
if not file_path:
|
|
1411
|
+
return {"success": False, "error": "File path is required"}
|
|
1412
|
+
|
|
1413
|
+
validation = _validate_path(file_path, allowed_path)
|
|
1414
|
+
if not validation['valid']:
|
|
1415
|
+
return {"success": False, "error": validation['error']}
|
|
1416
|
+
|
|
1417
|
+
full_path = Path(validation['resolved_path'])
|
|
1418
|
+
|
|
1419
|
+
if not full_path.exists():
|
|
1420
|
+
return {"success": False, "error": f"File does not exist: {file_path}"}
|
|
1421
|
+
|
|
1422
|
+
if full_path.suffix.lower() != '.pptx':
|
|
1423
|
+
return {"success": False, "error": f"File is not a PowerPoint document (.pptx): {file_path}"}
|
|
1424
|
+
|
|
1425
|
+
if full_path.stat().st_size > max_size_mb * 1024 * 1024:
|
|
1426
|
+
return {"success": False, "error": f"File exceeds maximum size of {max_size_mb} MB"}
|
|
1427
|
+
|
|
1428
|
+
try:
|
|
1429
|
+
from pptx import Presentation
|
|
1430
|
+
prs = Presentation(str(full_path))
|
|
1431
|
+
|
|
1432
|
+
slides = []
|
|
1433
|
+
for idx, slide in enumerate(prs.slides, 1):
|
|
1434
|
+
slide_data = {
|
|
1435
|
+
"slide_number": idx,
|
|
1436
|
+
"title": None,
|
|
1437
|
+
"content": []
|
|
1438
|
+
}
|
|
1439
|
+
|
|
1440
|
+
for shape in slide.shapes:
|
|
1441
|
+
if shape.has_text_frame:
|
|
1442
|
+
for para in shape.text_frame.paragraphs:
|
|
1443
|
+
text = para.text.strip()
|
|
1444
|
+
if text:
|
|
1445
|
+
if shape.is_placeholder and hasattr(shape, 'placeholder_format'):
|
|
1446
|
+
if shape.placeholder_format.type == 1: # Title
|
|
1447
|
+
slide_data["title"] = text
|
|
1448
|
+
else:
|
|
1449
|
+
slide_data["content"].append(text)
|
|
1450
|
+
else:
|
|
1451
|
+
slide_data["content"].append(text)
|
|
1452
|
+
|
|
1453
|
+
if include_notes and slide.has_notes_slide:
|
|
1454
|
+
notes_frame = slide.notes_slide.notes_text_frame
|
|
1455
|
+
if notes_frame:
|
|
1456
|
+
notes_text = notes_frame.text.strip()
|
|
1457
|
+
if notes_text:
|
|
1458
|
+
slide_data["notes"] = notes_text
|
|
1459
|
+
|
|
1460
|
+
slides.append(slide_data)
|
|
1461
|
+
|
|
1462
|
+
result = {
|
|
1463
|
+
"path": file_path,
|
|
1464
|
+
"full_path": str(full_path),
|
|
1465
|
+
"slide_count": len(slides),
|
|
1466
|
+
"slides": slides
|
|
1467
|
+
}
|
|
1468
|
+
|
|
1469
|
+
logging.info(f"Read PowerPoint document: {file_path} ({len(slides)} slides)")
|
|
1470
|
+
return {"success": True, "result": result}
|
|
1471
|
+
|
|
1472
|
+
except Exception as e:
|
|
1473
|
+
logging.error(f"Error reading PowerPoint document {file_path}: {e}")
|
|
1474
|
+
return {"success": False, "error": str(e)}
|
|
1475
|
+
|
|
1476
|
+
|
|
1477
|
+
def _execute_read_pdf_document(tool_input: Dict[str, Any],
|
|
1478
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1479
|
+
"""Execute the read_pdf_document tool."""
|
|
1480
|
+
if not config.get('embedded_tools'):
|
|
1481
|
+
return {"success": False, "error": "Document tools not configured"}
|
|
1482
|
+
|
|
1483
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
1484
|
+
if not doc_config.get('enabled', False):
|
|
1485
|
+
return {"success": False, "error": "Document tools are not enabled"}
|
|
1486
|
+
|
|
1487
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
1488
|
+
max_size_mb = doc_config.get('max_file_size_mb', 50)
|
|
1489
|
+
max_pages = doc_config.get('reading', {}).get('max_pdf_pages', 100)
|
|
1490
|
+
|
|
1491
|
+
file_path = tool_input.get('path')
|
|
1492
|
+
page_numbers = tool_input.get('page_numbers')
|
|
1493
|
+
include_metadata = tool_input.get('include_metadata', True)
|
|
1494
|
+
|
|
1495
|
+
if not file_path:
|
|
1496
|
+
return {"success": False, "error": "File path is required"}
|
|
1497
|
+
|
|
1498
|
+
validation = _validate_path(file_path, allowed_path)
|
|
1499
|
+
if not validation['valid']:
|
|
1500
|
+
return {"success": False, "error": validation['error']}
|
|
1501
|
+
|
|
1502
|
+
full_path = Path(validation['resolved_path'])
|
|
1503
|
+
|
|
1504
|
+
if not full_path.exists():
|
|
1505
|
+
return {"success": False, "error": f"File does not exist: {file_path}"}
|
|
1506
|
+
|
|
1507
|
+
if full_path.suffix.lower() != '.pdf':
|
|
1508
|
+
return {"success": False, "error": f"File is not a PDF document (.pdf): {file_path}"}
|
|
1509
|
+
|
|
1510
|
+
if full_path.stat().st_size > max_size_mb * 1024 * 1024:
|
|
1511
|
+
return {"success": False, "error": f"File exceeds maximum size of {max_size_mb} MB"}
|
|
1512
|
+
|
|
1513
|
+
try:
|
|
1514
|
+
import pdfplumber
|
|
1515
|
+
|
|
1516
|
+
pages_data = []
|
|
1517
|
+
metadata = None
|
|
1518
|
+
|
|
1519
|
+
with pdfplumber.open(str(full_path)) as pdf:
|
|
1520
|
+
total_pages = len(pdf.pages)
|
|
1521
|
+
|
|
1522
|
+
if include_metadata:
|
|
1523
|
+
metadata = pdf.metadata
|
|
1524
|
+
|
|
1525
|
+
# Determine which pages to extract
|
|
1526
|
+
if page_numbers:
|
|
1527
|
+
pages_to_read = [p - 1 for p in page_numbers if 0 < p <= total_pages]
|
|
1528
|
+
else:
|
|
1529
|
+
pages_to_read = list(range(min(total_pages, max_pages)))
|
|
1530
|
+
|
|
1531
|
+
for page_idx in pages_to_read:
|
|
1532
|
+
page = pdf.pages[page_idx]
|
|
1533
|
+
text = page.extract_text() or ""
|
|
1534
|
+
pages_data.append({
|
|
1535
|
+
"page_number": page_idx + 1,
|
|
1536
|
+
"text": text,
|
|
1537
|
+
"width": page.width,
|
|
1538
|
+
"height": page.height
|
|
1539
|
+
})
|
|
1540
|
+
|
|
1541
|
+
result = {
|
|
1542
|
+
"path": file_path,
|
|
1543
|
+
"full_path": str(full_path),
|
|
1544
|
+
"total_pages": total_pages,
|
|
1545
|
+
"pages_extracted": len(pages_data),
|
|
1546
|
+
"pages": pages_data,
|
|
1547
|
+
"truncated": len(pages_data) < total_pages and not page_numbers,
|
|
1548
|
+
"metadata": metadata
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
logging.info(f"Read PDF document: {file_path} ({len(pages_data)} pages)")
|
|
1552
|
+
return {"success": True, "result": result}
|
|
1553
|
+
|
|
1554
|
+
except Exception as e:
|
|
1555
|
+
logging.error(f"Error reading PDF document {file_path}: {e}")
|
|
1556
|
+
return {"success": False, "error": str(e)}
|
|
1557
|
+
|
|
1558
|
+
|
|
1559
|
+
def _execute_create_word_document(tool_input: Dict[str, Any],
|
|
1560
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1561
|
+
"""Execute the create_word_document tool."""
|
|
1562
|
+
if not config.get('embedded_tools'):
|
|
1563
|
+
return {"success": False, "error": "Document tools not configured"}
|
|
1564
|
+
|
|
1565
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
1566
|
+
if not doc_config.get('enabled', False):
|
|
1567
|
+
return {"success": False, "error": "Document tools are not enabled"}
|
|
1568
|
+
|
|
1569
|
+
if doc_config.get('access_mode', 'read') != 'read_write':
|
|
1570
|
+
return {"success": False, "error": "Write operations require access_mode: read_write"}
|
|
1571
|
+
|
|
1572
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
1573
|
+
templates_path = doc_config.get('creation', {}).get('templates_path')
|
|
1574
|
+
|
|
1575
|
+
file_path = tool_input.get('path')
|
|
1576
|
+
content = tool_input.get('content', {})
|
|
1577
|
+
template_path = tool_input.get('template_path')
|
|
1578
|
+
placeholders = tool_input.get('placeholders', {})
|
|
1579
|
+
|
|
1580
|
+
if not file_path:
|
|
1581
|
+
return {"success": False, "error": "Output file path is required"}
|
|
1582
|
+
|
|
1583
|
+
validation = _validate_path(file_path, allowed_path)
|
|
1584
|
+
if not validation['valid']:
|
|
1585
|
+
return {"success": False, "error": validation['error']}
|
|
1586
|
+
|
|
1587
|
+
full_path = Path(validation['resolved_path'])
|
|
1588
|
+
|
|
1589
|
+
if not full_path.parent.exists():
|
|
1590
|
+
return {"success": False, "error": f"Parent directory does not exist: {full_path.parent}"}
|
|
1591
|
+
|
|
1592
|
+
try:
|
|
1593
|
+
from docx import Document
|
|
1594
|
+
|
|
1595
|
+
# Use template if provided
|
|
1596
|
+
if template_path:
|
|
1597
|
+
# Validate template path
|
|
1598
|
+
if templates_path:
|
|
1599
|
+
template_full = Path(templates_path) / template_path
|
|
1600
|
+
else:
|
|
1601
|
+
template_validation = _validate_path(template_path, allowed_path)
|
|
1602
|
+
if not template_validation['valid']:
|
|
1603
|
+
return {"success": False, "error": f"Template path error: {template_validation['error']}"}
|
|
1604
|
+
template_full = Path(template_validation['resolved_path'])
|
|
1605
|
+
|
|
1606
|
+
if not template_full.exists():
|
|
1607
|
+
return {"success": False, "error": f"Template does not exist: {template_path}"}
|
|
1608
|
+
|
|
1609
|
+
doc = Document(str(template_full))
|
|
1610
|
+
|
|
1611
|
+
# Replace placeholders in paragraphs
|
|
1612
|
+
for para in doc.paragraphs:
|
|
1613
|
+
for key, value in placeholders.items():
|
|
1614
|
+
if f"{{{{{key}}}}}" in para.text:
|
|
1615
|
+
for run in para.runs:
|
|
1616
|
+
run.text = run.text.replace(f"{{{{{key}}}}}", str(value))
|
|
1617
|
+
|
|
1618
|
+
# Replace placeholders in tables
|
|
1619
|
+
for table in doc.tables:
|
|
1620
|
+
for row in table.rows:
|
|
1621
|
+
for cell in row.cells:
|
|
1622
|
+
for key, value in placeholders.items():
|
|
1623
|
+
if f"{{{{{key}}}}}" in cell.text:
|
|
1624
|
+
cell.text = cell.text.replace(f"{{{{{key}}}}}", str(value))
|
|
1625
|
+
|
|
1626
|
+
else:
|
|
1627
|
+
doc = Document()
|
|
1628
|
+
|
|
1629
|
+
# Add title if provided
|
|
1630
|
+
if content.get('title'):
|
|
1631
|
+
doc.add_heading(content['title'], 0)
|
|
1632
|
+
|
|
1633
|
+
# Add paragraphs
|
|
1634
|
+
for para_data in content.get('paragraphs', []):
|
|
1635
|
+
text = para_data.get('text', '')
|
|
1636
|
+
style = para_data.get('style', 'Normal')
|
|
1637
|
+
if style.startswith('Heading'):
|
|
1638
|
+
level = int(style.split()[-1]) if style.split()[-1].isdigit() else 1
|
|
1639
|
+
doc.add_heading(text, level)
|
|
1640
|
+
else:
|
|
1641
|
+
doc.add_paragraph(text, style=style)
|
|
1642
|
+
|
|
1643
|
+
doc.save(str(full_path))
|
|
1644
|
+
|
|
1645
|
+
result = {
|
|
1646
|
+
"path": file_path,
|
|
1647
|
+
"full_path": str(full_path),
|
|
1648
|
+
"size_bytes": full_path.stat().st_size,
|
|
1649
|
+
"used_template": template_path is not None,
|
|
1650
|
+
"placeholders_replaced": list(placeholders.keys()) if placeholders else []
|
|
1651
|
+
}
|
|
1652
|
+
|
|
1653
|
+
logging.info(f"Created Word document: {file_path}")
|
|
1654
|
+
return {"success": True, "result": result}
|
|
1655
|
+
|
|
1656
|
+
except Exception as e:
|
|
1657
|
+
logging.error(f"Error creating Word document {file_path}: {e}")
|
|
1658
|
+
return {"success": False, "error": str(e)}
|
|
1659
|
+
|
|
1660
|
+
|
|
1661
|
+
def _execute_create_excel_document(tool_input: Dict[str, Any],
|
|
1662
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1663
|
+
"""Execute the create_excel_document tool."""
|
|
1664
|
+
if not config.get('embedded_tools'):
|
|
1665
|
+
return {"success": False, "error": "Document tools not configured"}
|
|
1666
|
+
|
|
1667
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
1668
|
+
if not doc_config.get('enabled', False):
|
|
1669
|
+
return {"success": False, "error": "Document tools are not enabled"}
|
|
1670
|
+
|
|
1671
|
+
if doc_config.get('access_mode', 'read') != 'read_write':
|
|
1672
|
+
return {"success": False, "error": "Write operations require access_mode: read_write"}
|
|
1673
|
+
|
|
1674
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
1675
|
+
|
|
1676
|
+
file_path = tool_input.get('path')
|
|
1677
|
+
sheets = tool_input.get('sheets', [])
|
|
1678
|
+
|
|
1679
|
+
if not file_path:
|
|
1680
|
+
return {"success": False, "error": "Output file path is required"}
|
|
1681
|
+
|
|
1682
|
+
if not sheets:
|
|
1683
|
+
return {"success": False, "error": "At least one sheet is required"}
|
|
1684
|
+
|
|
1685
|
+
validation = _validate_path(file_path, allowed_path)
|
|
1686
|
+
if not validation['valid']:
|
|
1687
|
+
return {"success": False, "error": validation['error']}
|
|
1688
|
+
|
|
1689
|
+
full_path = Path(validation['resolved_path'])
|
|
1690
|
+
|
|
1691
|
+
if not full_path.parent.exists():
|
|
1692
|
+
return {"success": False, "error": f"Parent directory does not exist: {full_path.parent}"}
|
|
1693
|
+
|
|
1694
|
+
try:
|
|
1695
|
+
from openpyxl import Workbook
|
|
1696
|
+
|
|
1697
|
+
wb = Workbook()
|
|
1698
|
+
# Remove default sheet
|
|
1699
|
+
if 'Sheet' in wb.sheetnames:
|
|
1700
|
+
del wb['Sheet']
|
|
1701
|
+
|
|
1702
|
+
for sheet_data in sheets:
|
|
1703
|
+
sheet_name = sheet_data.get('name', 'Sheet')
|
|
1704
|
+
headers = sheet_data.get('headers', [])
|
|
1705
|
+
data = sheet_data.get('data', [])
|
|
1706
|
+
|
|
1707
|
+
ws = wb.create_sheet(title=sheet_name)
|
|
1708
|
+
|
|
1709
|
+
# Add headers if provided
|
|
1710
|
+
if headers:
|
|
1711
|
+
for col, header in enumerate(headers, 1):
|
|
1712
|
+
ws.cell(row=1, column=col, value=header)
|
|
1713
|
+
start_row = 2
|
|
1714
|
+
else:
|
|
1715
|
+
start_row = 1
|
|
1716
|
+
|
|
1717
|
+
# Add data
|
|
1718
|
+
for row_idx, row_data in enumerate(data, start_row):
|
|
1719
|
+
for col_idx, value in enumerate(row_data, 1):
|
|
1720
|
+
ws.cell(row=row_idx, column=col_idx, value=value)
|
|
1721
|
+
|
|
1722
|
+
wb.save(str(full_path))
|
|
1723
|
+
|
|
1724
|
+
result = {
|
|
1725
|
+
"path": file_path,
|
|
1726
|
+
"full_path": str(full_path),
|
|
1727
|
+
"size_bytes": full_path.stat().st_size,
|
|
1728
|
+
"sheets_created": [s.get('name', 'Sheet') for s in sheets]
|
|
1729
|
+
}
|
|
1730
|
+
|
|
1731
|
+
logging.info(f"Created Excel document: {file_path}")
|
|
1732
|
+
return {"success": True, "result": result}
|
|
1733
|
+
|
|
1734
|
+
except Exception as e:
|
|
1735
|
+
logging.error(f"Error creating Excel document {file_path}: {e}")
|
|
1736
|
+
return {"success": False, "error": str(e)}
|
|
1737
|
+
|
|
1738
|
+
|
|
1739
|
+
def _execute_create_powerpoint_document(tool_input: Dict[str, Any],
|
|
1740
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1741
|
+
"""Execute the create_powerpoint_document tool."""
|
|
1742
|
+
if not config.get('embedded_tools'):
|
|
1743
|
+
return {"success": False, "error": "Document tools not configured"}
|
|
1744
|
+
|
|
1745
|
+
doc_config = config.get('embedded_tools', {}).get('documents', {})
|
|
1746
|
+
if not doc_config.get('enabled', False):
|
|
1747
|
+
return {"success": False, "error": "Document tools are not enabled"}
|
|
1748
|
+
|
|
1749
|
+
if doc_config.get('access_mode', 'read') != 'read_write':
|
|
1750
|
+
return {"success": False, "error": "Write operations require access_mode: read_write"}
|
|
1751
|
+
|
|
1752
|
+
allowed_path = doc_config.get('allowed_path', '.')
|
|
1753
|
+
templates_path = doc_config.get('creation', {}).get('templates_path')
|
|
1754
|
+
|
|
1755
|
+
file_path = tool_input.get('path')
|
|
1756
|
+
slides_data = tool_input.get('slides', [])
|
|
1757
|
+
template_path = tool_input.get('template_path')
|
|
1758
|
+
placeholders = tool_input.get('placeholders', {})
|
|
1759
|
+
|
|
1760
|
+
if not file_path:
|
|
1761
|
+
return {"success": False, "error": "Output file path is required"}
|
|
1762
|
+
|
|
1763
|
+
if not slides_data and not template_path:
|
|
1764
|
+
return {"success": False, "error": "Either slides or template_path is required"}
|
|
1765
|
+
|
|
1766
|
+
validation = _validate_path(file_path, allowed_path)
|
|
1767
|
+
if not validation['valid']:
|
|
1768
|
+
return {"success": False, "error": validation['error']}
|
|
1769
|
+
|
|
1770
|
+
full_path = Path(validation['resolved_path'])
|
|
1771
|
+
|
|
1772
|
+
if not full_path.parent.exists():
|
|
1773
|
+
return {"success": False, "error": f"Parent directory does not exist: {full_path.parent}"}
|
|
1774
|
+
|
|
1775
|
+
try:
|
|
1776
|
+
from pptx import Presentation
|
|
1777
|
+
from pptx.util import Inches, Pt
|
|
1778
|
+
|
|
1779
|
+
# Use template if provided
|
|
1780
|
+
if template_path:
|
|
1781
|
+
if templates_path:
|
|
1782
|
+
template_full = Path(templates_path) / template_path
|
|
1783
|
+
else:
|
|
1784
|
+
template_validation = _validate_path(template_path, allowed_path)
|
|
1785
|
+
if not template_validation['valid']:
|
|
1786
|
+
return {"success": False, "error": f"Template path error: {template_validation['error']}"}
|
|
1787
|
+
template_full = Path(template_validation['resolved_path'])
|
|
1788
|
+
|
|
1789
|
+
if not template_full.exists():
|
|
1790
|
+
return {"success": False, "error": f"Template does not exist: {template_path}"}
|
|
1791
|
+
|
|
1792
|
+
prs = Presentation(str(template_full))
|
|
1793
|
+
|
|
1794
|
+
# Replace placeholders in existing slides
|
|
1795
|
+
for slide in prs.slides:
|
|
1796
|
+
for shape in slide.shapes:
|
|
1797
|
+
if shape.has_text_frame:
|
|
1798
|
+
for para in shape.text_frame.paragraphs:
|
|
1799
|
+
for run in para.runs:
|
|
1800
|
+
for key, value in placeholders.items():
|
|
1801
|
+
if f"{{{{{key}}}}}" in run.text:
|
|
1802
|
+
run.text = run.text.replace(f"{{{{{key}}}}}", str(value))
|
|
1803
|
+
else:
|
|
1804
|
+
prs = Presentation()
|
|
1805
|
+
|
|
1806
|
+
# Add new slides
|
|
1807
|
+
for slide_data in slides_data:
|
|
1808
|
+
layout_name = slide_data.get('layout', 'title_content')
|
|
1809
|
+
title = slide_data.get('title', '')
|
|
1810
|
+
content = slide_data.get('content', [])
|
|
1811
|
+
notes = slide_data.get('notes', '')
|
|
1812
|
+
|
|
1813
|
+
# Map layout names to indices
|
|
1814
|
+
layout_map = {
|
|
1815
|
+
'title': 0,
|
|
1816
|
+
'title_content': 1,
|
|
1817
|
+
'content': 5,
|
|
1818
|
+
'blank': 6
|
|
1819
|
+
}
|
|
1820
|
+
layout_idx = layout_map.get(layout_name, 1)
|
|
1821
|
+
|
|
1822
|
+
if layout_idx < len(prs.slide_layouts):
|
|
1823
|
+
slide_layout = prs.slide_layouts[layout_idx]
|
|
1824
|
+
else:
|
|
1825
|
+
slide_layout = prs.slide_layouts[0]
|
|
1826
|
+
|
|
1827
|
+
slide = prs.slides.add_slide(slide_layout)
|
|
1828
|
+
|
|
1829
|
+
# Set title
|
|
1830
|
+
if title and slide.shapes.title:
|
|
1831
|
+
slide.shapes.title.text = title
|
|
1832
|
+
|
|
1833
|
+
# Add content
|
|
1834
|
+
if content:
|
|
1835
|
+
for shape in slide.shapes:
|
|
1836
|
+
if shape.has_text_frame and shape != slide.shapes.title:
|
|
1837
|
+
tf = shape.text_frame
|
|
1838
|
+
tf.clear()
|
|
1839
|
+
for i, text in enumerate(content):
|
|
1840
|
+
if i == 0:
|
|
1841
|
+
tf.paragraphs[0].text = text
|
|
1842
|
+
else:
|
|
1843
|
+
p = tf.add_paragraph()
|
|
1844
|
+
p.text = text
|
|
1845
|
+
break
|
|
1846
|
+
|
|
1847
|
+
# Add notes
|
|
1848
|
+
if notes:
|
|
1849
|
+
notes_slide = slide.notes_slide
|
|
1850
|
+
notes_slide.notes_text_frame.text = notes
|
|
1851
|
+
|
|
1852
|
+
prs.save(str(full_path))
|
|
1853
|
+
|
|
1854
|
+
result = {
|
|
1855
|
+
"path": file_path,
|
|
1856
|
+
"full_path": str(full_path),
|
|
1857
|
+
"size_bytes": full_path.stat().st_size,
|
|
1858
|
+
"slides_added": len(slides_data),
|
|
1859
|
+
"used_template": template_path is not None,
|
|
1860
|
+
"placeholders_replaced": list(placeholders.keys()) if placeholders else []
|
|
1861
|
+
}
|
|
1862
|
+
|
|
1863
|
+
logging.info(f"Created PowerPoint document: {file_path}")
|
|
1864
|
+
return {"success": True, "result": result}
|
|
1865
|
+
|
|
1866
|
+
except Exception as e:
|
|
1867
|
+
logging.error(f"Error creating PowerPoint document {file_path}: {e}")
|
|
1868
|
+
return {"success": False, "error": str(e)}
|
|
1869
|
+
|
|
1870
|
+
|
|
1871
|
+
# ============================================================================
|
|
1872
|
+
# Archive Tools
|
|
1873
|
+
# ============================================================================
|
|
1874
|
+
|
|
1875
|
+
def _get_archive_tools(archive_config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
1876
|
+
"""
|
|
1877
|
+
Get archive tool definitions based on configuration.
|
|
1878
|
+
|
|
1879
|
+
Args:
|
|
1880
|
+
archive_config: Archive tools configuration dictionary
|
|
1881
|
+
|
|
1882
|
+
Returns:
|
|
1883
|
+
List of archive tool definitions
|
|
1884
|
+
"""
|
|
1885
|
+
access_mode = archive_config.get('access_mode', 'read')
|
|
1886
|
+
allowed_path = archive_config.get('allowed_path', '.')
|
|
1887
|
+
|
|
1888
|
+
tools = [
|
|
1889
|
+
{
|
|
1890
|
+
"name": "list_archive_contents",
|
|
1891
|
+
"description": f"List the contents of an archive file within the allowed path ({allowed_path}). "
|
|
1892
|
+
"Supports .zip, .tar, .tar.gz, .tgz, and .tar.bz2 files. "
|
|
1893
|
+
"Returns file names, sizes, and modification times.",
|
|
1894
|
+
"input_schema": {
|
|
1895
|
+
"type": "object",
|
|
1896
|
+
"properties": {
|
|
1897
|
+
"path": {
|
|
1898
|
+
"type": "string",
|
|
1899
|
+
"description": "Path to the archive file"
|
|
1900
|
+
}
|
|
1901
|
+
},
|
|
1902
|
+
"required": ["path"]
|
|
1903
|
+
}
|
|
1904
|
+
},
|
|
1905
|
+
{
|
|
1906
|
+
"name": "read_archive_file",
|
|
1907
|
+
"description": f"Read a specific file from within an archive without extracting to disk ({allowed_path}). "
|
|
1908
|
+
"Returns the file content. Text files are returned as strings, binary files as base64.",
|
|
1909
|
+
"input_schema": {
|
|
1910
|
+
"type": "object",
|
|
1911
|
+
"properties": {
|
|
1912
|
+
"archive_path": {
|
|
1913
|
+
"type": "string",
|
|
1914
|
+
"description": "Path to the archive file"
|
|
1915
|
+
},
|
|
1916
|
+
"file_path": {
|
|
1917
|
+
"type": "string",
|
|
1918
|
+
"description": "Path of the file within the archive to read"
|
|
1919
|
+
},
|
|
1920
|
+
"encoding": {
|
|
1921
|
+
"type": "string",
|
|
1922
|
+
"description": "Text encoding to use when reading as text (default: utf-8)",
|
|
1923
|
+
"default": "utf-8"
|
|
1924
|
+
},
|
|
1925
|
+
"as_binary": {
|
|
1926
|
+
"type": "boolean",
|
|
1927
|
+
"description": "Force reading as binary (returns base64)",
|
|
1928
|
+
"default": False
|
|
1929
|
+
}
|
|
1930
|
+
},
|
|
1931
|
+
"required": ["archive_path", "file_path"]
|
|
1932
|
+
}
|
|
1933
|
+
}
|
|
1934
|
+
]
|
|
1935
|
+
|
|
1936
|
+
# Extract tool (only if access_mode is read_write)
|
|
1937
|
+
if access_mode == 'read_write':
|
|
1938
|
+
tools.append({
|
|
1939
|
+
"name": "extract_archive",
|
|
1940
|
+
"description": f"Extract an archive file to a destination directory within the allowed path ({allowed_path}). "
|
|
1941
|
+
"Supports .zip, .tar, .tar.gz, .tgz, and .tar.bz2 files. "
|
|
1942
|
+
"Can extract all files or specific files only.",
|
|
1943
|
+
"input_schema": {
|
|
1944
|
+
"type": "object",
|
|
1945
|
+
"properties": {
|
|
1946
|
+
"archive_path": {
|
|
1947
|
+
"type": "string",
|
|
1948
|
+
"description": "Path to the archive file"
|
|
1949
|
+
},
|
|
1950
|
+
"destination": {
|
|
1951
|
+
"type": "string",
|
|
1952
|
+
"description": "Destination directory for extracted files"
|
|
1953
|
+
},
|
|
1954
|
+
"files": {
|
|
1955
|
+
"type": "array",
|
|
1956
|
+
"items": {"type": "string"},
|
|
1957
|
+
"description": "Specific files to extract. If not provided, extracts all files."
|
|
1958
|
+
},
|
|
1959
|
+
"overwrite": {
|
|
1960
|
+
"type": "boolean",
|
|
1961
|
+
"description": "Overwrite existing files",
|
|
1962
|
+
"default": False
|
|
1963
|
+
}
|
|
1964
|
+
},
|
|
1965
|
+
"required": ["archive_path", "destination"]
|
|
1966
|
+
}
|
|
1967
|
+
})
|
|
1968
|
+
|
|
1969
|
+
return tools
|
|
1970
|
+
|
|
1971
|
+
|
|
1972
|
+
def _get_archive_type(file_path: Path) -> Optional[str]:
|
|
1973
|
+
"""Determine archive type from file extension."""
|
|
1974
|
+
suffix = file_path.suffix.lower()
|
|
1975
|
+
name = file_path.name.lower()
|
|
1976
|
+
|
|
1977
|
+
if suffix == '.zip':
|
|
1978
|
+
return 'zip'
|
|
1979
|
+
elif suffix == '.tar':
|
|
1980
|
+
return 'tar'
|
|
1981
|
+
elif name.endswith('.tar.gz') or suffix == '.tgz':
|
|
1982
|
+
return 'tar.gz'
|
|
1983
|
+
elif name.endswith('.tar.bz2'):
|
|
1984
|
+
return 'tar.bz2'
|
|
1985
|
+
return None
|
|
1986
|
+
|
|
1987
|
+
|
|
1988
|
+
def _execute_list_archive_contents(tool_input: Dict[str, Any],
|
|
1989
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1990
|
+
"""Execute the list_archive_contents tool."""
|
|
1991
|
+
import zipfile
|
|
1992
|
+
import tarfile
|
|
1993
|
+
|
|
1994
|
+
if not config.get('embedded_tools'):
|
|
1995
|
+
return {"success": False, "error": "Archive tools not configured"}
|
|
1996
|
+
|
|
1997
|
+
archive_config = config.get('embedded_tools', {}).get('archives', {})
|
|
1998
|
+
if not archive_config.get('enabled', False):
|
|
1999
|
+
return {"success": False, "error": "Archive tools are not enabled"}
|
|
2000
|
+
|
|
2001
|
+
allowed_path = archive_config.get('allowed_path', '.')
|
|
2002
|
+
max_size_mb = archive_config.get('max_file_size_mb', 100)
|
|
2003
|
+
max_files = archive_config.get('max_files_to_list', 1000)
|
|
2004
|
+
|
|
2005
|
+
file_path = tool_input.get('path')
|
|
2006
|
+
|
|
2007
|
+
if not file_path:
|
|
2008
|
+
return {"success": False, "error": "Archive path is required"}
|
|
2009
|
+
|
|
2010
|
+
validation = _validate_path(file_path, allowed_path)
|
|
2011
|
+
if not validation['valid']:
|
|
2012
|
+
return {"success": False, "error": validation['error']}
|
|
2013
|
+
|
|
2014
|
+
full_path = Path(validation['resolved_path'])
|
|
2015
|
+
|
|
2016
|
+
if not full_path.exists():
|
|
2017
|
+
return {"success": False, "error": f"Archive does not exist: {file_path}"}
|
|
2018
|
+
|
|
2019
|
+
if full_path.stat().st_size > max_size_mb * 1024 * 1024:
|
|
2020
|
+
return {"success": False, "error": f"Archive exceeds maximum size of {max_size_mb} MB"}
|
|
2021
|
+
|
|
2022
|
+
archive_type = _get_archive_type(full_path)
|
|
2023
|
+
if not archive_type:
|
|
2024
|
+
return {"success": False, "error": f"Unsupported archive format: {full_path.suffix}"}
|
|
2025
|
+
|
|
2026
|
+
try:
|
|
2027
|
+
files = []
|
|
2028
|
+
|
|
2029
|
+
if archive_type == 'zip':
|
|
2030
|
+
with zipfile.ZipFile(str(full_path), 'r') as zf:
|
|
2031
|
+
for info in zf.infolist()[:max_files]:
|
|
2032
|
+
files.append({
|
|
2033
|
+
"path": info.filename,
|
|
2034
|
+
"size_bytes": info.file_size,
|
|
2035
|
+
"compressed_size": info.compress_size,
|
|
2036
|
+
"is_directory": info.is_dir(),
|
|
2037
|
+
"modified": datetime(*info.date_time).isoformat() if info.date_time else None
|
|
2038
|
+
})
|
|
2039
|
+
else:
|
|
2040
|
+
mode = 'r:gz' if archive_type == 'tar.gz' else 'r:bz2' if archive_type == 'tar.bz2' else 'r'
|
|
2041
|
+
with tarfile.open(str(full_path), mode) as tf:
|
|
2042
|
+
count = 0
|
|
2043
|
+
for member in tf:
|
|
2044
|
+
if count >= max_files:
|
|
2045
|
+
break
|
|
2046
|
+
files.append({
|
|
2047
|
+
"path": member.name,
|
|
2048
|
+
"size_bytes": member.size,
|
|
2049
|
+
"is_directory": member.isdir(),
|
|
2050
|
+
"modified": datetime.fromtimestamp(member.mtime).isoformat() if member.mtime else None
|
|
2051
|
+
})
|
|
2052
|
+
count += 1
|
|
2053
|
+
|
|
2054
|
+
result = {
|
|
2055
|
+
"path": file_path,
|
|
2056
|
+
"full_path": str(full_path),
|
|
2057
|
+
"archive_type": archive_type,
|
|
2058
|
+
"total_files": len(files),
|
|
2059
|
+
"truncated": len(files) >= max_files,
|
|
2060
|
+
"files": files
|
|
2061
|
+
}
|
|
2062
|
+
|
|
2063
|
+
logging.info(f"Listed archive contents: {file_path} ({len(files)} files)")
|
|
2064
|
+
return {"success": True, "result": result}
|
|
2065
|
+
|
|
2066
|
+
except Exception as e:
|
|
2067
|
+
logging.error(f"Error listing archive {file_path}: {e}")
|
|
2068
|
+
return {"success": False, "error": str(e)}
|
|
2069
|
+
|
|
2070
|
+
|
|
2071
|
+
def _execute_read_archive_file(tool_input: Dict[str, Any],
|
|
2072
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
2073
|
+
"""Execute the read_archive_file tool."""
|
|
2074
|
+
import zipfile
|
|
2075
|
+
import tarfile
|
|
2076
|
+
|
|
2077
|
+
if not config.get('embedded_tools'):
|
|
2078
|
+
return {"success": False, "error": "Archive tools not configured"}
|
|
2079
|
+
|
|
2080
|
+
archive_config = config.get('embedded_tools', {}).get('archives', {})
|
|
2081
|
+
if not archive_config.get('enabled', False):
|
|
2082
|
+
return {"success": False, "error": "Archive tools are not enabled"}
|
|
2083
|
+
|
|
2084
|
+
allowed_path = archive_config.get('allowed_path', '.')
|
|
2085
|
+
max_size_mb = archive_config.get('max_file_size_mb', 100)
|
|
2086
|
+
|
|
2087
|
+
archive_path = tool_input.get('archive_path')
|
|
2088
|
+
file_path = tool_input.get('file_path')
|
|
2089
|
+
encoding = tool_input.get('encoding', 'utf-8')
|
|
2090
|
+
as_binary = tool_input.get('as_binary', False)
|
|
2091
|
+
|
|
2092
|
+
if not archive_path:
|
|
2093
|
+
return {"success": False, "error": "Archive path is required"}
|
|
2094
|
+
|
|
2095
|
+
if not file_path:
|
|
2096
|
+
return {"success": False, "error": "File path within archive is required"}
|
|
2097
|
+
|
|
2098
|
+
validation = _validate_path(archive_path, allowed_path)
|
|
2099
|
+
if not validation['valid']:
|
|
2100
|
+
return {"success": False, "error": validation['error']}
|
|
2101
|
+
|
|
2102
|
+
full_path = Path(validation['resolved_path'])
|
|
2103
|
+
|
|
2104
|
+
if not full_path.exists():
|
|
2105
|
+
return {"success": False, "error": f"Archive does not exist: {archive_path}"}
|
|
2106
|
+
|
|
2107
|
+
if full_path.stat().st_size > max_size_mb * 1024 * 1024:
|
|
2108
|
+
return {"success": False, "error": f"Archive exceeds maximum size of {max_size_mb} MB"}
|
|
2109
|
+
|
|
2110
|
+
archive_type = _get_archive_type(full_path)
|
|
2111
|
+
if not archive_type:
|
|
2112
|
+
return {"success": False, "error": f"Unsupported archive format: {full_path.suffix}"}
|
|
2113
|
+
|
|
2114
|
+
try:
|
|
2115
|
+
content = None
|
|
2116
|
+
|
|
2117
|
+
if archive_type == 'zip':
|
|
2118
|
+
with zipfile.ZipFile(str(full_path), 'r') as zf:
|
|
2119
|
+
if file_path not in zf.namelist():
|
|
2120
|
+
return {"success": False, "error": f"File not found in archive: {file_path}"}
|
|
2121
|
+
content = zf.read(file_path)
|
|
2122
|
+
else:
|
|
2123
|
+
mode = 'r:gz' if archive_type == 'tar.gz' else 'r:bz2' if archive_type == 'tar.bz2' else 'r'
|
|
2124
|
+
with tarfile.open(str(full_path), mode) as tf:
|
|
2125
|
+
try:
|
|
2126
|
+
member = tf.getmember(file_path)
|
|
2127
|
+
f = tf.extractfile(member)
|
|
2128
|
+
if f:
|
|
2129
|
+
content = f.read()
|
|
2130
|
+
else:
|
|
2131
|
+
return {"success": False, "error": f"Cannot read directory: {file_path}"}
|
|
2132
|
+
except KeyError:
|
|
2133
|
+
return {"success": False, "error": f"File not found in archive: {file_path}"}
|
|
2134
|
+
|
|
2135
|
+
# Try to decode as text unless binary requested
|
|
2136
|
+
if as_binary:
|
|
2137
|
+
result = {
|
|
2138
|
+
"archive_path": archive_path,
|
|
2139
|
+
"file_path": file_path,
|
|
2140
|
+
"content_base64": base64.b64encode(content).decode('utf-8'),
|
|
2141
|
+
"size_bytes": len(content),
|
|
2142
|
+
"is_binary": True
|
|
2143
|
+
}
|
|
2144
|
+
else:
|
|
2145
|
+
try:
|
|
2146
|
+
text_content = content.decode(encoding)
|
|
2147
|
+
result = {
|
|
2148
|
+
"archive_path": archive_path,
|
|
2149
|
+
"file_path": file_path,
|
|
2150
|
+
"content": text_content,
|
|
2151
|
+
"size_bytes": len(content),
|
|
2152
|
+
"encoding": encoding,
|
|
2153
|
+
"is_binary": False
|
|
2154
|
+
}
|
|
2155
|
+
except UnicodeDecodeError:
|
|
2156
|
+
result = {
|
|
2157
|
+
"archive_path": archive_path,
|
|
2158
|
+
"file_path": file_path,
|
|
2159
|
+
"content_base64": base64.b64encode(content).decode('utf-8'),
|
|
2160
|
+
"size_bytes": len(content),
|
|
2161
|
+
"is_binary": True,
|
|
2162
|
+
"note": f"Could not decode as {encoding}, returned as base64"
|
|
2163
|
+
}
|
|
2164
|
+
|
|
2165
|
+
logging.info(f"Read file from archive: {archive_path}/{file_path}")
|
|
2166
|
+
return {"success": True, "result": result}
|
|
2167
|
+
|
|
2168
|
+
except Exception as e:
|
|
2169
|
+
logging.error(f"Error reading from archive {archive_path}: {e}")
|
|
2170
|
+
return {"success": False, "error": str(e)}
|
|
2171
|
+
|
|
2172
|
+
|
|
2173
|
+
def _execute_extract_archive(tool_input: Dict[str, Any],
|
|
2174
|
+
config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
2175
|
+
"""Execute the extract_archive tool."""
|
|
2176
|
+
import zipfile
|
|
2177
|
+
import tarfile
|
|
2178
|
+
|
|
2179
|
+
if not config.get('embedded_tools'):
|
|
2180
|
+
return {"success": False, "error": "Archive tools not configured"}
|
|
2181
|
+
|
|
2182
|
+
archive_config = config.get('embedded_tools', {}).get('archives', {})
|
|
2183
|
+
if not archive_config.get('enabled', False):
|
|
2184
|
+
return {"success": False, "error": "Archive tools are not enabled"}
|
|
2185
|
+
|
|
2186
|
+
if archive_config.get('access_mode', 'read') != 'read_write':
|
|
2187
|
+
return {"success": False, "error": "Extract operations require access_mode: read_write"}
|
|
2188
|
+
|
|
2189
|
+
allowed_path = archive_config.get('allowed_path', '.')
|
|
2190
|
+
max_size_mb = archive_config.get('max_file_size_mb', 100)
|
|
2191
|
+
|
|
2192
|
+
archive_path = tool_input.get('archive_path')
|
|
2193
|
+
destination = tool_input.get('destination')
|
|
2194
|
+
files_to_extract = tool_input.get('files')
|
|
2195
|
+
overwrite = tool_input.get('overwrite', False)
|
|
2196
|
+
|
|
2197
|
+
if not archive_path:
|
|
2198
|
+
return {"success": False, "error": "Archive path is required"}
|
|
2199
|
+
|
|
2200
|
+
if not destination:
|
|
2201
|
+
return {"success": False, "error": "Destination directory is required"}
|
|
2202
|
+
|
|
2203
|
+
# Validate archive path
|
|
2204
|
+
archive_validation = _validate_path(archive_path, allowed_path)
|
|
2205
|
+
if not archive_validation['valid']:
|
|
2206
|
+
return {"success": False, "error": archive_validation['error']}
|
|
2207
|
+
|
|
2208
|
+
full_archive_path = Path(archive_validation['resolved_path'])
|
|
2209
|
+
|
|
2210
|
+
# Validate destination path
|
|
2211
|
+
dest_validation = _validate_path(destination, allowed_path)
|
|
2212
|
+
if not dest_validation['valid']:
|
|
2213
|
+
return {"success": False, "error": dest_validation['error']}
|
|
2214
|
+
|
|
2215
|
+
full_dest_path = Path(dest_validation['resolved_path'])
|
|
2216
|
+
|
|
2217
|
+
if not full_archive_path.exists():
|
|
2218
|
+
return {"success": False, "error": f"Archive does not exist: {archive_path}"}
|
|
2219
|
+
|
|
2220
|
+
if full_archive_path.stat().st_size > max_size_mb * 1024 * 1024:
|
|
2221
|
+
return {"success": False, "error": f"Archive exceeds maximum size of {max_size_mb} MB"}
|
|
2222
|
+
|
|
2223
|
+
archive_type = _get_archive_type(full_archive_path)
|
|
2224
|
+
if not archive_type:
|
|
2225
|
+
return {"success": False, "error": f"Unsupported archive format: {full_archive_path.suffix}"}
|
|
2226
|
+
|
|
2227
|
+
try:
|
|
2228
|
+
# Create destination directory
|
|
2229
|
+
full_dest_path.mkdir(parents=True, exist_ok=True)
|
|
2230
|
+
|
|
2231
|
+
extracted_files = []
|
|
2232
|
+
|
|
2233
|
+
if archive_type == 'zip':
|
|
2234
|
+
with zipfile.ZipFile(str(full_archive_path), 'r') as zf:
|
|
2235
|
+
members = files_to_extract if files_to_extract else zf.namelist()
|
|
2236
|
+
for member in members:
|
|
2237
|
+
if member in zf.namelist():
|
|
2238
|
+
dest_file = full_dest_path / member
|
|
2239
|
+
if dest_file.exists() and not overwrite:
|
|
2240
|
+
continue
|
|
2241
|
+
zf.extract(member, str(full_dest_path))
|
|
2242
|
+
extracted_files.append(member)
|
|
2243
|
+
else:
|
|
2244
|
+
mode = 'r:gz' if archive_type == 'tar.gz' else 'r:bz2' if archive_type == 'tar.bz2' else 'r'
|
|
2245
|
+
with tarfile.open(str(full_archive_path), mode) as tf:
|
|
2246
|
+
if files_to_extract:
|
|
2247
|
+
members = [tf.getmember(f) for f in files_to_extract if f in tf.getnames()]
|
|
2248
|
+
else:
|
|
2249
|
+
members = tf.getmembers()
|
|
2250
|
+
|
|
2251
|
+
for member in members:
|
|
2252
|
+
dest_file = full_dest_path / member.name
|
|
2253
|
+
if dest_file.exists() and not overwrite:
|
|
2254
|
+
continue
|
|
2255
|
+
tf.extract(member, str(full_dest_path))
|
|
2256
|
+
extracted_files.append(member.name)
|
|
2257
|
+
|
|
2258
|
+
result = {
|
|
2259
|
+
"archive_path": archive_path,
|
|
2260
|
+
"destination": destination,
|
|
2261
|
+
"full_destination": str(full_dest_path),
|
|
2262
|
+
"files_extracted": len(extracted_files),
|
|
2263
|
+
"extracted": extracted_files
|
|
2264
|
+
}
|
|
2265
|
+
|
|
2266
|
+
logging.info(f"Extracted archive: {archive_path} -> {destination} ({len(extracted_files)} files)")
|
|
2267
|
+
return {"success": True, "result": result}
|
|
2268
|
+
|
|
2269
|
+
except Exception as e:
|
|
2270
|
+
logging.error(f"Error extracting archive {archive_path}: {e}")
|
|
2271
|
+
return {"success": False, "error": str(e)}
|