aiecs 1.1.0-py3-none-any.whl → 1.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic.
- aiecs/__init__.py +1 -1
- aiecs/aiecs_client.py +1 -1
- aiecs/config/config.py +38 -0
- aiecs/domain/__init__.py +95 -0
- aiecs/domain/community/__init__.py +159 -0
- aiecs/domain/community/agent_adapter.py +516 -0
- aiecs/domain/community/analytics.py +465 -0
- aiecs/domain/community/collaborative_workflow.py +99 -7
- aiecs/domain/community/communication_hub.py +649 -0
- aiecs/domain/community/community_builder.py +322 -0
- aiecs/domain/community/community_integration.py +365 -12
- aiecs/domain/community/community_manager.py +481 -5
- aiecs/domain/community/decision_engine.py +459 -13
- aiecs/domain/community/exceptions.py +238 -0
- aiecs/domain/community/models/__init__.py +36 -0
- aiecs/domain/community/resource_manager.py +1 -1
- aiecs/domain/community/shared_context_manager.py +621 -0
- aiecs/domain/context/context_engine.py +37 -33
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +207 -0
- aiecs/infrastructure/persistence/file_storage.py +41 -28
- aiecs/llm/__init__.py +44 -7
- aiecs/llm/callbacks/__init__.py +12 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +1 -1
- aiecs/llm/client_factory.py +23 -6
- aiecs/llm/clients/__init__.py +35 -0
- aiecs/llm/{base_client.py → clients/base_client.py} +73 -1
- aiecs/llm/{googleai_client.py → clients/googleai_client.py} +19 -15
- aiecs/llm/{openai_client.py → clients/openai_client.py} +9 -14
- aiecs/llm/{vertex_client.py → clients/vertex_client.py} +15 -15
- aiecs/llm/{xai_client.py → clients/xai_client.py} +36 -50
- aiecs/llm/config/__init__.py +54 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +237 -0
- aiecs/llm/config/model_config.py +132 -0
- aiecs/llm/utils/__init__.py +11 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +32 -2
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +15 -0
- aiecs/scripts/aid/version_manager.py +224 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +1 -0
- aiecs/tools/__init__.py +23 -23
- aiecs/tools/docs/__init__.py +5 -2
- aiecs/tools/docs/ai_document_orchestrator.py +39 -26
- aiecs/tools/docs/ai_document_writer_orchestrator.py +61 -38
- aiecs/tools/docs/content_insertion_tool.py +48 -28
- aiecs/tools/docs/document_creator_tool.py +47 -29
- aiecs/tools/docs/document_layout_tool.py +35 -20
- aiecs/tools/docs/document_parser_tool.py +56 -36
- aiecs/tools/docs/document_writer_tool.py +115 -62
- aiecs/tools/schema_generator.py +56 -56
- aiecs/tools/statistics/__init__.py +82 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +581 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +473 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +629 -0
- aiecs/tools/statistics/data_loader_tool.py +518 -0
- aiecs/tools/statistics/data_profiler_tool.py +599 -0
- aiecs/tools/statistics/data_transformer_tool.py +531 -0
- aiecs/tools/statistics/data_visualizer_tool.py +460 -0
- aiecs/tools/statistics/model_trainer_tool.py +470 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +426 -0
- aiecs/tools/task_tools/chart_tool.py +2 -1
- aiecs/tools/task_tools/image_tool.py +43 -43
- aiecs/tools/task_tools/office_tool.py +39 -36
- aiecs/tools/task_tools/pandas_tool.py +37 -33
- aiecs/tools/task_tools/report_tool.py +67 -56
- aiecs/tools/task_tools/research_tool.py +32 -31
- aiecs/tools/task_tools/scraper_tool.py +53 -46
- aiecs/tools/task_tools/search_tool.py +1123 -0
- aiecs/tools/task_tools/stats_tool.py +20 -15
- aiecs/tools/tool_executor/__init__.py +2 -2
- aiecs/tools/tool_executor/tool_executor.py +3 -3
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/METADATA +5 -1
- aiecs-1.2.1.dist-info/RECORD +144 -0
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/entry_points.txt +1 -0
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.1.0.dist-info/RECORD +0 -114
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/WHEEL +0 -0
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/top_level.txt +0 -0
--- a/aiecs/tools/docs/document_layout_tool.py
+++ b/aiecs/tools/docs/document_layout_tool.py
@@ -24,8 +24,7 @@ from typing import Dict, Any, List, Optional, Union, Tuple
 from enum import Enum
 from pathlib import Path
 
-from pydantic import BaseModel, Field, ValidationError
-from pydantic_settings import BaseSettings
+from pydantic import BaseModel, Field, ValidationError, ConfigDict
 
 from aiecs.tools.base_tool import BaseTool
 from aiecs.tools import register_tool
@@ -86,17 +85,6 @@ class HeaderFooterPosition(str, Enum):
     FOOTER_RIGHT = "footer_right"
 
 
-class DocumentLayoutSettings(BaseSettings):
-    """Configuration for DocumentLayoutTool"""
-    temp_dir: str = os.path.join(tempfile.gettempdir(), 'document_layouts')
-    default_page_size: PageSize = PageSize.A4
-    default_orientation: PageOrientation = PageOrientation.PORTRAIT
-    default_margins: Dict[str, float] = {"top": 2.5, "bottom": 2.5, "left": 2.5, "right": 2.5}
-    auto_adjust_layout: bool = True
-    preserve_formatting: bool = True
-
-    class Config:
-        env_prefix = "DOC_LAYOUT_"
 
 
 class DocumentLayoutError(Exception):
@@ -133,15 +121,42 @@ class DocumentLayoutTool(BaseTool):
     - ContentInsertionTool for complex content positioning
     """
 
+    # Configuration schema
+    class Config(BaseModel):
+        """Configuration for the document layout tool"""
+        model_config = ConfigDict(env_prefix="DOC_LAYOUT_")
+
+        temp_dir: str = Field(
+            default=os.path.join(tempfile.gettempdir(), 'document_layouts'),
+            description="Temporary directory for layout processing"
+        )
+        default_page_size: str = Field(
+            default="a4",
+            description="Default page size"
+        )
+        default_orientation: str = Field(
+            default="portrait",
+            description="Default page orientation"
+        )
+        default_margins: Dict[str, float] = Field(
+            default={"top": 2.5, "bottom": 2.5, "left": 2.5, "right": 2.5},
+            description="Default page margins in centimeters (top, bottom, left, right)"
+        )
+        auto_adjust_layout: bool = Field(
+            default=True,
+            description="Whether to automatically adjust layout for optimal presentation"
+        )
+        preserve_formatting: bool = Field(
+            default=True,
+            description="Whether to preserve existing formatting when applying layouts"
+        )
+
     def __init__(self, config: Optional[Dict] = None):
         """Initialize Document Layout Tool with settings"""
         super().__init__(config)
-
-
-
-                self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
-            except ValidationError as e:
-                raise ValueError(f"Invalid settings: {e}")
+
+        # Parse configuration
+        self.config = self.Config(**(config or {}))
 
         self.logger = logging.getLogger(__name__)
 
@@ -156,7 +171,7 @@ class DocumentLayoutTool(BaseTool):
 
     def _init_directories(self):
         """Initialize required directories"""
-        os.makedirs(self.
+        os.makedirs(self.config.temp_dir, exist_ok=True)
 
     def _init_layout_presets(self):
         """Initialize built-in layout presets"""
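Net effect of the document_layout_tool.py hunks: the module-level DocumentLayoutSettings(BaseSettings) model is gone, replaced by a nested Config(BaseModel) schema that __init__ parses with self.config = self.Config(**(config or {})), and every temp_dir lookup now goes through self.config. Below is a minimal usage sketch of that pattern; the import path is inferred from the file list above, and it assumes the tool can be constructed standalone with just a config dict.

from aiecs.tools.docs.document_layout_tool import DocumentLayoutTool  # path inferred from the file list

# Defaults come from the nested Config schema; dict entries override them field by field.
tool = DocumentLayoutTool(config={"default_page_size": "letter", "auto_adjust_layout": False})

print(tool.config.default_page_size)  # "letter"
print(tool.config.default_margins)    # {'top': 2.5, 'bottom': 2.5, 'left': 2.5, 'right': 2.5}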
--- a/aiecs/tools/docs/document_parser_tool.py
+++ b/aiecs/tools/docs/document_parser_tool.py
@@ -11,7 +11,6 @@ import tempfile
 
 import httpx
 from pydantic import BaseModel, Field, ValidationError, ConfigDict
-from pydantic_settings import BaseSettings
 
 from aiecs.tools.base_tool import BaseTool
 from aiecs.tools import register_tool
@@ -50,21 +49,6 @@ class OutputFormat(str, Enum):
     HTML = "html"
 
 
-class DocumentParserSettings(BaseSettings):
-    """Configuration for DocumentParserTool"""
-    user_agent: str = "DocumentParser/1.0"
-    max_file_size: int = 50 * 1024 * 1024  # 50MB
-    temp_dir: str = os.path.join(tempfile.gettempdir(), 'document_parser')
-    default_encoding: str = "utf-8"
-    timeout: int = 30
-    max_pages: int = 1000  # For large PDF files
-
-    # Cloud storage settings
-    enable_cloud_storage: bool = True
-    gcs_bucket_name: str = "aiecs-documents"
-    gcs_project_id: Optional[str] = None
-
-    model_config = ConfigDict(env_prefix="DOC_PARSER_")
 
 
 class DocumentParserError(Exception):
@@ -102,21 +86,57 @@ class DocumentParserTool(BaseTool):
     - ImageTool for image OCR
     """
 
+    # Configuration schema
+    class Config(BaseModel):
+        """Configuration for the document parser tool"""
+        model_config = ConfigDict(env_prefix="DOC_PARSER_")
+
+        user_agent: str = Field(
+            default="DocumentParser/1.0",
+            description="User agent for HTTP requests"
+        )
+        max_file_size: int = Field(
+            default=50 * 1024 * 1024,
+            description="Maximum file size in bytes"
+        )
+        temp_dir: str = Field(
+            default=os.path.join(tempfile.gettempdir(), 'document_parser'),
+            description="Temporary directory for document processing"
+        )
+        default_encoding: str = Field(
+            default="utf-8",
+            description="Default encoding for text files"
+        )
+        timeout: int = Field(
+            default=30,
+            description="Timeout for HTTP requests in seconds"
+        )
+        max_pages: int = Field(
+            default=1000,
+            description="Maximum number of pages to process for large documents"
+        )
+        enable_cloud_storage: bool = Field(
+            default=True,
+            description="Whether to enable cloud storage integration"
+        )
+        gcs_bucket_name: str = Field(
+            default="aiecs-documents",
+            description="Google Cloud Storage bucket name"
+        )
+        gcs_project_id: Optional[str] = Field(
+            default=None,
+            description="Google Cloud Storage project ID"
+        )
+
     def __init__(self, config: Optional[Dict] = None):
         """Initialize DocumentParserTool with settings"""
         super().__init__(config)
-
-
-
-                # For BaseSettings, use dictionary unpacking
-                self.settings = DocumentParserSettings(**config)
-            except ValidationError as e:
-                raise ValueError(f"Invalid settings: {e}")
-        else:
-            self.settings = DocumentParserSettings()
+
+        # Parse configuration
+        self.config = self.Config(**(config or {}))
 
         self.logger = logging.getLogger(__name__)
-        os.makedirs(self.
+        os.makedirs(self.config.temp_dir, exist_ok=True)
 
         # Initialize dependent tools
         self._init_dependent_tools()
@@ -151,15 +171,15 @@ class DocumentParserTool(BaseTool):
         """Initialize cloud storage for document retrieval"""
         self.file_storage = None
 
-        if self.
+        if self.config.enable_cloud_storage:
             try:
                 from aiecs.infrastructure.persistence.file_storage import FileStorage
 
                 storage_config = {
-                    'gcs_bucket_name': self.
-                    'gcs_project_id': self.
+                    'gcs_bucket_name': self.config.gcs_bucket_name,
+                    'gcs_project_id': self.config.gcs_project_id,
                     'enable_local_fallback': True,
-                    'local_storage_path': self.
+                    'local_storage_path': self.config.temp_dir
                 }
 
                 self.file_storage = FileStorage(storage_config)
@@ -538,7 +558,7 @@ class DocumentParserTool(BaseTool):
         # Generate temp file path
         parsed_url = urlparse(url)
         filename = os.path.basename(parsed_url.path) or "document"
-        temp_path = os.path.join(self.
+        temp_path = os.path.join(self.config.temp_dir, f"download_{hash(url)}_{filename}")
 
         # Download using scraper tool
         result = asyncio.run(self.scraper_tool.get_httpx(
@@ -573,7 +593,7 @@ class DocumentParserTool(BaseTool):
 
         # Generate local temp file path
         temp_filename = f"cloud_download_{hash(source)}_{Path(storage_path).name}"
-        temp_path = os.path.join(self.
+        temp_path = os.path.join(self.config.temp_dir, temp_filename)
 
         self.logger.info(f"Downloading from cloud storage: {source} -> {temp_path}")
 
@@ -720,7 +740,7 @@ class DocumentParserTool(BaseTool):
     def _parse_text_document(self, file_path: str, doc_type: DocumentType, strategy: ParsingStrategy) -> Union[str, Dict[str, Any]]:
         """Parse text-based documents"""
         try:
-            with open(file_path, 'r', encoding=self.
+            with open(file_path, 'r', encoding=self.config.default_encoding, errors='ignore') as f:
                 content = f.read()
 
             if strategy == ParsingStrategy.TEXT_ONLY:
@@ -836,7 +856,7 @@ class DocumentParserTool(BaseTool):
 
         if self._is_url(source):
             # Clean up URL downloaded files
-            temp_pattern = os.path.join(self.
+            temp_pattern = os.path.join(self.config.temp_dir, f"download_{hash(source)}_*")
             for temp_file in glob.glob(temp_pattern):
                 try:
                     os.remove(temp_file)
@@ -846,7 +866,7 @@ class DocumentParserTool(BaseTool):
 
         elif self._is_cloud_storage_path(source) or self._is_storage_id(source):
             # Clean up cloud storage downloaded files
-            temp_pattern = os.path.join(self.
+            temp_pattern = os.path.join(self.config.temp_dir, f"cloud_download_{hash(source)}_*")
             for temp_file in glob.glob(temp_pattern):
                 try:
                     os.remove(temp_file)
@@ -897,7 +917,7 @@ class DocumentParserTool(BaseTool):
     def _extract_text_fallback(self, file_path: str) -> str:
         """Fallback text extraction method"""
         try:
-            with open(file_path, 'r', encoding=self.
+            with open(file_path, 'r', encoding=self.config.default_encoding, errors='ignore') as f:
                 return f.read()
         except:
             with open(file_path, 'rb') as f:
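The document_parser_tool.py changes follow the same pattern: the standalone DocumentParserSettings model is removed, and every path, encoding, and cloud-storage lookup moves from self.settings.* to self.config.*. A small sketch of overriding the new defaults; the import path is inferred from the file list, and the field names come from the Config schema above.

from aiecs.tools.docs.document_parser_tool import DocumentParserTool  # path inferred from the file list

parser = DocumentParserTool(config={
    "max_file_size": 10 * 1024 * 1024,  # tighten the 50 MB default
    "enable_cloud_storage": False,      # skip FileStorage setup in _init_cloud_storage()
})
print(parser.config.temp_dir)  # defaults to <tempdir>/document_parser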
--- a/aiecs/tools/docs/document_writer_tool.py
+++ b/aiecs/tools/docs/document_writer_tool.py
@@ -12,9 +12,8 @@ from datetime import datetime
 from pathlib import Path
 import tempfile
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, ConfigDict
 from pydantic import ValidationError as PydanticValidationError
-from pydantic_settings import BaseSettings
 
 from aiecs.tools.base_tool import BaseTool
 from aiecs.tools import register_tool
@@ -85,35 +84,6 @@ class ValidationLevel(str, Enum):
     ENTERPRISE = "enterprise"  # Enterprise-level validation (security, compliance)
 
 
-class DocumentWriterSettings(BaseSettings):
-    """Configuration for DocumentWriterTool"""
-    temp_dir: str = os.path.join(tempfile.gettempdir(), 'document_writer')
-    backup_dir: str = os.path.join(tempfile.gettempdir(), 'document_backups')
-    output_dir: Optional[str] = None  # Output directory
-    max_file_size: int = 100 * 1024 * 1024  # 100MB
-    max_backup_versions: int = 10
-    default_encoding: str = "utf-8"
-    enable_backup: bool = True
-    enable_versioning: bool = True
-    enable_content_validation: bool = True
-    enable_security_scan: bool = True
-    atomic_write: bool = True  # Atomic write
-    validation_level: str = "basic"  # Validation level
-    timeout_seconds: int = 60  # Operation timeout
-    auto_backup: bool = True  # Auto backup before write
-    atomic_writes: bool = True  # Atomic write operations
-    default_format: DocumentFormat = DocumentFormat.MARKDOWN  # Default document format
-    version_control: bool = True  # Enable version control
-    security_scan: bool = True  # Enable security scanning
-
-    # Cloud storage settings
-    enable_cloud_storage: bool = True
-    gcs_bucket_name: str = "aiecs-documents"
-    gcs_project_id: Optional[str] = None
-
-    class Config:
-        env_prefix = "DOC_WRITER_"
-        extra = "allow"  # Allow extra fields for flexibility
 
 
 class DocumentWriterError(Exception):
@@ -171,25 +141,108 @@ class DocumentWriterTool(BaseTool):
     - Audit logging
     """
 
+    # Configuration schema
+    class Config(BaseModel):
+        """Configuration for the document writer tool"""
+        model_config = ConfigDict(env_prefix="DOC_WRITER_")
+
+        temp_dir: str = Field(
+            default=os.path.join(tempfile.gettempdir(), 'document_writer'),
+            description="Temporary directory for document processing"
+        )
+        backup_dir: str = Field(
+            default=os.path.join(tempfile.gettempdir(), 'document_backups'),
+            description="Directory for document backups"
+        )
+        output_dir: Optional[str] = Field(
+            default=None,
+            description="Default output directory for documents"
+        )
+        max_file_size: int = Field(
+            default=100 * 1024 * 1024,
+            description="Maximum file size in bytes"
+        )
+        max_backup_versions: int = Field(
+            default=10,
+            description="Maximum number of backup versions to keep"
+        )
+        default_encoding: str = Field(
+            default="utf-8",
+            description="Default text encoding for documents"
+        )
+        enable_backup: bool = Field(
+            default=True,
+            description="Whether to enable automatic backup functionality"
+        )
+        enable_versioning: bool = Field(
+            default=True,
+            description="Whether to enable document versioning"
+        )
+        enable_content_validation: bool = Field(
+            default=True,
+            description="Whether to enable content validation"
+        )
+        enable_security_scan: bool = Field(
+            default=True,
+            description="Whether to enable security scanning"
+        )
+        atomic_write: bool = Field(
+            default=True,
+            description="Whether to use atomic write operations"
+        )
+        validation_level: str = Field(
+            default="basic",
+            description="Content validation level"
+        )
+        timeout_seconds: int = Field(
+            default=60,
+            description="Operation timeout in seconds"
+        )
+        auto_backup: bool = Field(
+            default=True,
+            description="Whether to automatically backup before write operations"
+        )
+        atomic_writes: bool = Field(
+            default=True,
+            description="Whether to use atomic write operations"
+        )
+        default_format: str = Field(
+            default="md",
+            description="Default document format"
+        )
+        version_control: bool = Field(
+            default=True,
+            description="Whether to enable version control"
+        )
+        security_scan: bool = Field(
+            default=True,
+            description="Whether to enable security scanning"
+        )
+        enable_cloud_storage: bool = Field(
+            default=True,
+            description="Whether to enable cloud storage integration"
+        )
+        gcs_bucket_name: str = Field(
+            default="aiecs-documents",
+            description="Google Cloud Storage bucket name"
+        )
+        gcs_project_id: Optional[str] = Field(
+            default=None,
+            description="Google Cloud Storage project ID"
+        )
+
     def __init__(self, config: Optional[Dict] = None):
         """Initialize DocumentWriterTool with settings"""
-
-
-
-
-
-        self.settings = DocumentWriterSettings()
-        if config:
-            try:
-                self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
-            except PydanticValidationError as e:
-                raise ValueError(f"Invalid settings: {e}")
+        super().__init__(config)
+
+        # Parse configuration
+        self.config = self.Config(**(config or {}))
 
         self.logger = logging.getLogger(__name__)
 
         # Create necessary directories
-        os.makedirs(self.
-        os.makedirs(self.
+        os.makedirs(self.config.temp_dir, exist_ok=True)
+        os.makedirs(self.config.backup_dir, exist_ok=True)
 
         # Initialize cloud storage
         self._init_cloud_storage()
@@ -201,15 +254,15 @@ class DocumentWriterTool(BaseTool):
         """Initialize cloud storage for document writing"""
         self.file_storage = None
 
-        if self.
+        if self.config.enable_cloud_storage:
             try:
                 from aiecs.infrastructure.persistence.file_storage import FileStorage
 
                 storage_config = {
-                    'gcs_bucket_name': self.
-                    'gcs_project_id': self.
+                    'gcs_bucket_name': self.config.gcs_bucket_name,
+                    'gcs_project_id': self.config.gcs_project_id,
                     'enable_local_fallback': True,
-                    'local_storage_path': self.
+                    'local_storage_path': self.config.temp_dir
                 }
 
                 self.file_storage = FileStorage(storage_config)
@@ -327,7 +380,7 @@ class DocumentWriterTool(BaseTool):
 
         # Step 4: Create backup if needed
         backup_info = None
-        if self.
+        if self.config.enable_backup and mode in [WriteMode.OVERWRITE, WriteMode.UPDATE]:
             backup_info = self._create_backup(target_path, backup_comment)
 
         # Step 5: Execute atomic write
@@ -487,8 +540,8 @@ class DocumentWriterTool(BaseTool):
 
         # Size validation
         content_size = self._calculate_content_size(content)
-        if content_size > self.
-            raise ValueError(f"Content size {content_size} exceeds maximum {self.
+        if content_size > self.config.max_file_size:
+            raise ValueError(f"Content size {content_size} exceeds maximum {self.config.max_file_size}")
 
         # Permission validation
         if not self._check_write_permission(target_path, mode):
@@ -523,7 +576,7 @@ class DocumentWriterTool(BaseTool):
             processed_content = str(content)
 
         # Content validation
-        if self.
+        if self.config.enable_content_validation:
             self._validate_content(processed_content, format, validation_level)
 
         # Calculate metadata
@@ -549,15 +602,15 @@ class DocumentWriterTool(BaseTool):
             "is_cloud_path": self._is_cloud_storage_path(target_path),
             "requires_backup": False,
             "requires_versioning": False,
-            "atomic_operation": self.
+            "atomic_operation": self.config.atomic_write
         }
 
         if mode == WriteMode.CREATE and plan["file_exists"]:
             raise DocumentWriterError(f"File already exists: {target_path}")
 
         if mode in [WriteMode.OVERWRITE, WriteMode.UPDATE] and plan["file_exists"]:
-            plan["requires_backup"] = self.
-            plan["requires_versioning"] = self.
+            plan["requires_backup"] = self.config.enable_backup
+            plan["requires_versioning"] = self.config.enable_versioning
 
         if mode == WriteMode.APPEND and not plan["file_exists"]:
             # Convert to CREATE mode
@@ -576,7 +629,7 @@ class DocumentWriterTool(BaseTool):
         file_suffix = Path(target_path).suffix
 
         backup_filename = f"{file_stem}_backup_{timestamp}{file_suffix}"
-        backup_path = os.path.join(self.
+        backup_path = os.path.join(self.config.backup_dir, backup_filename)
 
         # Copy file to backup location
         if self._is_cloud_storage_path(target_path):
@@ -715,7 +768,7 @@ class DocumentWriterTool(BaseTool):
     def _handle_versioning(self, target_path: str, content_metadata: Dict, metadata: Optional[Dict]) -> Optional[Dict]:
         """Handle document versioning"""
 
-        if not self.
+        if not self.config.enable_versioning:
            return None
 
         try:
@@ -733,8 +786,8 @@ class DocumentWriterTool(BaseTool):
             versions.append(version_info)
 
             # Keep only recent versions
-            if len(versions) > self.
-                versions = versions[-self.
+            if len(versions) > self.config.max_backup_versions:
+                versions = versions[-self.config.max_backup_versions:]
 
             self._save_version_history(version_file, versions)
 
@@ -1044,7 +1097,7 @@ class DocumentWriterTool(BaseTool):
 
         # Log to audit file
         try:
-            audit_file = os.path.join(self.
+            audit_file = os.path.join(self.config.temp_dir, "write_audit.log")
             with open(audit_file, "a") as f:
                 f.write(json.dumps(audit_info) + "\n")
         except Exception as e:
@@ -1514,7 +1567,7 @@ class DocumentWriterTool(BaseTool):
 
     def _store_clipboard_content(self, content: str):
         """Store content in clipboard (simplified implementation)"""
-        clipboard_file = os.path.join(self.
+        clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
         try:
             with open(clipboard_file, 'w', encoding='utf-8') as f:
                 f.write(content)
@@ -1523,7 +1576,7 @@ class DocumentWriterTool(BaseTool):
 
     def _get_clipboard_content(self) -> str:
         """Get content from clipboard"""
-        clipboard_file = os.path.join(self.
+        clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
         try:
             with open(clipboard_file, 'r', encoding='utf-8') as f:
                 return f.read()