aiecs-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic.

Files changed (90)
  1. aiecs/__init__.py +75 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +295 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +341 -0
  7. aiecs/config/__init__.py +15 -0
  8. aiecs/config/config.py +117 -0
  9. aiecs/config/registry.py +19 -0
  10. aiecs/core/__init__.py +46 -0
  11. aiecs/core/interface/__init__.py +34 -0
  12. aiecs/core/interface/execution_interface.py +150 -0
  13. aiecs/core/interface/storage_interface.py +214 -0
  14. aiecs/domain/__init__.py +20 -0
  15. aiecs/domain/context/__init__.py +28 -0
  16. aiecs/domain/context/content_engine.py +982 -0
  17. aiecs/domain/context/conversation_models.py +306 -0
  18. aiecs/domain/execution/__init__.py +12 -0
  19. aiecs/domain/execution/model.py +49 -0
  20. aiecs/domain/task/__init__.py +13 -0
  21. aiecs/domain/task/dsl_processor.py +460 -0
  22. aiecs/domain/task/model.py +50 -0
  23. aiecs/domain/task/task_context.py +257 -0
  24. aiecs/infrastructure/__init__.py +26 -0
  25. aiecs/infrastructure/messaging/__init__.py +13 -0
  26. aiecs/infrastructure/messaging/celery_task_manager.py +341 -0
  27. aiecs/infrastructure/messaging/websocket_manager.py +289 -0
  28. aiecs/infrastructure/monitoring/__init__.py +12 -0
  29. aiecs/infrastructure/monitoring/executor_metrics.py +138 -0
  30. aiecs/infrastructure/monitoring/structured_logger.py +50 -0
  31. aiecs/infrastructure/monitoring/tracing_manager.py +376 -0
  32. aiecs/infrastructure/persistence/__init__.py +12 -0
  33. aiecs/infrastructure/persistence/database_manager.py +286 -0
  34. aiecs/infrastructure/persistence/file_storage.py +671 -0
  35. aiecs/infrastructure/persistence/redis_client.py +162 -0
  36. aiecs/llm/__init__.py +54 -0
  37. aiecs/llm/base_client.py +99 -0
  38. aiecs/llm/client_factory.py +339 -0
  39. aiecs/llm/custom_callbacks.py +228 -0
  40. aiecs/llm/openai_client.py +125 -0
  41. aiecs/llm/vertex_client.py +186 -0
  42. aiecs/llm/xai_client.py +184 -0
  43. aiecs/main.py +351 -0
  44. aiecs/scripts/DEPENDENCY_SYSTEM_SUMMARY.md +241 -0
  45. aiecs/scripts/README_DEPENDENCY_CHECKER.md +309 -0
  46. aiecs/scripts/README_WEASEL_PATCH.md +126 -0
  47. aiecs/scripts/__init__.py +3 -0
  48. aiecs/scripts/dependency_checker.py +825 -0
  49. aiecs/scripts/dependency_fixer.py +348 -0
  50. aiecs/scripts/download_nlp_data.py +348 -0
  51. aiecs/scripts/fix_weasel_validator.py +121 -0
  52. aiecs/scripts/fix_weasel_validator.sh +82 -0
  53. aiecs/scripts/patch_weasel_library.sh +188 -0
  54. aiecs/scripts/quick_dependency_check.py +269 -0
  55. aiecs/scripts/run_weasel_patch.sh +41 -0
  56. aiecs/scripts/setup_nlp_data.sh +217 -0
  57. aiecs/tasks/__init__.py +2 -0
  58. aiecs/tasks/worker.py +111 -0
  59. aiecs/tools/__init__.py +196 -0
  60. aiecs/tools/base_tool.py +202 -0
  61. aiecs/tools/langchain_adapter.py +361 -0
  62. aiecs/tools/task_tools/__init__.py +82 -0
  63. aiecs/tools/task_tools/chart_tool.py +704 -0
  64. aiecs/tools/task_tools/classfire_tool.py +901 -0
  65. aiecs/tools/task_tools/image_tool.py +397 -0
  66. aiecs/tools/task_tools/office_tool.py +600 -0
  67. aiecs/tools/task_tools/pandas_tool.py +565 -0
  68. aiecs/tools/task_tools/report_tool.py +499 -0
  69. aiecs/tools/task_tools/research_tool.py +363 -0
  70. aiecs/tools/task_tools/scraper_tool.py +548 -0
  71. aiecs/tools/task_tools/search_api.py +7 -0
  72. aiecs/tools/task_tools/stats_tool.py +513 -0
  73. aiecs/tools/temp_file_manager.py +126 -0
  74. aiecs/tools/tool_executor/__init__.py +35 -0
  75. aiecs/tools/tool_executor/tool_executor.py +518 -0
  76. aiecs/utils/LLM_output_structor.py +409 -0
  77. aiecs/utils/__init__.py +23 -0
  78. aiecs/utils/base_callback.py +50 -0
  79. aiecs/utils/execution_utils.py +158 -0
  80. aiecs/utils/logging.py +1 -0
  81. aiecs/utils/prompt_loader.py +13 -0
  82. aiecs/utils/token_usage_repository.py +279 -0
  83. aiecs/ws/__init__.py +0 -0
  84. aiecs/ws/socket_server.py +41 -0
  85. aiecs-1.0.0.dist-info/METADATA +610 -0
  86. aiecs-1.0.0.dist-info/RECORD +90 -0
  87. aiecs-1.0.0.dist-info/WHEEL +5 -0
  88. aiecs-1.0.0.dist-info/entry_points.txt +7 -0
  89. aiecs-1.0.0.dist-info/licenses/LICENSE +225 -0
  90. aiecs-1.0.0.dist-info/top_level.txt +1 -0
aiecs/infrastructure/persistence/file_storage.py
@@ -0,0 +1,671 @@
+ """
+ File Storage Implementation with Google Cloud Storage
+
+ Provides file storage capabilities using Google Cloud Storage as the backend,
+ with support for local fallback and caching.
+ """
+
+ import os
+ import json
+ import logging
+ import asyncio
+ import aiofiles
+ from typing import Dict, List, Any, Optional, Union, BinaryIO
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ import hashlib
+ import gzip
+ import pickle
+
+ try:
+     from google.cloud import storage
+     from google.cloud.exceptions import NotFound, GoogleCloudError
+     from google.auth.exceptions import DefaultCredentialsError
+     GCS_AVAILABLE = True
+ except ImportError:
+     GCS_AVAILABLE = False
+     storage = None
+     NotFound = Exception
+     GoogleCloudError = Exception
+     DefaultCredentialsError = Exception
+
+ from ..monitoring.executor_metrics import ExecutorMetrics
+
+ logger = logging.getLogger(__name__)
+
+
+ class FileStorageError(Exception):
+     """Base exception for file storage operations."""
+     pass
+
+
+ class FileStorageConfig:
+     """Configuration for file storage."""
+
+     def __init__(self, config: Dict[str, Any]):
+         # Google Cloud Storage settings
+         self.gcs_bucket_name = config.get('gcs_bucket_name', 'multi-task-storage')
+         self.gcs_project_id = config.get('gcs_project_id')
+         self.gcs_credentials_path = config.get('gcs_credentials_path')
+         self.gcs_location = config.get('gcs_location', 'US')
+
+         # Local storage fallback
+         self.local_storage_path = config.get('local_storage_path', './storage')
+         self.enable_local_fallback = config.get('enable_local_fallback', True)
+
+         # Cache settings
+         self.enable_cache = config.get('enable_cache', True)
+         self.cache_ttl_seconds = config.get('cache_ttl_seconds', 3600)
+         self.max_cache_size_mb = config.get('max_cache_size_mb', 100)
+
+         # Performance settings
+         self.chunk_size = config.get('chunk_size', 8192)
+         self.max_retries = config.get('max_retries', 3)
+         self.timeout_seconds = config.get('timeout_seconds', 30)
+
+         # Compression settings
+         self.enable_compression = config.get('enable_compression', True)
+         self.compression_threshold_bytes = config.get('compression_threshold_bytes', 1024)
+
+         # Security settings
+         self.enable_encryption = config.get('enable_encryption', False)
+         self.encryption_key = config.get('encryption_key')
+
+
+ class FileStorage:
+     """
+     File storage implementation with Google Cloud Storage backend.
+
+     Features:
+     - Google Cloud Storage as primary backend
+     - Local filesystem fallback
+     - In-memory caching with TTL
+     - Automatic compression for large files
+     - Retry logic with exponential backoff
+     - Metrics collection
+     """
+
+     def __init__(self, config: Dict[str, Any]):
+         self.config = FileStorageConfig(config)
+         self._gcs_client = None
+         self._gcs_bucket = None
+         self._cache = {}
+         self._cache_timestamps = {}
+         self._initialized = False
+
+         # Metrics
+         self.metrics = ExecutorMetrics(enable_metrics=True)
+
+         # Ensure local storage directory exists
+         if self.config.enable_local_fallback:
+             Path(self.config.local_storage_path).mkdir(parents=True, exist_ok=True)
+
+     async def initialize(self) -> bool:
+         """
+         Initialize the file storage system.
+
+         Returns:
+             True if initialization was successful
+         """
+         try:
+             if GCS_AVAILABLE:
+                 await self._init_gcs()
+             else:
+                 logger.warning("Google Cloud Storage not available, using local storage only")
+
+             self._initialized = True
+             logger.info("File storage initialized successfully")
+             return True
+
+         except Exception as e:
+             logger.error(f"Failed to initialize file storage: {e}")
+             if not self.config.enable_local_fallback:
+                 raise FileStorageError(f"Storage initialization failed: {e}")
+
+             logger.info("Falling back to local storage only")
+             self._initialized = True
+             return True
+
+     async def _init_gcs(self):
+         """Initialize Google Cloud Storage client."""
+         try:
+             # Set credentials if provided
+             if self.config.gcs_credentials_path:
+                 os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.config.gcs_credentials_path
+
+             # Create client
+             self._gcs_client = storage.Client(project=self.config.gcs_project_id)
+
+             # Get or create bucket
+             try:
+                 self._gcs_bucket = self._gcs_client.bucket(self.config.gcs_bucket_name)
+                 # Test bucket access
+                 self._gcs_bucket.reload()
+                 logger.info(f"Connected to GCS bucket: {self.config.gcs_bucket_name}")
+
+             except NotFound:
+                 # Create bucket if it doesn't exist
+                 self._gcs_bucket = self._gcs_client.create_bucket(
+                     self.config.gcs_bucket_name,
+                     location=self.config.gcs_location
+                 )
+                 logger.info(f"Created GCS bucket: {self.config.gcs_bucket_name}")
+
+         except DefaultCredentialsError:
+             logger.warning("GCS credentials not found, using local storage only")
+             self._gcs_client = None
+             self._gcs_bucket = None
+
+         except Exception as e:
+             logger.error(f"Failed to initialize GCS: {e}")
+             self._gcs_client = None
+             self._gcs_bucket = None
+
+     async def store(self, key: str, data: Union[str, bytes, Dict[str, Any]],
+                     metadata: Optional[Dict[str, Any]] = None) -> bool:
+         """
+         Store data with the given key.
+
+         Args:
+             key: Storage key
+             data: Data to store
+             metadata: Optional metadata
+
+         Returns:
+             True if storage was successful
+         """
+         if not self._initialized:
+             await self.initialize()
+
+         start_time = datetime.utcnow()
+
+         try:
+             # Serialize data
+             serialized_data = await self._serialize_data(data)
+
+             # Compress if enabled and data is large enough
+             if (self.config.enable_compression and
+                     len(serialized_data) > self.config.compression_threshold_bytes):
+                 serialized_data = gzip.compress(serialized_data)
+                 compressed = True
+             else:
+                 compressed = False
+
+             # Store in cache
+             if self.config.enable_cache:
+                 self._cache[key] = {
+                     'data': data,
+                     'metadata': metadata,
+                     'compressed': compressed
+                 }
+                 self._cache_timestamps[key] = datetime.utcnow()
+                 await self._cleanup_cache()
+
+             # Store in GCS if available
+             if self._gcs_bucket:
+                 success = await self._store_gcs(key, serialized_data, metadata, compressed)
+                 if success:
+                     self.metrics.record_operation('gcs_store_success', 1)
+                     duration = (datetime.utcnow() - start_time).total_seconds()
+                     self.metrics.record_duration('gcs_store_duration', duration)
+                     return True
+
+             # Fallback to local storage
+             if self.config.enable_local_fallback:
+                 success = await self._store_local(key, serialized_data, metadata, compressed)
+                 if success:
+                     self.metrics.record_operation('local_store_success', 1)
+                     duration = (datetime.utcnow() - start_time).total_seconds()
+                     self.metrics.record_duration('local_store_duration', duration)
+                     return True
+
+             self.metrics.record_operation('store_failure', 1)
+             return False
+
+         except Exception as e:
+             logger.error(f"Failed to store data for key {key}: {e}")
+             self.metrics.record_operation('store_error', 1)
+             raise FileStorageError(f"Storage failed: {e}")
+
+     async def retrieve(self, key: str) -> Optional[Union[str, bytes, Dict[str, Any]]]:
+         """
+         Retrieve data by key.
+
+         Args:
+             key: Storage key
+
+         Returns:
+             The stored data if found, None otherwise
+         """
+         if not self._initialized:
+             await self.initialize()
+
+         start_time = datetime.utcnow()
+
+         try:
+             # Check cache first
+             if self.config.enable_cache and key in self._cache:
+                 cache_time = self._cache_timestamps.get(key)
+                 if cache_time and (datetime.utcnow() - cache_time).total_seconds() < self.config.cache_ttl_seconds:
+                     self.metrics.record_operation('cache_hit', 1)
+                     return self._cache[key]['data']
+                 else:
+                     # Remove expired cache entry
+                     self._cache.pop(key, None)
+                     self._cache_timestamps.pop(key, None)
+
+             # Try GCS first
+             if self._gcs_bucket:
+                 data = await self._retrieve_gcs(key)
+                 if data is not None:
+                     self.metrics.record_operation('gcs_retrieve_success', 1)
+                     duration = (datetime.utcnow() - start_time).total_seconds()
+                     self.metrics.record_duration('gcs_retrieve_duration', duration)
+
+                     # Update cache
+                     if self.config.enable_cache:
+                         self._cache[key] = {'data': data, 'metadata': {}}
+                         self._cache_timestamps[key] = datetime.utcnow()
+
+                     return data
+
+             # Fallback to local storage
+             if self.config.enable_local_fallback:
+                 data = await self._retrieve_local(key)
+                 if data is not None:
+                     self.metrics.record_operation('local_retrieve_success', 1)
+                     duration = (datetime.utcnow() - start_time).total_seconds()
+                     self.metrics.record_duration('local_retrieve_duration', duration)
+
+                     # Update cache
+                     if self.config.enable_cache:
+                         self._cache[key] = {'data': data, 'metadata': {}}
+                         self._cache_timestamps[key] = datetime.utcnow()
+
+                     return data
+
+             self.metrics.record_operation('retrieve_not_found', 1)
+             return None
+
+         except Exception as e:
+             logger.error(f"Failed to retrieve data for key {key}: {e}")
+             self.metrics.record_operation('retrieve_error', 1)
+             raise FileStorageError(f"Retrieval failed: {e}")
+
+     async def delete(self, key: str) -> bool:
+         """
+         Delete data by key.
+
+         Args:
+             key: Storage key
+
+         Returns:
+             True if deletion was successful
+         """
+         if not self._initialized:
+             await self.initialize()
+
+         try:
+             success = True
+
+             # Remove from cache
+             if self.config.enable_cache:
+                 self._cache.pop(key, None)
+                 self._cache_timestamps.pop(key, None)
+
+             # Delete from GCS
+             if self._gcs_bucket:
+                 gcs_success = await self._delete_gcs(key)
+                 if gcs_success:
+                     self.metrics.record_operation('gcs_delete_success', 1)
+                 else:
+                     success = False
+
+             # Delete from local storage
+             if self.config.enable_local_fallback:
+                 local_success = await self._delete_local(key)
+                 if local_success:
+                     self.metrics.record_operation('local_delete_success', 1)
+                 else:
+                     success = False
+
+             if success:
+                 self.metrics.record_operation('delete_success', 1)
+             else:
+                 self.metrics.record_operation('delete_failure', 1)
+
+             return success
+
+         except Exception as e:
+             logger.error(f"Failed to delete data for key {key}: {e}")
+             self.metrics.record_operation('delete_error', 1)
+             raise FileStorageError(f"Deletion failed: {e}")
+
+     async def exists(self, key: str) -> bool:
+         """
+         Check if data exists for the given key.
+
+         Args:
+             key: Storage key
+
+         Returns:
+             True if data exists
+         """
+         if not self._initialized:
+             await self.initialize()
+
+         try:
+             # Check cache first
+             if self.config.enable_cache and key in self._cache:
+                 cache_time = self._cache_timestamps.get(key)
+                 if cache_time and (datetime.utcnow() - cache_time).total_seconds() < self.config.cache_ttl_seconds:
+                     return True
+
+             # Check GCS
+             if self._gcs_bucket:
+                 if await self._exists_gcs(key):
+                     return True
+
+             # Check local storage
+             if self.config.enable_local_fallback:
+                 return await self._exists_local(key)
+
+             return False
+
+         except Exception as e:
+             logger.error(f"Failed to check existence for key {key}: {e}")
+             raise FileStorageError(f"Existence check failed: {e}")
+
+     async def list_keys(self, prefix: Optional[str] = None, limit: Optional[int] = None) -> List[str]:
+         """
+         List storage keys with optional prefix filtering.
+
+         Args:
+             prefix: Optional key prefix filter
+             limit: Maximum number of keys to return
+
+         Returns:
+             List of storage keys
+         """
+         if not self._initialized:
+             await self.initialize()
+
+         try:
+             keys = set()
+
+             # Get keys from GCS
+             if self._gcs_bucket:
+                 gcs_keys = await self._list_keys_gcs(prefix, limit)
+                 keys.update(gcs_keys)
+
+             # Get keys from local storage
+             if self.config.enable_local_fallback:
+                 local_keys = await self._list_keys_local(prefix, limit)
+                 keys.update(local_keys)
+
+             # Apply limit if specified
+             keys_list = list(keys)
+             if limit:
+                 keys_list = keys_list[:limit]
+
+             return keys_list
+
+         except Exception as e:
+             logger.error(f"Failed to list keys: {e}")
+             raise FileStorageError(f"Key listing failed: {e}")
+
+     # GCS implementation methods
+
+     async def _store_gcs(self, key: str, data: bytes, metadata: Optional[Dict[str, Any]], compressed: bool) -> bool:
+         """Store data in Google Cloud Storage."""
+         try:
+             blob = self._gcs_bucket.blob(key)
+
+             # Set metadata
+             if metadata:
+                 blob.metadata = metadata
+             if compressed:
+                 blob.content_encoding = 'gzip'
+
+             # Upload data
+             blob.upload_from_string(data)
+             return True
+
+         except Exception as e:
+             logger.error(f"GCS store failed for key {key}: {e}")
+             return False
+
+     async def _retrieve_gcs(self, key: str) -> Optional[Any]:
+         """Retrieve data from Google Cloud Storage."""
+         try:
+             blob = self._gcs_bucket.blob(key)
+
+             if not blob.exists():
+                 return None
+
+             # Download data
+             data = blob.download_as_bytes()
+
+             # Decompress if needed
+             if blob.content_encoding == 'gzip':
+                 data = gzip.decompress(data)
+
+             # Deserialize data
+             return await self._deserialize_data(data)
+
+         except NotFound:
+             return None
+         except Exception as e:
+             logger.error(f"GCS retrieve failed for key {key}: {e}")
+             return None
+
+     async def _delete_gcs(self, key: str) -> bool:
+         """Delete data from Google Cloud Storage."""
+         try:
+             blob = self._gcs_bucket.blob(key)
+             blob.delete()
+             return True
+
+         except NotFound:
+             return True  # Already deleted
+         except Exception as e:
+             logger.error(f"GCS delete failed for key {key}: {e}")
+             return False
+
+     async def _exists_gcs(self, key: str) -> bool:
+         """Check if data exists in Google Cloud Storage."""
+         try:
+             blob = self._gcs_bucket.blob(key)
+             return blob.exists()
+
+         except Exception as e:
+             logger.error(f"GCS exists check failed for key {key}: {e}")
+             return False
+
+     async def _list_keys_gcs(self, prefix: Optional[str], limit: Optional[int]) -> List[str]:
+         """List keys from Google Cloud Storage."""
+         try:
+             blobs = self._gcs_bucket.list_blobs(prefix=prefix, max_results=limit)
+             return [blob.name for blob in blobs]
+
+         except Exception as e:
+             logger.error(f"GCS list keys failed: {e}")
+             return []
+
+     # Local storage implementation methods
+
+     async def _store_local(self, key: str, data: bytes, metadata: Optional[Dict[str, Any]], compressed: bool) -> bool:
+         """Store data in local filesystem."""
+         try:
+             file_path = Path(self.config.local_storage_path) / key
+             file_path.parent.mkdir(parents=True, exist_ok=True)
+
+             async with aiofiles.open(file_path, 'wb') as f:
+                 await f.write(data)
+
+             # Store metadata separately
+             if metadata:
+                 metadata_path = file_path.with_suffix('.metadata')
+                 metadata_with_compression = {**metadata, 'compressed': compressed}
+                 async with aiofiles.open(metadata_path, 'w') as f:
+                     await f.write(json.dumps(metadata_with_compression))
+
+             return True
+
+         except Exception as e:
+             logger.error(f"Local store failed for key {key}: {e}")
+             return False
+
+     async def _retrieve_local(self, key: str) -> Optional[Any]:
+         """Retrieve data from local filesystem."""
+         try:
+             file_path = Path(self.config.local_storage_path) / key
+
+             if not file_path.exists():
+                 return None
+
+             async with aiofiles.open(file_path, 'rb') as f:
+                 data = await f.read()
+
+             # Check for compression metadata
+             metadata_path = file_path.with_suffix('.metadata')
+             compressed = False
+             if metadata_path.exists():
+                 async with aiofiles.open(metadata_path, 'r') as f:
+                     metadata = json.loads(await f.read())
+                     compressed = metadata.get('compressed', False)
+
+             # Decompress if needed
+             if compressed:
+                 data = gzip.decompress(data)
+
+             # Deserialize data
+             return await self._deserialize_data(data)
+
+         except Exception as e:
+             logger.error(f"Local retrieve failed for key {key}: {e}")
+             return None
+
+     async def _delete_local(self, key: str) -> bool:
+         """Delete data from local filesystem."""
+         try:
+             file_path = Path(self.config.local_storage_path) / key
+             metadata_path = file_path.with_suffix('.metadata')
+
+             success = True
+             if file_path.exists():
+                 file_path.unlink()
+
+             if metadata_path.exists():
+                 metadata_path.unlink()
+
+             return success
+
+         except Exception as e:
+             logger.error(f"Local delete failed for key {key}: {e}")
+             return False
+
+     async def _exists_local(self, key: str) -> bool:
+         """Check if data exists in local filesystem."""
+         try:
+             file_path = Path(self.config.local_storage_path) / key
+             return file_path.exists()
+
+         except Exception as e:
+             logger.error(f"Local exists check failed for key {key}: {e}")
+             return False
+
+     async def _list_keys_local(self, prefix: Optional[str], limit: Optional[int]) -> List[str]:
+         """List keys from local filesystem."""
+         try:
+             storage_path = Path(self.config.local_storage_path)
+             if not storage_path.exists():
+                 return []
+
+             keys = []
+             for file_path in storage_path.rglob('*'):
+                 if file_path.is_file() and not file_path.name.endswith('.metadata'):
+                     key = str(file_path.relative_to(storage_path))
+                     if not prefix or key.startswith(prefix):
+                         keys.append(key)
+                         if limit and len(keys) >= limit:
+                             break
+
+             return keys
+
+         except Exception as e:
+             logger.error(f"Local list keys failed: {e}")
+             return []
+
+     # Utility methods
+
+     async def _serialize_data(self, data: Union[str, bytes, Dict[str, Any]]) -> bytes:
+         """Serialize data for storage."""
+         if isinstance(data, bytes):
+             return data
+         elif isinstance(data, str):
+             return data.encode('utf-8')
+         else:
+             # Use pickle for complex objects
+             return pickle.dumps(data)
+
+     async def _deserialize_data(self, data: bytes) -> Any:
+         """Deserialize data from storage."""
+         try:
+             # Try to deserialize as pickle first
+             return pickle.loads(data)
+         except:
+             try:
+                 # Try as JSON
+                 return json.loads(data.decode('utf-8'))
+             except:
+                 # Return as string
+                 return data.decode('utf-8')
+
+     async def _cleanup_cache(self):
+         """Clean up expired cache entries."""
+         if not self.config.enable_cache:
+             return
+
+         current_time = datetime.utcnow()
+         expired_keys = []
+
+         for key, timestamp in self._cache_timestamps.items():
+             if (current_time - timestamp).total_seconds() > self.config.cache_ttl_seconds:
+                 expired_keys.append(key)
+
+         for key in expired_keys:
+             self._cache.pop(key, None)
+             self._cache_timestamps.pop(key, None)
+
+     def get_stats(self) -> Dict[str, Any]:
+         """Get storage statistics."""
+         return {
+             'initialized': self._initialized,
+             'gcs_available': self._gcs_bucket is not None,
+             'local_fallback_enabled': self.config.enable_local_fallback,
+             'cache_enabled': self.config.enable_cache,
+             'cache_size': len(self._cache),
+             'metrics': self.metrics.get_metrics_summary() if hasattr(self.metrics, 'get_metrics_summary') else {}
+         }
+
+
+ # Global instance
+ _file_storage_instance = None
+
+ def get_file_storage(config: Optional[Dict[str, Any]] = None) -> FileStorage:
+     """Get the global file storage instance."""
+     global _file_storage_instance
+     if _file_storage_instance is None:
+         if config is None:
+             from aiecs.config.config import get_settings
+             settings = get_settings()
+             config = settings.file_storage_config
+         _file_storage_instance = FileStorage(config)
+     return _file_storage_instance
+
+ async def initialize_file_storage(config: Optional[Dict[str, Any]] = None) -> FileStorage:
+     """Initialize and return the file storage instance."""
+     storage = get_file_storage(config)
+     await storage.initialize()
+     return storage
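
For orientation, a minimal usage sketch of how the FileStorage class above could be configured and initialized directly. It assumes the aiecs wheel and its dependencies (e.g. aiofiles) are installed; the bucket and project names are hypothetical placeholders, and every config key is optional, falling back to the defaults read in FileStorageConfig.

import asyncio

from aiecs.infrastructure.persistence.file_storage import FileStorage

# Hypothetical configuration; each key is read via config.get(...) with
# the defaults shown in FileStorageConfig, so any subset works.
config = {
    'gcs_bucket_name': 'my-example-bucket',   # placeholder, not a real bucket
    'gcs_project_id': 'my-example-project',   # placeholder
    'enable_local_fallback': True,            # fall back to ./storage without GCS
    'local_storage_path': './storage',
    'enable_cache': True,
    'cache_ttl_seconds': 3600,
    'enable_compression': True,
    'compression_threshold_bytes': 1024,
}

async def main():
    fs = FileStorage(config)
    # initialize() still returns True when GCS credentials are missing,
    # as long as enable_local_fallback is True.
    await fs.initialize()
    print(fs.get_stats())

asyncio.run(main())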
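A second sketch exercises the async round trip (store, exists, retrieve, list_keys, delete) through the module-level singleton helpers; the key and payload are illustrative only. Because _serialize_data pickles dict payloads, retrieve returns the original object rather than raw bytes.

import asyncio

from aiecs.infrastructure.persistence.file_storage import initialize_file_storage

async def main():
    # Force the local backend so the sketch runs without GCS credentials.
    fs = await initialize_file_storage({'enable_local_fallback': True})

    # Dicts are pickled, strings stored as UTF-8, bytes stored as-is.
    await fs.store('reports/summary', {'status': 'ok', 'rows': 42})

    assert await fs.exists('reports/summary')
    data = await fs.retrieve('reports/summary')   # -> {'status': 'ok', 'rows': 42}
    print(data, await fs.list_keys(prefix='reports/'))

    await fs.delete('reports/summary')

asyncio.run(main())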