genxai-framework 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +3 -0
- cli/commands/__init__.py +6 -0
- cli/commands/approval.py +85 -0
- cli/commands/audit.py +127 -0
- cli/commands/metrics.py +25 -0
- cli/commands/tool.py +389 -0
- cli/main.py +32 -0
- genxai/__init__.py +81 -0
- genxai/api/__init__.py +5 -0
- genxai/api/app.py +21 -0
- genxai/config/__init__.py +5 -0
- genxai/config/settings.py +37 -0
- genxai/connectors/__init__.py +19 -0
- genxai/connectors/base.py +122 -0
- genxai/connectors/kafka.py +92 -0
- genxai/connectors/postgres_cdc.py +95 -0
- genxai/connectors/registry.py +44 -0
- genxai/connectors/sqs.py +94 -0
- genxai/connectors/webhook.py +73 -0
- genxai/core/__init__.py +37 -0
- genxai/core/agent/__init__.py +32 -0
- genxai/core/agent/base.py +206 -0
- genxai/core/agent/config_io.py +59 -0
- genxai/core/agent/registry.py +98 -0
- genxai/core/agent/runtime.py +970 -0
- genxai/core/communication/__init__.py +6 -0
- genxai/core/communication/collaboration.py +44 -0
- genxai/core/communication/message_bus.py +192 -0
- genxai/core/communication/protocols.py +35 -0
- genxai/core/execution/__init__.py +22 -0
- genxai/core/execution/metadata.py +181 -0
- genxai/core/execution/queue.py +201 -0
- genxai/core/graph/__init__.py +30 -0
- genxai/core/graph/checkpoints.py +77 -0
- genxai/core/graph/edges.py +131 -0
- genxai/core/graph/engine.py +813 -0
- genxai/core/graph/executor.py +516 -0
- genxai/core/graph/nodes.py +161 -0
- genxai/core/graph/trigger_runner.py +40 -0
- genxai/core/memory/__init__.py +19 -0
- genxai/core/memory/base.py +72 -0
- genxai/core/memory/embedding.py +327 -0
- genxai/core/memory/episodic.py +448 -0
- genxai/core/memory/long_term.py +467 -0
- genxai/core/memory/manager.py +543 -0
- genxai/core/memory/persistence.py +297 -0
- genxai/core/memory/procedural.py +461 -0
- genxai/core/memory/semantic.py +526 -0
- genxai/core/memory/shared.py +62 -0
- genxai/core/memory/short_term.py +303 -0
- genxai/core/memory/vector_store.py +508 -0
- genxai/core/memory/working.py +211 -0
- genxai/core/state/__init__.py +6 -0
- genxai/core/state/manager.py +293 -0
- genxai/core/state/schema.py +115 -0
- genxai/llm/__init__.py +14 -0
- genxai/llm/base.py +150 -0
- genxai/llm/factory.py +329 -0
- genxai/llm/providers/__init__.py +1 -0
- genxai/llm/providers/anthropic.py +249 -0
- genxai/llm/providers/cohere.py +274 -0
- genxai/llm/providers/google.py +334 -0
- genxai/llm/providers/ollama.py +147 -0
- genxai/llm/providers/openai.py +257 -0
- genxai/llm/routing.py +83 -0
- genxai/observability/__init__.py +6 -0
- genxai/observability/logging.py +327 -0
- genxai/observability/metrics.py +494 -0
- genxai/observability/tracing.py +372 -0
- genxai/performance/__init__.py +39 -0
- genxai/performance/cache.py +256 -0
- genxai/performance/pooling.py +289 -0
- genxai/security/audit.py +304 -0
- genxai/security/auth.py +315 -0
- genxai/security/cost_control.py +528 -0
- genxai/security/default_policies.py +44 -0
- genxai/security/jwt.py +142 -0
- genxai/security/oauth.py +226 -0
- genxai/security/pii.py +366 -0
- genxai/security/policy_engine.py +82 -0
- genxai/security/rate_limit.py +341 -0
- genxai/security/rbac.py +247 -0
- genxai/security/validation.py +218 -0
- genxai/tools/__init__.py +21 -0
- genxai/tools/base.py +383 -0
- genxai/tools/builtin/__init__.py +131 -0
- genxai/tools/builtin/communication/__init__.py +15 -0
- genxai/tools/builtin/communication/email_sender.py +159 -0
- genxai/tools/builtin/communication/notification_manager.py +167 -0
- genxai/tools/builtin/communication/slack_notifier.py +118 -0
- genxai/tools/builtin/communication/sms_sender.py +118 -0
- genxai/tools/builtin/communication/webhook_caller.py +136 -0
- genxai/tools/builtin/computation/__init__.py +15 -0
- genxai/tools/builtin/computation/calculator.py +101 -0
- genxai/tools/builtin/computation/code_executor.py +183 -0
- genxai/tools/builtin/computation/data_validator.py +259 -0
- genxai/tools/builtin/computation/hash_generator.py +129 -0
- genxai/tools/builtin/computation/regex_matcher.py +201 -0
- genxai/tools/builtin/data/__init__.py +15 -0
- genxai/tools/builtin/data/csv_processor.py +213 -0
- genxai/tools/builtin/data/data_transformer.py +299 -0
- genxai/tools/builtin/data/json_processor.py +233 -0
- genxai/tools/builtin/data/text_analyzer.py +288 -0
- genxai/tools/builtin/data/xml_processor.py +175 -0
- genxai/tools/builtin/database/__init__.py +15 -0
- genxai/tools/builtin/database/database_inspector.py +157 -0
- genxai/tools/builtin/database/mongodb_query.py +196 -0
- genxai/tools/builtin/database/redis_cache.py +167 -0
- genxai/tools/builtin/database/sql_query.py +145 -0
- genxai/tools/builtin/database/vector_search.py +163 -0
- genxai/tools/builtin/file/__init__.py +17 -0
- genxai/tools/builtin/file/directory_scanner.py +214 -0
- genxai/tools/builtin/file/file_compressor.py +237 -0
- genxai/tools/builtin/file/file_reader.py +102 -0
- genxai/tools/builtin/file/file_writer.py +122 -0
- genxai/tools/builtin/file/image_processor.py +186 -0
- genxai/tools/builtin/file/pdf_parser.py +144 -0
- genxai/tools/builtin/test/__init__.py +15 -0
- genxai/tools/builtin/test/async_simulator.py +62 -0
- genxai/tools/builtin/test/data_transformer.py +99 -0
- genxai/tools/builtin/test/error_generator.py +82 -0
- genxai/tools/builtin/test/simple_math.py +94 -0
- genxai/tools/builtin/test/string_processor.py +72 -0
- genxai/tools/builtin/web/__init__.py +15 -0
- genxai/tools/builtin/web/api_caller.py +161 -0
- genxai/tools/builtin/web/html_parser.py +330 -0
- genxai/tools/builtin/web/http_client.py +187 -0
- genxai/tools/builtin/web/url_validator.py +162 -0
- genxai/tools/builtin/web/web_scraper.py +170 -0
- genxai/tools/custom/my_test_tool_2.py +9 -0
- genxai/tools/dynamic.py +105 -0
- genxai/tools/mcp_server.py +167 -0
- genxai/tools/persistence/__init__.py +6 -0
- genxai/tools/persistence/models.py +55 -0
- genxai/tools/persistence/service.py +322 -0
- genxai/tools/registry.py +227 -0
- genxai/tools/security/__init__.py +11 -0
- genxai/tools/security/limits.py +214 -0
- genxai/tools/security/policy.py +20 -0
- genxai/tools/security/sandbox.py +248 -0
- genxai/tools/templates.py +435 -0
- genxai/triggers/__init__.py +19 -0
- genxai/triggers/base.py +104 -0
- genxai/triggers/file_watcher.py +75 -0
- genxai/triggers/queue.py +68 -0
- genxai/triggers/registry.py +82 -0
- genxai/triggers/schedule.py +66 -0
- genxai/triggers/webhook.py +68 -0
- genxai/utils/__init__.py +1 -0
- genxai/utils/tokens.py +295 -0
- genxai_framework-0.1.0.dist-info/METADATA +495 -0
- genxai_framework-0.1.0.dist-info/RECORD +156 -0
- genxai_framework-0.1.0.dist-info/WHEEL +5 -0
- genxai_framework-0.1.0.dist-info/entry_points.txt +2 -0
- genxai_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
- genxai_framework-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Vector search tool for semantic similarity search."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class VectorSearchTool(Tool):
    """Perform semantic similarity search over an in-memory vector collection.

    The query is embedded with a simple character-based vectorizer (demo
    implementation) and ranked against the supplied vectors by cosine
    similarity. In production this would delegate to a real vector store
    (Pinecone, Weaviate, pgvector, ...), as noted in ``_execute``.
    """

    def __init__(self) -> None:
        """Initialize vector search tool metadata and parameter schema."""
        metadata = ToolMetadata(
            name="vector_search",
            description="Search vector databases for semantic similarity (supports in-memory vectors)",
            category=ToolCategory.DATABASE,
            tags=["vector", "semantic", "search", "similarity", "embeddings"],
            version="1.0.0",
        )

        parameters = [
            ToolParameter(
                name="query",
                type="string",
                description="Search query text",
                required=True,
            ),
            ToolParameter(
                name="vectors",
                type="object",
                description="Dictionary of id:vector pairs to search (for in-memory search)",
                required=True,
            ),
            ToolParameter(
                name="top_k",
                type="number",
                description="Number of top results to return",
                required=False,
                default=5,
                min_value=1,
                max_value=100,
            ),
            ToolParameter(
                name="threshold",
                type="number",
                description="Minimum similarity threshold (0-1)",
                required=False,
                default=0.0,
                min_value=0.0,
                max_value=1.0,
            ),
        ]

        super().__init__(metadata, parameters)

    async def _execute(
        self,
        query: str,
        vectors: Dict[str, List[float]],
        top_k: int = 5,
        threshold: float = 0.0,
    ) -> Dict[str, Any]:
        """Execute vector search.

        Args:
            query: Search query text to embed and compare.
            vectors: Dictionary of id -> vector to search.
            top_k: Number of top results to return.
            threshold: Minimum cosine similarity a result must reach.

        Returns:
            Dictionary with the query, matched ``results`` (id + similarity,
            best first), ``count``, ``total_searched``, and a ``success``
            flag; on failure, ``error`` holds the exception message.
        """
        result: Dict[str, Any] = {
            "query": query,
            "success": False,
        }

        try:
            # Simple in-memory vector search using cosine similarity.
            # In production, this would connect to Pinecone, Weaviate, etc.

            # The parameter schema declares top_k as "number", so callers
            # (e.g. JSON payloads) may pass a float like 5.0; list slicing
            # requires an int, so coerce explicitly to avoid a TypeError.
            top_k = int(top_k)

            # Convert query to a simple vector (character-based, demo only).
            query_vector = self._text_to_simple_vector(query)

            # Score every document vector against the query, keeping only
            # those that clear the threshold.
            similarities = []
            for doc_id, doc_vector in vectors.items():
                similarity = self._cosine_similarity(query_vector, doc_vector)
                if similarity >= threshold:
                    similarities.append({
                        "id": doc_id,
                        "similarity": similarity,
                    })

            # Best matches first.
            similarities.sort(key=lambda x: x["similarity"], reverse=True)

            top_results = similarities[:top_k]

            result.update({
                "results": top_results,
                "count": len(top_results),
                "total_searched": len(vectors),
                "success": True,
            })

        except Exception as e:
            result["error"] = str(e)

        logger.info("Vector search completed: success=%s", result["success"])
        return result

    def _text_to_simple_vector(self, text: str, dim: int = 128) -> List[float]:
        """Convert text to a simple vector (demo implementation).

        Each of the first ``dim`` characters maps to its code point divided
        by 255. NOTE(review): non-Latin-1 characters (ord > 255) produce
        components above 1.0 — acceptable for this demo embedding, since
        cosine similarity is scale-normalized anyway.

        Args:
            text: Input text.
            dim: Vector dimension.

        Returns:
            Vector representation of length ``dim``, zero-padded.
        """
        vector = [0.0] * dim
        for i, char in enumerate(text[:dim]):
            vector[i] = ord(char) / 255.0
        return vector

    def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Calculate cosine similarity between two vectors.

        Vectors of unequal length are truncated to the shorter length
        before comparison.

        Args:
            vec1: First vector.
            vec2: Second vector.

        Returns:
            Similarity score in [-1, 1]; 0.0 if either vector has zero
            magnitude (avoids division by zero).
        """
        # Ensure same length by truncating to the shorter vector.
        min_len = min(len(vec1), len(vec2))
        vec1 = vec1[:min_len]
        vec2 = vec2[:min_len]

        dot_product = sum(a * b for a, b in zip(vec1, vec2))

        mag1 = sum(a * a for a in vec1) ** 0.5
        mag2 = sum(b * b for b in vec2) ** 0.5

        # Avoid division by zero for degenerate (all-zero) vectors.
        if mag1 == 0 or mag2 == 0:
            return 0.0

        return dot_product / (mag1 * mag2)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""File tools for GenXAI."""
|
|
2
|
+
|
|
3
|
+
from genxai.tools.builtin.file.file_reader import FileReaderTool
|
|
4
|
+
from genxai.tools.builtin.file.file_writer import FileWriterTool
|
|
5
|
+
from genxai.tools.builtin.file.pdf_parser import PDFParserTool
|
|
6
|
+
from genxai.tools.builtin.file.image_processor import ImageProcessorTool
|
|
7
|
+
from genxai.tools.builtin.file.file_compressor import FileCompressorTool
|
|
8
|
+
from genxai.tools.builtin.file.directory_scanner import DirectoryScannerTool
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"FileReaderTool",
|
|
12
|
+
"FileWriterTool",
|
|
13
|
+
"PDFParserTool",
|
|
14
|
+
"ImageProcessorTool",
|
|
15
|
+
"FileCompressorTool",
|
|
16
|
+
"DirectoryScannerTool",
|
|
17
|
+
]
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Directory scanner tool for scanning and analyzing directory structures."""
|
|
2
|
+
|
|
3
|
+
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional

from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DirectoryScannerTool(Tool):
    """Scan directories and analyze file structures."""

    def __init__(self) -> None:
        """Initialize directory scanner tool metadata and parameter schema."""
        metadata = ToolMetadata(
            name="directory_scanner",
            description="Scan directories, list files, and analyze directory structures",
            category=ToolCategory.FILE,
            tags=["directory", "scan", "files", "structure", "analysis"],
            version="1.0.0",
        )

        parameters = [
            ToolParameter(
                name="path",
                type="string",
                description="Path to directory to scan",
                required=True,
            ),
            ToolParameter(
                name="recursive",
                type="boolean",
                description="Whether to scan recursively",
                required=False,
                default=True,
            ),
            ToolParameter(
                name="include_hidden",
                type="boolean",
                description="Whether to include hidden files",
                required=False,
                default=False,
            ),
            ToolParameter(
                name="file_pattern",
                type="string",
                description="File pattern to match (e.g., '*.py', '*.txt')",
                required=False,
                pattern=r"^\*?\.\w+$",
            ),
            ToolParameter(
                name="max_depth",
                type="number",
                description="Maximum depth for recursive scan",
                required=False,
                default=10,
                min_value=1,
                max_value=50,
            ),
        ]

        super().__init__(metadata, parameters)

    async def _execute(
        self,
        path: str,
        recursive: bool = True,
        include_hidden: bool = False,
        file_pattern: Optional[str] = None,
        max_depth: int = 10,
    ) -> Dict[str, Any]:
        """Execute directory scanning.

        Args:
            path: Directory to scan.
            recursive: Whether to walk subdirectories.
            include_hidden: Whether to include hidden (dot-prefixed) entries.
            file_pattern: Glob pattern files must match (e.g. '*.py').
            max_depth: Maximum depth for the recursive walk.

        Returns:
            Dictionary with ``files``, ``directories``, their counts, total
            sizes, per-extension statistics, and a ``success`` flag; on
            failure, ``error`` holds the failure message.
        """
        result: Dict[str, Any] = {
            "path": path,
            "success": False,
        }

        try:
            if not os.path.exists(path):
                raise FileNotFoundError(f"Directory not found: {path}")

            if not os.path.isdir(path):
                raise ValueError(f"Path is not a directory: {path}")

            files: List[Dict[str, Any]] = []
            directories: List[Dict[str, Any]] = []

            if recursive:
                for root, dirs, filenames in os.walk(path):
                    # Depth of `root` relative to the scan root.
                    depth = root[len(path):].count(os.sep)
                    if depth >= max_depth:
                        dirs.clear()  # Don't recurse deeper
                        continue

                    # Prune hidden directories in place so os.walk skips them.
                    if not include_hidden:
                        dirs[:] = [d for d in dirs if not d.startswith('.')]

                    for dirname in dirs:
                        dir_path = os.path.join(root, dirname)
                        directories.append({
                            "name": dirname,
                            "path": dir_path,
                            "relative_path": os.path.relpath(dir_path, path),
                            "depth": depth + 1,
                        })

                    for filename in filenames:
                        if not include_hidden and filename.startswith('.'):
                            continue
                        if file_pattern and not Path(filename).match(file_pattern):
                            continue

                        entry = self._file_entry(
                            os.path.join(root, filename), path, filename, depth
                        )
                        if entry is not None:
                            files.append(entry)
            else:
                # Non-recursive scan: only direct children of `path`.
                for item in os.listdir(path):
                    if not include_hidden and item.startswith('.'):
                        continue

                    item_path = os.path.join(path, item)

                    if os.path.isdir(item_path):
                        directories.append({
                            "name": item,
                            "path": item_path,
                            "relative_path": item,
                            "depth": 0,
                        })
                    else:
                        if file_pattern and not Path(item).match(file_pattern):
                            continue

                        entry = self._file_entry(item_path, path, item, 0)
                        if entry is not None:
                            files.append(entry)

            # Aggregate statistics per file extension.
            total_size = sum(f["size"] for f in files)
            file_types: Dict[str, Dict[str, int]] = {}
            for entry in files:
                ext = entry["extension"] or "no_extension"
                stats = file_types.setdefault(ext, {"count": 0, "total_size": 0})
                stats["count"] += 1
                stats["total_size"] += entry["size"]

            result.update({
                "files": files,
                "directories": directories,
                "file_count": len(files),
                "directory_count": len(directories),
                "total_size": total_size,
                "total_size_mb": round(total_size / (1024 * 1024), 2),
                "file_types": file_types,
                "success": True,
            })

        except FileNotFoundError as e:
            result["error"] = str(e)
        except PermissionError:
            result["error"] = f"Permission denied: {path}"
        except Exception as e:
            result["error"] = str(e)

        logger.info("Directory scan completed: success=%s", result["success"])
        return result

    def _file_entry(
        self, file_path: str, base_path: str, name: str, depth: int
    ) -> Optional[Dict[str, Any]]:
        """Build the metadata record for one file, or None if it is unreadable.

        A broken symlink or a file deleted mid-scan previously made
        ``os.stat`` raise and abort the entire scan; such entries are now
        skipped so one bad file cannot sink the whole result.
        """
        try:
            file_stat = os.stat(file_path)
        except OSError:
            logger.debug("Skipping unreadable entry: %s", file_path)
            return None

        return {
            "name": name,
            "path": file_path,
            "relative_path": os.path.relpath(file_path, base_path),
            "size": file_stat.st_size,
            "extension": Path(name).suffix,
            "modified_time": file_stat.st_mtime,
            "depth": depth,
        }
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""File compressor tool for compressing and decompressing files."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import zipfile
|
|
7
|
+
import tarfile
|
|
8
|
+
import gzip
|
|
9
|
+
import shutil
|
|
10
|
+
|
|
11
|
+
from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FileCompressorTool(Tool):
    """Compress and decompress files using various formats (ZIP, TAR, GZIP)."""

    def __init__(self) -> None:
        """Initialize file compressor tool metadata and parameter schema."""
        metadata = ToolMetadata(
            name="file_compressor",
            description="Compress and decompress files and directories using ZIP, TAR, or GZIP",
            category=ToolCategory.FILE,
            tags=["compression", "zip", "tar", "gzip", "archive"],
            version="1.0.0",
        )

        parameters = [
            ToolParameter(
                name="operation",
                type="string",
                description="Operation to perform",
                required=True,
                enum=["compress", "decompress", "list"],
            ),
            ToolParameter(
                name="source_path",
                type="string",
                description="Source file or directory path",
                required=True,
            ),
            ToolParameter(
                name="output_path",
                type="string",
                description="Output archive path (for compress) or extraction directory (for decompress)",
                required=False,
            ),
            ToolParameter(
                name="format",
                type="string",
                description="Compression format",
                required=False,
                default="zip",
                enum=["zip", "tar", "tar.gz", "gzip"],
            ),
        ]

        super().__init__(metadata, parameters)

    async def _execute(
        self,
        operation: str,
        source_path: str,
        output_path: Optional[str] = None,
        format: str = "zip",
    ) -> Dict[str, Any]:
        """Execute file compression/decompression.

        Args:
            operation: One of "compress", "decompress", or "list".
            source_path: Source file/directory (compress) or archive path.
            output_path: Output archive path (compress) or extraction
                directory (decompress); unused for "list".
            format: Archive format ("zip", "tar", "tar.gz", or "gzip").

        Returns:
            Dictionary with the operation, format, format-specific details
            (output path, file counts, sizes), and a ``success`` flag; on
            failure, ``error`` holds the failure message.
        """
        result: Dict[str, Any] = {
            "operation": operation,
            "format": format,
            "success": False,
        }

        try:
            if operation == "compress":
                if not output_path:
                    raise ValueError("output_path required for compress operation")

                if format == "zip":
                    result.update(self._compress_zip(source_path, output_path))
                elif format in ["tar", "tar.gz"]:
                    result.update(self._compress_tar(source_path, output_path, format))
                elif format == "gzip":
                    result.update(self._compress_gzip(source_path, output_path))

            elif operation == "decompress":
                if not output_path:
                    raise ValueError("output_path required for decompress operation")

                if format == "zip":
                    result.update(self._decompress_zip(source_path, output_path))
                elif format in ["tar", "tar.gz"]:
                    result.update(self._decompress_tar(source_path, output_path))
                elif format == "gzip":
                    result.update(self._decompress_gzip(source_path, output_path))

            elif operation == "list":
                if format == "zip":
                    result.update(self._list_zip(source_path))
                elif format in ["tar", "tar.gz"]:
                    result.update(self._list_tar(source_path))
                else:
                    # Previously this combination silently reported success
                    # with no listing; fail loudly instead.
                    raise ValueError(f"list operation not supported for format: {format}")

            result["success"] = True

        except FileNotFoundError:
            result["error"] = f"File not found: {source_path}"
        except Exception as e:
            result["error"] = str(e)

        logger.info(
            "File %s (%s) completed: success=%s", operation, format, result["success"]
        )
        return result

    def _compress_zip(self, source_path: str, output_path: str) -> Dict[str, Any]:
        """Compress a file or directory tree to ZIP format."""
        with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            if os.path.isfile(source_path):
                zipf.write(source_path, os.path.basename(source_path))
                file_count = 1
            else:
                file_count = 0
                for root, dirs, files in os.walk(source_path):
                    for file in files:
                        file_path = os.path.join(root, file)
                        # Store paths relative to the source root.
                        arcname = os.path.relpath(file_path, source_path)
                        zipf.write(file_path, arcname)
                        file_count += 1

        return {
            "output_path": output_path,
            "file_count": file_count,
            "compressed_size": os.path.getsize(output_path),
        }

    def _compress_tar(self, source_path: str, output_path: str, format: str) -> Dict[str, Any]:
        """Compress a file or directory to TAR (optionally gzip-compressed)."""
        mode = "w:gz" if format == "tar.gz" else "w"

        with tarfile.open(output_path, mode) as tar:
            tar.add(source_path, arcname=os.path.basename(source_path))

        return {
            "output_path": output_path,
            "compressed_size": os.path.getsize(output_path),
        }

    def _compress_gzip(self, source_path: str, output_path: str) -> Dict[str, Any]:
        """Compress a single file to GZIP format."""
        with open(source_path, "rb") as f_in:
            with gzip.open(output_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)

        original_size = os.path.getsize(source_path)
        compressed_size = os.path.getsize(output_path)
        return {
            "output_path": output_path,
            "original_size": original_size,
            "compressed_size": compressed_size,
            # Guard against a zero-byte source: compression still succeeds,
            # but the ratio is undefined.
            "compression_ratio": (
                round(compressed_size / original_size, 2) if original_size else None
            ),
        }

    def _decompress_zip(self, source_path: str, output_path: str) -> Dict[str, Any]:
        """Decompress a ZIP archive into ``output_path``."""
        with zipfile.ZipFile(source_path, "r") as zipf:
            zipf.extractall(output_path)
            file_count = len(zipf.namelist())

        return {
            "output_path": output_path,
            "file_count": file_count,
        }

    def _decompress_tar(self, source_path: str, output_path: str) -> Dict[str, Any]:
        """Decompress a TAR archive into ``output_path``."""
        with tarfile.open(source_path, "r:*") as tar:
            try:
                # Python 3.12+: the "data" filter rejects absolute paths and
                # ".." members, blocking path traversal (CVE-2007-4559).
                tar.extractall(output_path, filter="data")
            except TypeError:
                # NOTE(security): older Python without the filter parameter —
                # extracting an untrusted archive can write outside
                # output_path.
                tar.extractall(output_path)
            file_count = len(tar.getmembers())

        return {
            "output_path": output_path,
            "file_count": file_count,
        }

    def _decompress_gzip(self, source_path: str, output_path: str) -> Dict[str, Any]:
        """Decompress a GZIP file to ``output_path``."""
        with gzip.open(source_path, "rb") as f_in:
            with open(output_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)

        return {
            "output_path": output_path,
            "decompressed_size": os.path.getsize(output_path),
        }

    def _list_zip(self, source_path: str) -> Dict[str, Any]:
        """List contents of a ZIP archive without extracting."""
        with zipfile.ZipFile(source_path, "r") as zipf:
            files = [
                {
                    "name": info.filename,
                    "size": info.file_size,
                    "compressed_size": info.compress_size,
                }
                for info in zipf.filelist
            ]

        return {
            "file_count": len(files),
            "files": files,
        }

    def _list_tar(self, source_path: str) -> Dict[str, Any]:
        """List contents of a TAR archive without extracting."""
        with tarfile.open(source_path, "r:*") as tar:
            files = [
                {
                    "name": member.name,
                    "size": member.size,
                    "type": "directory" if member.isdir() else "file",
                }
                for member in tar.getmembers()
            ]

        return {
            "file_count": len(files),
            "files": files,
        }
|