hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"""CSV Database Adapter - Async CSV file reading with schema inference.
|
|
2
|
+
|
|
3
|
+
This module provides a DatabasePort implementation for reading CSV files from a directory,
|
|
4
|
+
with automatic type inference and async I/O for non-blocking operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import csv
|
|
8
|
+
import logging
|
|
9
|
+
import re
|
|
10
|
+
from collections.abc import AsyncIterator
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import aiofiles
|
|
15
|
+
|
|
16
|
+
from hexdag.core.ports.database import ColumnSchema, ColumnType, DatabasePort, TableSchema
|
|
17
|
+
|
|
18
|
+
# Configure the root logger at import time so that the ``logging.warning``
# calls made by this module are emitted at INFO level and above.
# NOTE(review): this is a process-wide side effect triggered by importing the
# module; libraries usually leave root-logger configuration to the
# application — confirm this is intended.
logging.basicConfig(level=logging.INFO)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CsvAdapter(DatabasePort):
    """Expose the CSV files of one directory as read-only database tables.

    Every ``<name>.csv`` file directly inside the directory is treated as a
    table called ``<name>``. The adapter infers a simple column type for each
    header and supports row queries with equality/regex filters, column
    selection and a row limit. SQL is not supported.
    """

    def __init__(self, directory: str | Path) -> None:
        """Bind the adapter to a directory containing CSV files.

        Args:
            directory (str | Path): Path to the directory holding CSV files.

        Raises:
            ValueError: If ``directory`` does not exist or is not a directory.
        """
        self.__directory = Path(directory)
        # ``is_dir`` (rather than ``exists``) also rejects a path that points
        # at a regular file, which could never contain tables.
        if not self.__directory.is_dir():
            raise ValueError(f"Directory not found: {directory}")

    @property
    def directory(self) -> Path:
        """Return the base directory as a pathlib.Path object."""
        return self.__directory

    def _infer_type(self, values: list[str | None]) -> str:
        """Infer a column type from the raw string values of one column.

        Empty strings and ``None`` (which ``csv.DictReader`` supplies for rows
        shorter than the header) are ignored. The column is ``'int'`` when
        every remaining value parses with ``int()``, ``'float'`` when every
        remaining value parses with ``int()`` or ``float()``, and ``'text'``
        otherwise. Boolean-looking values (``'true'``/``'false'``) are not
        numeric and therefore make the column ``'text'``. A column without any
        non-empty value is reported as ``'text'`` because nothing indicates a
        numeric type.

        Args:
            values (list[str | None]): Sample values from a CSV column.

        Returns:
            str: Inferred data type (``'int'``, ``'float'`` or ``'text'``).
        """
        saw_value = False
        all_int = True
        for raw in values:
            if raw is None or raw == "":
                continue
            saw_value = True
            try:
                int(raw)
                continue
            except ValueError:
                all_int = False
            try:
                float(raw)
            except ValueError:
                # Neither int nor float: no numeric type can hold this column.
                return "text"
        if not saw_value:
            return "text"
        return "int" if all_int else "float"

    async def _read_text(self, file_path: Path) -> str:
        """Read a CSV file fully and return its text.

        The file is opened with ``newline=""`` as recommended by the ``csv``
        module so that line endings inside quoted fields are not translated.
        """
        async with aiofiles.open(file_path, newline="") as f:
            return await f.read()

    async def _load_table(
        self, file_path: Path
    ) -> tuple[list[str], list[dict[str, Any]]] | None:
        """Parse one CSV file into ``(fieldnames, rows)``.

        Returns ``None`` (after logging a warning) when the file has no header
        row, so callers can skip it.
        """
        content = await self._read_text(file_path)
        # keepends=True preserves newlines embedded in quoted fields; the csv
        # reader strips the real line terminators itself.
        reader = csv.DictReader(content.splitlines(keepends=True))
        if not reader.fieldnames:
            logging.getLogger(__name__).warning(
                "No headers found in CSV file %s, skipping.", file_path
            )
            return None
        return list(reader.fieldnames), list(reader)

    def _infer_columns(
        self, fieldnames: list[str], rows: list[dict[str, Any]]
    ) -> list[tuple[str, str]]:
        """Return ``(column name, inferred type)`` pairs in header order."""
        return [
            (name, self._infer_type([row.get(name, "") for row in rows]))
            for name in fieldnames
        ]

    async def aget_table_schemas(self) -> dict[str, dict[str, Any]]:
        """Get schema information for all CSV files in the adapter's directory.

        Each file is read completely, a type is inferred for every header
        column, and the result is described by a plain dictionary with the
        keys ``table_name``, ``columns`` (column name -> inferred type),
        ``primary_keys`` (always empty) and ``foreign_keys`` (always empty).
        Files without a header row are skipped with a warning. Files are
        processed in sorted path order so the result is deterministic.

        Returns:
            Dictionary mapping table names (file stems) to schema information.
        """
        schemas: dict[str, dict[str, Any]] = {}
        for file_path in sorted(self.directory.glob("*.csv")):
            table = await self._load_table(file_path)
            if table is None:
                continue
            fieldnames, rows = table
            primary_keys: list[str] = []  # CSV files carry no key metadata.
            schemas[file_path.stem] = {
                "table_name": file_path.stem,
                "columns": dict(self._infer_columns(fieldnames, rows)),
                "primary_keys": primary_keys,
                "foreign_keys": [],
            }
        return schemas

    async def aexecute_query(
        self, query: str, params: dict[str, Any] | None = None
    ) -> list[dict[str, Any]]:
        """Execute a query on CSV files.

        Note: CSV adapter doesn't support SQL queries. This method is required
        by DatabasePort but will raise NotImplementedError.

        Args:
            query: SQL query string (not supported)
            params: Optional query parameters (not supported)

        Raises:
            NotImplementedError: CSV adapter doesn't support SQL queries
        """
        raise NotImplementedError(
            "CSV adapter doesn't support SQL queries. Use query() method instead."
        )

    async def get_table_schemas(self) -> list[TableSchema]:
        """Generate table schemas for all CSV files in the adapter's directory.

        Each file is read completely and one ``ColumnSchema`` is built per
        header column; every column is marked nullable and non-primary-key.
        Files without a header row are skipped with a warning. Files are
        processed in sorted path order so the result is deterministic.

        Returns:
            list[TableSchema]: Inferred table schemas, one per CSV file.
        """
        schemas: list[TableSchema] = []
        for file_path in sorted(self.directory.glob("*.csv")):
            table = await self._load_table(file_path)
            if table is None:
                continue
            fieldnames, rows = table
            # NOTE(review): the lookup below assumes ColumnType has members
            # named INT, FLOAT and TEXT; the enum is defined outside this
            # file — confirm.
            columns = [
                ColumnSchema(
                    name=name,
                    type=ColumnType[col_type.upper()],
                    nullable=True,
                    primary_key=False,
                )
                for name, col_type in self._infer_columns(fieldnames, rows)
            ]
            schemas.append(
                TableSchema(
                    name=file_path.stem,
                    columns=columns,
                )
            )
        return schemas

    def _get_safe_file_path(self, table: str) -> Path:
        """Resolve the CSV file for ``table`` inside the base directory.

        The candidate path is fully resolved (symlinks and ``..`` segments
        included) and must be located inside the resolved base directory.
        Containment is checked on path components, not on a string prefix, so
        a sibling such as ``<base>_other/x.csv`` is rejected.

        Args:
            table (str): Table name (CSV file name without extension).

        Raises:
            ValueError: If the resolved path is outside the base directory or
                the CSV file does not exist.

        Returns:
            Path: Resolved safe file path for the CSV.
        """
        base_dir_resolved = self.directory.resolve()
        resolved_path = (self.directory / f"{table}.csv").resolve()

        if not resolved_path.is_relative_to(base_dir_resolved):
            raise ValueError(f"Attempted access outside base directory: {table}")

        if not resolved_path.is_file():
            raise ValueError(f"Table (CSV file) not found: {table}")

        return resolved_path

    @staticmethod
    def _row_matches(row: dict[str, Any], filters: dict[str, Any]) -> bool:
        """Tell whether ``row`` satisfies every entry of ``filters``.

        A compiled ``re.Pattern`` filter must ``search`` successfully in the
        stringified cell; any other filter value must equal the cell after
        both are converted with ``str``. Columns absent from the row compare
        as the empty string.
        """
        for key, expected in filters.items():
            actual = str(row.get(key, ""))
            if isinstance(expected, re.Pattern):
                if not expected.search(actual):
                    return False
            elif actual != str(expected):
                return False
        return True

    async def query(
        self,
        table: str,
        filters: dict[str, Any] | None = None,
        columns: list[str] | None = None,
        limit: int | None = None,
    ) -> AsyncIterator[dict[str, Any]]:
        """Query rows from a CSV file with optional filtering, projection and limit.

        The file is read completely and closed before any row is yielded, so
        no file handle stays open while the consumer iterates.

        Args:
            table (str): Name of the table (CSV file without '.csv').
            filters (dict[str, Any] | None): Optional column -> value filters.
                A compiled ``re.Pattern`` value is matched with ``search``;
                any other value is compared as a string for equality.
            columns (list[str] | None): Optional list of columns to include in
                results; names not present in the file are ignored.
            limit (int | None): Optional maximum number of rows to yield.
                ``None`` means no limit; ``0`` or a negative value yields
                nothing.

        Yields:
            dict[str, Any]: Rows matching filters with requested columns.

        Raises:
            ValueError: If the table file does not exist or path is unsafe.
        """
        file_path = self._get_safe_file_path(table)

        # ``limit=0`` must mean "no rows", not "unlimited".
        if limit is not None and limit <= 0:
            return

        content = await self._read_text(file_path)
        reader = csv.DictReader(content.splitlines(keepends=True))
        if not reader.fieldnames:
            return

        field_names = columns or reader.fieldnames

        count = 0
        for row in reader:
            if filters and not self._row_matches(row, filters):
                continue

            yield {k: row[k] for k in field_names if k in row}
            count += 1
            if limit is not None and count >= limit:
                break
|