qalita 2.9.1-py3-none-any.whl → 2.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. qalita/_frontend/.next/BUILD_ID +1 -1
  2. qalita/_frontend/.next/build-manifest.json +7 -7
  3. qalita/_frontend/.next/prerender-manifest.json +3 -3
  4. qalita/_frontend/.next/required-server-files.json +196 -40
  5. qalita/_frontend/.next/server/app/_global-error/page/build-manifest.json +5 -5
  6. qalita/_frontend/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
  7. qalita/_frontend/.next/server/app/_global-error.html +2 -2
  8. qalita/_frontend/.next/server/app/_global-error.rsc +7 -7
  9. qalita/_frontend/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +2 -2
  10. qalita/_frontend/.next/server/app/_global-error.segments/_full.segment.rsc +7 -7
  11. qalita/_frontend/.next/server/app/_global-error.segments/_head.segment.rsc +3 -3
  12. qalita/_frontend/.next/server/app/_global-error.segments/_index.segment.rsc +3 -3
  13. qalita/_frontend/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  14. qalita/_frontend/.next/server/app/_not-found/page/build-manifest.json +5 -5
  15. qalita/_frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  16. qalita/_frontend/.next/server/app/_not-found.html +1 -1
  17. qalita/_frontend/.next/server/app/_not-found.rsc +9 -9
  18. qalita/_frontend/.next/server/app/_not-found.segments/_full.segment.rsc +9 -9
  19. qalita/_frontend/.next/server/app/_not-found.segments/_head.segment.rsc +3 -3
  20. qalita/_frontend/.next/server/app/_not-found.segments/_index.segment.rsc +5 -5
  21. qalita/_frontend/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +2 -2
  22. qalita/_frontend/.next/server/app/_not-found.segments/_not-found.segment.rsc +3 -3
  23. qalita/_frontend/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
  24. qalita/_frontend/.next/server/app/page/build-manifest.json +5 -5
  25. qalita/_frontend/.next/server/app/page_client-reference-manifest.js +1 -1
  26. qalita/_frontend/.next/server/app/sources/add/page/build-manifest.json +5 -5
  27. qalita/_frontend/.next/server/app/sources/add/page_client-reference-manifest.js +1 -1
  28. qalita/_frontend/.next/server/app/sources/add.html +1 -1
  29. qalita/_frontend/.next/server/app/sources/add.rsc +11 -11
  30. qalita/_frontend/.next/server/app/sources/add.segments/_full.segment.rsc +11 -11
  31. qalita/_frontend/.next/server/app/sources/add.segments/_head.segment.rsc +3 -3
  32. qalita/_frontend/.next/server/app/sources/add.segments/_index.segment.rsc +5 -5
  33. qalita/_frontend/.next/server/app/sources/add.segments/_tree.segment.rsc +2 -2
  34. qalita/_frontend/.next/server/app/sources/add.segments/sources/add/__PAGE__.segment.rsc +4 -4
  35. qalita/_frontend/.next/server/app/sources/add.segments/sources/add.segment.rsc +3 -3
  36. qalita/_frontend/.next/server/app/sources/add.segments/sources.segment.rsc +3 -3
  37. qalita/_frontend/.next/server/app/sources/edit/[id]/page/build-manifest.json +5 -5
  38. qalita/_frontend/.next/server/app/sources/edit/[id]/page_client-reference-manifest.js +1 -1
  39. qalita/_frontend/.next/server/app/sources/page/build-manifest.json +5 -5
  40. qalita/_frontend/.next/server/app/sources/page_client-reference-manifest.js +1 -1
  41. qalita/_frontend/.next/server/app/sources.html +1 -1
  42. qalita/_frontend/.next/server/app/sources.rsc +11 -11
  43. qalita/_frontend/.next/server/app/sources.segments/_full.segment.rsc +11 -11
  44. qalita/_frontend/.next/server/app/sources.segments/_head.segment.rsc +3 -3
  45. qalita/_frontend/.next/server/app/sources.segments/_index.segment.rsc +5 -5
  46. qalita/_frontend/.next/server/app/sources.segments/_tree.segment.rsc +2 -2
  47. qalita/_frontend/.next/server/app/sources.segments/sources/__PAGE__.segment.rsc +4 -4
  48. qalita/_frontend/.next/server/app/sources.segments/sources.segment.rsc +3 -3
  49. qalita/_frontend/.next/server/chunks/[root-of-the-server]__bf0c3d33._.js +3 -3
  50. qalita/_frontend/.next/server/chunks/[root-of-the-server]__f408c708._.js +2 -2
  51. qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__be91267c._.js +1 -1
  52. qalita/_frontend/.next/server/chunks/ssr/_404f6e81._.js +2 -2
  53. qalita/_frontend/.next/server/chunks/ssr/_6a67f6f0._.js +2 -2
  54. qalita/_frontend/.next/server/chunks/ssr/_cb7b44d6._.js +1 -1
  55. qalita/_frontend/.next/server/chunks/ssr/_d44c43ed._.js +1 -1
  56. qalita/_frontend/.next/server/chunks/ssr/components_DashboardContent_tsx_c3635665._.js +1 -1
  57. qalita/_frontend/.next/server/chunks/ssr/node_modules_next_dist_4b9a0874._.js +1 -1
  58. qalita/_frontend/.next/server/middleware-build-manifest.js +5 -5
  59. qalita/_frontend/.next/server/pages/404.html +1 -1
  60. qalita/_frontend/.next/server/pages/500.html +2 -2
  61. qalita/_frontend/.next/server/server-reference-manifest.js +1 -1
  62. qalita/_frontend/.next/server/server-reference-manifest.json +1 -1
  63. qalita/_frontend/.next/static/chunks/0c7542414b6a6f86.js +2 -0
  64. qalita/_frontend/.next/static/chunks/{89ba62a8ba9b79ce.js → 12daa96885968840.js} +1 -1
  65. qalita/_frontend/.next/static/chunks/1e6a98e93c470083.css +1 -0
  66. qalita/_frontend/.next/static/chunks/499b7099996cc9f9.js +1 -0
  67. qalita/_frontend/.next/static/chunks/694836347d1e5ef3.js +1 -0
  68. qalita/_frontend/.next/static/chunks/7ea91ca84dc4b3a4.js +1 -0
  69. qalita/_frontend/.next/static/chunks/89c689b5748e28ed.js +1 -0
  70. qalita/_frontend/.next/static/chunks/9e71bf77f23416e6.js +1 -0
  71. qalita/_frontend/.next/static/chunks/aa2a44cc19d89bdb.js +1 -0
  72. qalita/_frontend/.next/static/chunks/ba22289f779d638e.js +1 -0
  73. qalita/_frontend/.next/static/chunks/bb05964d928aa166.js +3 -0
  74. qalita/_frontend/.next/static/chunks/dde1c328f398837e.js +1 -0
  75. qalita/_frontend/.next/static/chunks/ecbb64dc112ad516.js +1 -0
  76. qalita/_frontend/.next/static/chunks/facd124df217e016.js +1 -0
  77. qalita/_frontend/.next/static/chunks/turbopack-9fc8bcb3a9806c66.js +4 -0
  78. qalita/_frontend/node_modules/@next/env/package.json +1 -1
  79. qalita/_frontend/node_modules/next/dist/build/index.js +10 -4
  80. qalita/_frontend/node_modules/next/dist/build/swc/index.js +1 -1
  81. qalita/_frontend/node_modules/next/dist/build/webpack-config.js +3 -3
  82. qalita/_frontend/node_modules/next/dist/client/components/segment-cache/lru.js +2 -0
  83. qalita/_frontend/node_modules/next/dist/compiled/next-server/app-page-turbo-experimental.runtime.prod.js +1 -1
  84. qalita/_frontend/node_modules/next/dist/compiled/next-server/app-page-turbo.runtime.prod.js +1 -1
  85. qalita/_frontend/node_modules/next/dist/server/config-shared.js +4 -0
  86. qalita/_frontend/node_modules/next/dist/server/dev/hot-reloader-turbopack.js +1 -1
  87. qalita/_frontend/node_modules/next/dist/server/dev/hot-reloader-webpack.js +1 -1
  88. qalita/_frontend/node_modules/next/dist/server/lib/app-info-log.js +1 -1
  89. qalita/_frontend/node_modules/next/dist/server/lib/start-server.js +1 -1
  90. qalita/_frontend/node_modules/next/dist/server/web/adapter.js +1 -1
  91. qalita/_frontend/node_modules/next/dist/shared/lib/errors/canary-only-config-error.js +1 -1
  92. qalita/_frontend/node_modules/next/dist/telemetry/anonymous-meta.js +1 -1
  93. qalita/_frontend/node_modules/next/dist/telemetry/events/version.js +2 -2
  94. qalita/_frontend/node_modules/next/package.json +15 -15
  95. qalita/_frontend/package.json +4 -4
  96. qalita/_frontend/server.js +1 -1
  97. qalita/commands/source.py +166 -2
  98. qalita/commands/worker.py +3 -3
  99. qalita/commands/worker_grpc.py +113 -3
  100. qalita/grpc/client.py +260 -34
  101. qalita/grpc/protos/qalita.proto +26 -0
  102. qalita/grpc/protos/qalita_pb2.py +80 -76
  103. qalita/grpc/protos/qalita_pb2_grpc.py +1 -1
  104. qalita/internal/action_executor.py +1009 -0
  105. qalita/internal/utils.py +1 -1
  106. {qalita-2.9.1.dist-info → qalita-2.10.0.dist-info}/METADATA +4 -3
  107. {qalita-2.9.1.dist-info → qalita-2.10.0.dist-info}/RECORD +113 -111
  108. qalita/_frontend/.next/static/chunks/02a64570f0a14789.js +0 -1
  109. qalita/_frontend/.next/static/chunks/0b082245f106d665.js +0 -1
  110. qalita/_frontend/.next/static/chunks/27b3ba70c7ef50a8.js +0 -1
  111. qalita/_frontend/.next/static/chunks/517e9b74d1a3c0ce.js +0 -1
  112. qalita/_frontend/.next/static/chunks/58689c96b0676c41.js +0 -1
  113. qalita/_frontend/.next/static/chunks/6c99da4248e4fcfc.js +0 -1
  114. qalita/_frontend/.next/static/chunks/acc5da18ff20daa1.js +0 -3
  115. qalita/_frontend/.next/static/chunks/bdc8a8e7721f5675.js +0 -2
  116. qalita/_frontend/.next/static/chunks/e0df86cbf44bbf9f.js +0 -1
  117. qalita/_frontend/.next/static/chunks/e4c3a252774ab7fd.css +0 -1
  118. qalita/_frontend/.next/static/chunks/e6ce59ba40b863f2.js +0 -1
  119. qalita/_frontend/.next/static/chunks/ec4b1f1e3cd3ae43.js +0 -1
  120. qalita/_frontend/.next/static/chunks/turbopack-d21156d03715fafa.js +0 -4
  121. /qalita/_frontend/.next/static/{M1H4Lcjc6A78n9p1qVA6d → NJRrkC0Gn13ofbqb0Lb0C}/_buildManifest.js +0 -0
  122. /qalita/_frontend/.next/static/{M1H4Lcjc6A78n9p1qVA6d → NJRrkC0Gn13ofbqb0Lb0C}/_clientMiddlewareManifest.json +0 -0
  123. /qalita/_frontend/.next/static/{M1H4Lcjc6A78n9p1qVA6d → NJRrkC0Gn13ofbqb0Lb0C}/_ssgManifest.js +0 -0
  124. {qalita-2.9.1.dist-info → qalita-2.10.0.dist-info}/WHEEL +0 -0
  125. {qalita-2.9.1.dist-info → qalita-2.10.0.dist-info}/entry_points.txt +0 -0
  126. {qalita-2.9.1.dist-info → qalita-2.10.0.dist-info}/licenses/LICENSE +0 -0
qalita/internal/action_executor.py
@@ -0,0 +1,1009 @@
+"""
+# QALITA (c) COPYRIGHT 2025 - ALL RIGHTS RESERVED -
+Action Executor module for Studio Agent integration.
+
+This module provides an abstraction layer that translates LLM agent commands
+into concrete operations on data sources (SQL queries, file manipulations, etc.).
+"""
+
+import json
+import time
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+
+from qalita.internal.utils import logger
+from qalita.internal.data_preview import (
+    DataPreviewResult,
+    _dataframe_to_preview,
+    _error_result,
+    DEFAULT_ROW_LIMIT,
+)
+
+
+@dataclass
+class ActionResult:
+    """Result of an action execution."""
+
+    ok: bool
+    action_type: str
+    error: Optional[str] = None
+    result_json: Optional[str] = None  # Structured result as JSON string
+    data: Optional[DataPreviewResult] = None  # Tabular data if applicable
+    execution_time_ms: int = 0
+
+
+# Supported action types
+ACTION_TYPES = {
+    "query": "Execute a SQL query on any data source (database or file via pandasql)",
+    "read_data": "Read data from a file or database source",
+    "filter": "Filter data based on conditions",
+    "aggregate": "Perform aggregation on data",
+    "describe": "Get metadata about a source (schema, columns, row count)",
+    "sample": "Get a random sample of data",
+    "count": "Count rows in a source or query result",
+    "distinct": "Get distinct values from a column",
+    "head": "Get first N rows from a source",
+    "tail": "Get last N rows from a source",
+}
+
+# Check if pandasql is available for file SQL queries
+_PANDASQL_AVAILABLE = False
+try:
+    import pandasql
+    _PANDASQL_AVAILABLE = True
+except ImportError:
+    pass
+
+
+class ActionExecutor:
+    """
+    Executes actions requested by the LLM agent.
+
+    This class provides a unified interface for executing various data operations
+    on different source types (databases, files, etc.).
+    """
+
+    def __init__(self):
+        """Initialize the action executor."""
+        self._engines: Dict[int, Any] = {}  # Cache for database engines
+
+    def execute(
+        self,
+        action_type: str,
+        source_config: dict,
+        params: dict,
+        timeout_seconds: Optional[int] = None,
+    ) -> ActionResult:
+        """
+        Execute an action on a data source.
+
+        Args:
+            action_type: Type of action to execute (query, read_data, etc.)
+            source_config: Source configuration dict with 'type' and 'config' keys
+            params: Action parameters (specific to each action type)
+            timeout_seconds: Optional timeout for the action
+
+        Returns:
+            ActionResult with the execution result
+        """
+        start_time = time.time()
+
+        if action_type not in ACTION_TYPES:
+            return ActionResult(
+                ok=False,
+                action_type=action_type,
+                error=f"Unknown action type: {action_type}. Supported: {list(ACTION_TYPES.keys())}",
+            )
+
+        handlers = {
+            "query": self._execute_query,
+            "read_data": self._read_data,
+            "filter": self._filter_data,
+            "aggregate": self._aggregate_data,
+            "describe": self._describe_source,
+            "sample": self._sample_data,
+            "count": self._count_rows,
+            "distinct": self._get_distinct,
+            "head": self._get_head,
+            "tail": self._get_tail,
+        }
+
+        handler = handlers.get(action_type)
+        if not handler:
+            return ActionResult(
+                ok=False,
+                action_type=action_type,
+                error=f"Handler not implemented for action: {action_type}",
+            )
+
+        try:
+            result = handler(source_config, params)
+            result.execution_time_ms = int((time.time() - start_time) * 1000)
+            return result
+        except Exception as e:
+            logger.error(f"Error executing action {action_type}: {e}")
+            return ActionResult(
+                ok=False,
+                action_type=action_type,
+                error=str(e),
+                execution_time_ms=int((time.time() - start_time) * 1000),
+            )
+
+    def _get_database_engine(self, source_config: dict) -> Any:
+        """Get or create a SQLAlchemy engine for a database source."""
+        from sqlalchemy import create_engine
+
+        config = source_config.get("config", {})
+        source_type = source_config.get("type", "").lower()
+
+        connection_string = config.get("connection_string")
+        if not connection_string:
+            db_type_map = {
+                "postgresql": "postgresql",
+                "mysql": "mysql",
+                "mssql": "mssql+pymssql",
+                "oracle": "oracle+oracledb",
+                "sqlite": "sqlite",
+            }
+
+            dialect = db_type_map.get(source_type)
+            if not dialect:
+                raise ValueError(f"Unsupported database type: {source_type}")
+
+            if source_type == "sqlite":
+                database_path = config.get("database", ":memory:")
+                connection_string = f"sqlite:///{database_path}"
+            elif source_type == "oracle":
+                connection_string = (
+                    f"{dialect}://{config['username']}:{config['password']}"
+                    f"@{config['host']}:{config['port']}/?service_name={config['database']}"
+                )
+            else:
+                connection_string = (
+                    f"{dialect}://{config['username']}:{config['password']}"
+                    f"@{config['host']}:{config['port']}/{config['database']}"
+                )
+
+        return create_engine(connection_string)
+
+    def _is_database_source(self, source_config: dict) -> bool:
+        """Check if the source is a database."""
+        source_type = source_config.get("type", "").lower()
+        return source_type in ("postgresql", "mysql", "mssql", "oracle", "sqlite")
+
+    def _is_file_source(self, source_config: dict) -> bool:
+        """Check if the source is a file."""
+        source_type = source_config.get("type", "").lower()
+        return source_type in ("file", "csv", "excel", "parquet", "json", "folder")
+
+    def _execute_query(self, source_config: dict, params: dict) -> ActionResult:
+        """Execute a SQL query on any data source (database or file via pandasql)."""
+        sql = params.get("sql")
+        if not sql:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="SQL query is required for 'query' action",
+            )
+
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+
+        # Security: reject modification queries
+        sql_lower = sql.strip().lower()
+        if sql_lower.startswith(("insert", "update", "delete", "create", "drop", "alter", "truncate")):
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="Modification queries (INSERT, UPDATE, DELETE, etc.) are not allowed",
+            )
+
+        # Add LIMIT if not present (for safety)
+        if "limit" not in sql_lower:
+            sql = f"{sql.rstrip(';')} LIMIT {limit}"
+
+        if self._is_database_source(source_config):
+            return self._execute_database_query(source_config, sql, limit)
+        elif self._is_file_source(source_config):
+            return self._execute_file_query(source_config, sql, limit)
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query action not supported for source type: {source_config.get('type')}",
+            )
+
+    def _execute_database_query(self, source_config: dict, sql: str, limit: int) -> ActionResult:
+        """Execute a SQL query on a database source."""
+        try:
+            engine = self._get_database_engine(source_config)
+            with engine.connect() as conn:
+                df = pd.read_sql(sql, conn)
+
+            preview = _dataframe_to_preview(df, limit)
+            return ActionResult(
+                ok=True,
+                action_type="query",
+                data=preview,
+                result_json=json.dumps({"rows_returned": len(df), "columns": list(df.columns)}),
+            )
+        except Exception as e:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query execution failed: {str(e)}",
+            )
+
+    def _execute_file_query(self, source_config: dict, sql: str, limit: int) -> ActionResult:
+        """Execute a SQL query on a file source using pandasql."""
+        import os
+
+        if not _PANDASQL_AVAILABLE:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="SQL queries on file sources require 'pandasql'. Install with: pip install pandasql",
+            )
+
+        config = source_config.get("config", {})
+        source_type = source_config.get("type", "").lower()
+        path = config.get("path")
+
+        if not path:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="File path not configured",
+            )
+
+        if not os.path.exists(path):
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"File not found: {path}",
+            )
+
+        try:
+            # Load the file into a DataFrame
+            source_data = self._load_file_to_dataframe(path, source_type)
+
+            if source_data is None:
+                return ActionResult(
+                    ok=False,
+                    action_type="query",
+                    error=f"Unsupported file type: {source_type}",
+                )
+
+            # Execute the SQL query using pandasql
+            # The user must use 'source_data' as the table name in their query
+            from pandasql import sqldf
+
+            # Create a local namespace for pandasql
+            local_env = {"source_data": source_data}
+            result_df = sqldf(sql, local_env)
+
+            # Apply limit if result is larger
+            if len(result_df) > limit:
+                result_df = result_df.head(limit)
+
+            preview = _dataframe_to_preview(result_df, limit)
+            return ActionResult(
+                ok=True,
+                action_type="query",
+                data=preview,
+                result_json=json.dumps({
+                    "rows_returned": len(result_df),
+                    "columns": list(result_df.columns),
+                    "source_rows": len(source_data),
+                }),
+            )
+        except Exception as e:
+            error_msg = str(e)
+            # Provide helpful hint if table name is wrong
+            if "no such table" in error_msg.lower():
+                error_msg += ". Hint: Use 'source_data' as the table name, e.g., SELECT * FROM source_data"
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query execution failed: {error_msg}",
+            )
+
+    def _load_file_to_dataframe(self, path: str, source_type: str) -> Optional[pd.DataFrame]:
+        """Load a file into a pandas DataFrame for SQL querying."""
+        try:
+            if source_type in ("csv", "file") or path.endswith(".csv"):
+                return pd.read_csv(path, low_memory=False)
+            elif source_type == "excel" or path.endswith((".xlsx", ".xls")):
+                return pd.read_excel(path, engine="openpyxl")
+            elif source_type == "parquet" or path.endswith(".parquet"):
+                return pd.read_parquet(path)
+            elif source_type == "json" or path.endswith(".json"):
+                return pd.read_json(path)
+            else:
+                return None
+        except Exception as e:
+            logger.error(f"Failed to load file {path}: {e}")
+            return None
+
+    def _read_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Read data from a source."""
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+        columns = params.get("columns")  # Optional list of columns to select
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="read_data",
+                    error="Table name is required for database sources",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            cols = ", ".join(columns) if columns else "*"
+            sql = f"SELECT {cols} FROM {qualified_table} LIMIT {limit}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, limit)
+                return ActionResult(
+                    ok=True,
+                    action_type="read_data",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df), "columns": list(df.columns)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="read_data",
+                    error=f"Failed to read data: {str(e)}",
+                )
+
+        elif self._is_file_source(source_config):
+            return self._read_file_data(source_config, params)
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="read_data",
+                error=f"Unsupported source type: {source_config.get('type')}",
+            )
+
+    def _read_file_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Read data from a file source."""
+        import os
+
+        config = source_config.get("config", {})
+        source_type = source_config.get("type", "").lower()
+        path = config.get("path")
+
+        if not path:
+            return ActionResult(
+                ok=False,
+                action_type="read_data",
+                error="File path not configured",
+            )
+
+        if not os.path.exists(path):
+            return ActionResult(
+                ok=False,
+                action_type="read_data",
+                error=f"File not found: {path}",
+            )
+
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+        columns = params.get("columns")
+
+        try:
+            if source_type in ("csv", "file") and path.endswith(".csv"):
+                usecols = columns if columns else None
+                df = pd.read_csv(path, nrows=limit, usecols=usecols, low_memory=False)
+            elif source_type == "excel" or path.endswith((".xlsx", ".xls")):
+                usecols = columns if columns else None
+                df = pd.read_excel(path, nrows=limit, usecols=usecols, engine="openpyxl")
+            elif source_type == "parquet" or path.endswith(".parquet"):
+                df = pd.read_parquet(path, columns=columns)
+                df = df.head(limit)
+            elif source_type == "json" or path.endswith(".json"):
+                df = pd.read_json(path)
+                if columns:
+                    df = df[columns]
+                df = df.head(limit)
+            else:
+                return ActionResult(
+                    ok=False,
+                    action_type="read_data",
+                    error=f"Unsupported file type: {source_type}",
+                )
+
+            preview = _dataframe_to_preview(df, limit)
+            return ActionResult(
+                ok=True,
+                action_type="read_data",
+                data=preview,
+                result_json=json.dumps({"rows_returned": len(df), "columns": list(df.columns)}),
+            )
+        except Exception as e:
+            return ActionResult(
+                ok=False,
+                action_type="read_data",
+                error=f"Failed to read file: {str(e)}",
+            )
+
+    def _filter_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Filter data based on a condition."""
+        condition = params.get("condition")
+        if not condition:
+            return ActionResult(
+                ok=False,
+                action_type="filter",
+                error="Filter condition is required",
+            )
+
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="filter",
+                    error="Table name is required for database sources",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+            columns = params.get("columns")
+            cols = ", ".join(columns) if columns else "*"
+
+            sql = f"SELECT {cols} FROM {qualified_table} WHERE {condition} LIMIT {limit}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, limit)
+                return ActionResult(
+                    ok=True,
+                    action_type="filter",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df), "condition": condition}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="filter",
+                    error=f"Filter failed: {str(e)}",
+                )
+
+        elif self._is_file_source(source_config):
+            # First read the data
+            read_result = self._read_file_data(source_config, {"limit": 10000})  # Read more for filtering
+            if not read_result.ok or not read_result.data:
+                return ActionResult(
+                    ok=False,
+                    action_type="filter",
+                    error=read_result.error or "Failed to read data for filtering",
+                )
+
+            # Reconstruct dataframe and filter
+            try:
+                df = pd.DataFrame(
+                    [row for row in read_result.data.rows],
+                    columns=read_result.data.headers
+                )
+                # Use query for filtering
+                df_filtered = df.query(condition)
+                df_filtered = df_filtered.head(limit)
+
+                preview = _dataframe_to_preview(df_filtered, limit)
+                return ActionResult(
+                    ok=True,
+                    action_type="filter",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df_filtered), "condition": condition}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="filter",
+                    error=f"Filter failed: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="filter",
+                error=f"Unsupported source type: {source_config.get('type')}",
+            )
+
+    def _aggregate_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Perform aggregation on data."""
+        group_by = params.get("group_by")  # Column(s) to group by
+        agg_func = params.get("agg_func", "count")  # Aggregation function
+        agg_column = params.get("agg_column")  # Column to aggregate
+
+        if not group_by:
+            return ActionResult(
+                ok=False,
+                action_type="aggregate",
+                error="group_by column is required for aggregation",
+            )
+
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="aggregate",
+                    error="Table name is required for database sources",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            # Build SQL aggregation
+            group_cols = group_by if isinstance(group_by, list) else [group_by]
+            group_str = ", ".join(group_cols)
+
+            if agg_func.upper() == "COUNT":
+                agg_expr = "COUNT(*) as count"
+            elif agg_column:
+                agg_expr = f"{agg_func.upper()}({agg_column}) as {agg_func.lower()}_{agg_column}"
+            else:
+                agg_expr = "COUNT(*) as count"
+
+            sql = f"SELECT {group_str}, {agg_expr} FROM {qualified_table} GROUP BY {group_str} LIMIT {limit}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, limit)
+                return ActionResult(
+                    ok=True,
+                    action_type="aggregate",
+                    data=preview,
+                    result_json=json.dumps({
+                        "groups": len(df),
+                        "group_by": group_by,
+                        "agg_func": agg_func,
+                    }),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="aggregate",
+                    error=f"Aggregation failed: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="aggregate",
+                error="Aggregation currently only supported for database sources",
+            )
+
+    def _describe_source(self, source_config: dict, params: dict) -> ActionResult:
+        """Get metadata about a source."""
+        source_type = source_config.get("type", "").lower()
+        config = source_config.get("config", {})
+
+        metadata = {
+            "source_type": source_type,
+            "name": source_config.get("name", "unknown"),
+        }
+
+        if self._is_database_source(source_config):
+            try:
+                from sqlalchemy import inspect
+
+                engine = self._get_database_engine(source_config)
+                inspector = inspect(engine)
+
+                # Get schema info
+                schema = config.get("schema")
+                tables = inspector.get_table_names(schema=schema)
+
+                metadata["tables"] = tables
+                metadata["schema"] = schema
+
+                # Get column info for specified table or first table
+                table = params.get("table") or config.get("table") or (tables[0] if tables else None)
+                if table:
+                    columns = inspector.get_columns(table, schema=schema)
+                    metadata["table"] = table
+                    metadata["columns"] = [
+                        {"name": col["name"], "type": str(col["type"])}
+                        for col in columns
+                    ]
+
+                    # Get row count
+                    with engine.connect() as conn:
+                        from sqlalchemy import text
+                        qualified_table = f"{schema}.{table}" if schema else table
+                        result = conn.execute(text(f"SELECT COUNT(*) FROM {qualified_table}"))
+                        metadata["row_count"] = result.scalar()
+
+                return ActionResult(
+                    ok=True,
+                    action_type="describe",
+                    result_json=json.dumps(metadata),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="describe",
+                    error=f"Failed to describe database source: {str(e)}",
+                )
+
+        elif self._is_file_source(source_config):
+            import os
+
+            path = config.get("path")
+            if not path:
+                return ActionResult(
+                    ok=False,
+                    action_type="describe",
+                    error="File path not configured",
+                )
+
+            if not os.path.exists(path):
+                return ActionResult(
+                    ok=False,
+                    action_type="describe",
+                    error=f"File not found: {path}",
+                )
+
+            metadata["path"] = path
+            metadata["file_size_bytes"] = os.path.getsize(path)
+
+            try:
+                # Read a small sample to get column info
+                if source_type in ("csv", "file") and path.endswith(".csv"):
+                    df = pd.read_csv(path, nrows=5, low_memory=False)
+                elif source_type == "excel" or path.endswith((".xlsx", ".xls")):
+                    df = pd.read_excel(path, nrows=5, engine="openpyxl")
+                elif source_type == "parquet" or path.endswith(".parquet"):
+                    df = pd.read_parquet(path)
+                    df = df.head(5)
+                elif source_type == "json" or path.endswith(".json"):
+                    df = pd.read_json(path)
+                    df = df.head(5)
+                else:
+                    df = None
+
+                if df is not None:
+                    metadata["columns"] = [
+                        {"name": col, "type": str(df[col].dtype)}
+                        for col in df.columns
+                    ]
+                    # Try to get total row count
+                    if source_type in ("csv", "file") and path.endswith(".csv"):
+                        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
+                            metadata["row_count"] = sum(1 for _ in f) - 1  # Exclude header
+                    elif source_type == "parquet" or path.endswith(".parquet"):
+                        metadata["row_count"] = len(pd.read_parquet(path))
+
+                return ActionResult(
+                    ok=True,
+                    action_type="describe",
+                    result_json=json.dumps(metadata),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="describe",
+                    error=f"Failed to describe file source: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="describe",
+                error=f"Describe not supported for source type: {source_type}",
+            )
+
+    def _sample_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Get a random sample of data."""
+        n = params.get("n", 10)  # Number of samples
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="sample",
+                    error="Table name is required",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+            source_type = source_config.get("type", "").lower()
+
+            # Different databases have different random sampling syntax
+            if source_type == "postgresql":
+                sql = f"SELECT * FROM {qualified_table} ORDER BY RANDOM() LIMIT {n}"
+            elif source_type == "mysql":
+                sql = f"SELECT * FROM {qualified_table} ORDER BY RAND() LIMIT {n}"
+            else:
+                sql = f"SELECT * FROM {qualified_table} LIMIT {n}"  # Fallback
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, n)
+                return ActionResult(
+                    ok=True,
+                    action_type="sample",
+                    data=preview,
+                    result_json=json.dumps({"samples": len(df)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="sample",
+                    error=f"Sampling failed: {str(e)}",
+                )
+
+        elif self._is_file_source(source_config):
+            # Read all data and sample
+            read_result = self._read_file_data(source_config, {"limit": 10000})
+            if not read_result.ok or not read_result.data:
+                return ActionResult(
+                    ok=False,
+                    action_type="sample",
+                    error=read_result.error or "Failed to read data for sampling",
+                )
+
+            try:
+                df = pd.DataFrame(
+                    [row for row in read_result.data.rows],
+                    columns=read_result.data.headers
+                )
+                df_sample = df.sample(n=min(n, len(df)))
+
+                preview = _dataframe_to_preview(df_sample, n)
+                return ActionResult(
+                    ok=True,
+                    action_type="sample",
+                    data=preview,
+                    result_json=json.dumps({"samples": len(df_sample)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="sample",
+                    error=f"Sampling failed: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="sample",
+                error=f"Sampling not supported for source type: {source_config.get('type')}",
+            )
+
+    def _count_rows(self, source_config: dict, params: dict) -> ActionResult:
+        """Count rows in a source."""
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            condition = params.get("condition")
+
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="count",
+                    error="Table name is required",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            sql = f"SELECT COUNT(*) as count FROM {qualified_table}"
+            if condition:
+                sql += f" WHERE {condition}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    from sqlalchemy import text
+                    result = conn.execute(text(sql))
+                    count = result.scalar()
+
+                return ActionResult(
+                    ok=True,
+                    action_type="count",
+                    result_json=json.dumps({"count": count, "table": table}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="count",
+                    error=f"Count failed: {str(e)}",
+                )
+
+        else:
+            # Use describe for file sources
+            describe_result = self._describe_source(source_config, params)
+            if describe_result.ok and describe_result.result_json:
+                metadata = json.loads(describe_result.result_json)
+                if "row_count" in metadata:
+                    return ActionResult(
+                        ok=True,
+                        action_type="count",
+                        result_json=json.dumps({"count": metadata["row_count"]}),
+                    )
+
+            return ActionResult(
+                ok=False,
+                action_type="count",
+                error="Could not determine row count",
+            )
+
+    def _get_distinct(self, source_config: dict, params: dict) -> ActionResult:
+        """Get distinct values from a column."""
+        column = params.get("column")
+        if not column:
+            return ActionResult(
+                ok=False,
+                action_type="distinct",
+                error="Column name is required for distinct action",
+            )
+
+        limit = params.get("limit", 100)
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="distinct",
+                    error="Table name is required",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            sql = f"SELECT DISTINCT {column} FROM {qualified_table} LIMIT {limit}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                values = df[column].tolist()
+                return ActionResult(
+                    ok=True,
+                    action_type="distinct",
+                    result_json=json.dumps({
+                        "column": column,
+                        "distinct_count": len(values),
+                        "values": values[:limit],
+                    }),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="distinct",
+                    error=f"Distinct failed: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="distinct",
+                error="Distinct currently only supported for database sources",
+            )
+
+    def _get_head(self, source_config: dict, params: dict) -> ActionResult:
+        """Get first N rows from a source."""
+        n = params.get("n", 10)
+        params["limit"] = n
+        return self._read_data(source_config, params)
+
+    def _get_tail(self, source_config: dict, params: dict) -> ActionResult:
+        """Get last N rows from a source."""
+        n = params.get("n", 10)
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="tail",
+                    error="Table name is required",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            # This requires knowing the order - use a subquery with DESC ordering
+            sql = f"""
+                SELECT * FROM (
+                    SELECT * FROM {qualified_table} ORDER BY 1 DESC LIMIT {n}
+                ) sub ORDER BY 1 ASC
+            """
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, n)
+                return ActionResult(
+                    ok=True,
+                    action_type="tail",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="tail",
+                    error=f"Tail failed: {str(e)}",
+                )
+
+        else:
+            # For files, read all and take tail
+            read_result = self._read_file_data(source_config, {"limit": 10000})
+            if not read_result.ok or not read_result.data:
+                return ActionResult(
+                    ok=False,
+                    action_type="tail",
+                    error=read_result.error or "Failed to read data",
+                )
+
+            try:
+                df = pd.DataFrame(
+                    [row for row in read_result.data.rows],
+                    columns=read_result.data.headers
+                )
+                df_tail = df.tail(n)
+
+                preview = _dataframe_to_preview(df_tail, n)
+                return ActionResult(
+                    ok=True,
+                    action_type="tail",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df_tail)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="tail",
+                    error=f"Tail failed: {str(e)}",
+                )
+
+
+# Singleton instance
+_executor: Optional[ActionExecutor] = None
+
+
+def get_action_executor() -> ActionExecutor:
+    """Get the singleton ActionExecutor instance."""
+    global _executor
+    if _executor is None:
+        _executor = ActionExecutor()
+    return _executor
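
For orientation, a minimal usage sketch of the new executor (not part of the wheel itself): the source name and file path below are hypothetical, the source_config dict follows the {"type": ..., "config": {...}} shape read by the handlers above, and SQL against file sources goes through the optional pandasql dependency, which exposes the loaded frame under the table name "source_data".

# Minimal sketch, assuming a local CSV file and an installed 'pandasql'.
from qalita.internal.action_executor import get_action_executor

executor = get_action_executor()  # process-wide singleton

source = {
    "type": "csv",                         # routed via _is_file_source()
    "name": "sales",                       # hypothetical source name
    "config": {"path": "/tmp/sales.csv"},  # hypothetical path
}

# File sources are queried with pandasql; the frame is named 'source_data'.
# No explicit limit is passed, so execute() appends LIMIT DEFAULT_ROW_LIMIT.
result = executor.execute(
    action_type="query",
    source_config=source,
    params={"sql": "SELECT region, COUNT(*) AS n FROM source_data GROUP BY region"},
)

if result.ok:
    print(result.result_json)        # e.g. {"rows_returned": ..., "columns": [...], "source_rows": ...}
    print(result.execution_time_ms)  # wall-clock duration recorded by execute()
else:
    print(result.error)              # e.g. the 'no such table' hint from _execute_file_query

The same executor.execute() call shape covers the database-backed actions (describe, count, sample, ...) once source_config carries a supported "type" such as "postgresql" and the connection fields read by _get_database_engine().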