qalita-2.9.0-py3-none-any.whl → qalita-2.9.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. qalita/_frontend/.next/BUILD_ID +1 -1
  2. qalita/_frontend/.next/build-manifest.json +7 -7
  3. qalita/_frontend/.next/prerender-manifest.json +3 -3
  4. qalita/_frontend/.next/required-server-files.json +196 -40
  5. qalita/_frontend/.next/server/app/_global-error/page/build-manifest.json +5 -5
  6. qalita/_frontend/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
  7. qalita/_frontend/.next/server/app/_global-error.html +2 -2
  8. qalita/_frontend/.next/server/app/_global-error.rsc +7 -7
  9. qalita/_frontend/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +2 -2
  10. qalita/_frontend/.next/server/app/_global-error.segments/_full.segment.rsc +7 -7
  11. qalita/_frontend/.next/server/app/_global-error.segments/_head.segment.rsc +3 -3
  12. qalita/_frontend/.next/server/app/_global-error.segments/_index.segment.rsc +3 -3
  13. qalita/_frontend/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  14. qalita/_frontend/.next/server/app/_not-found/page/build-manifest.json +5 -5
  15. qalita/_frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  16. qalita/_frontend/.next/server/app/_not-found.html +1 -1
  17. qalita/_frontend/.next/server/app/_not-found.rsc +8 -8
  18. qalita/_frontend/.next/server/app/_not-found.segments/_full.segment.rsc +8 -8
  19. qalita/_frontend/.next/server/app/_not-found.segments/_head.segment.rsc +3 -3
  20. qalita/_frontend/.next/server/app/_not-found.segments/_index.segment.rsc +4 -4
  21. qalita/_frontend/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +2 -2
  22. qalita/_frontend/.next/server/app/_not-found.segments/_not-found.segment.rsc +3 -3
  23. qalita/_frontend/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  24. qalita/_frontend/.next/server/app/page/build-manifest.json +5 -5
  25. qalita/_frontend/.next/server/app/page_client-reference-manifest.js +1 -1
  26. qalita/_frontend/.next/server/app/sources/add/page/build-manifest.json +5 -5
  27. qalita/_frontend/.next/server/app/sources/add/page_client-reference-manifest.js +1 -1
  28. qalita/_frontend/.next/server/app/sources/add.html +1 -1
  29. qalita/_frontend/.next/server/app/sources/add.rsc +10 -10
  30. qalita/_frontend/.next/server/app/sources/add.segments/_full.segment.rsc +10 -10
  31. qalita/_frontend/.next/server/app/sources/add.segments/_head.segment.rsc +3 -3
  32. qalita/_frontend/.next/server/app/sources/add.segments/_index.segment.rsc +4 -4
  33. qalita/_frontend/.next/server/app/sources/add.segments/_tree.segment.rsc +1 -1
  34. qalita/_frontend/.next/server/app/sources/add.segments/sources/add/__PAGE__.segment.rsc +4 -4
  35. qalita/_frontend/.next/server/app/sources/add.segments/sources/add.segment.rsc +3 -3
  36. qalita/_frontend/.next/server/app/sources/add.segments/sources.segment.rsc +3 -3
  37. qalita/_frontend/.next/server/app/sources/edit/[id]/page/build-manifest.json +5 -5
  38. qalita/_frontend/.next/server/app/sources/edit/[id]/page_client-reference-manifest.js +1 -1
  39. qalita/_frontend/.next/server/app/sources/page/build-manifest.json +5 -5
  40. qalita/_frontend/.next/server/app/sources/page_client-reference-manifest.js +1 -1
  41. qalita/_frontend/.next/server/app/sources.html +1 -1
  42. qalita/_frontend/.next/server/app/sources.rsc +10 -10
  43. qalita/_frontend/.next/server/app/sources.segments/_full.segment.rsc +10 -10
  44. qalita/_frontend/.next/server/app/sources.segments/_head.segment.rsc +3 -3
  45. qalita/_frontend/.next/server/app/sources.segments/_index.segment.rsc +4 -4
  46. qalita/_frontend/.next/server/app/sources.segments/_tree.segment.rsc +1 -1
  47. qalita/_frontend/.next/server/app/sources.segments/sources/__PAGE__.segment.rsc +4 -4
  48. qalita/_frontend/.next/server/app/sources.segments/sources.segment.rsc +3 -3
  49. qalita/_frontend/.next/server/chunks/[root-of-the-server]__bf0c3d33._.js +3 -3
  50. qalita/_frontend/.next/server/chunks/[root-of-the-server]__f408c708._.js +2 -2
  51. qalita/_frontend/.next/server/chunks/ssr/_404f6e81._.js +2 -2
  52. qalita/_frontend/.next/server/chunks/ssr/_6a67f6f0._.js +2 -2
  53. qalita/_frontend/.next/server/chunks/ssr/node_modules_next_dist_4b9a0874._.js +1 -1
  54. qalita/_frontend/.next/server/middleware-build-manifest.js +5 -5
  55. qalita/_frontend/.next/server/pages/404.html +1 -1
  56. qalita/_frontend/.next/server/pages/500.html +2 -2
  57. qalita/_frontend/.next/server/server-reference-manifest.js +1 -1
  58. qalita/_frontend/.next/server/server-reference-manifest.json +1 -1
  59. qalita/_frontend/.next/static/chunks/0c7542414b6a6f86.js +2 -0
  60. qalita/_frontend/.next/static/chunks/{89ba62a8ba9b79ce.js → 12daa96885968840.js} +1 -1
  61. qalita/_frontend/.next/static/chunks/694836347d1e5ef3.js +1 -0
  62. qalita/_frontend/.next/static/chunks/7ea91ca84dc4b3a4.js +1 -0
  63. qalita/_frontend/.next/static/chunks/9e71bf77f23416e6.js +1 -0
  64. qalita/_frontend/.next/static/chunks/aa2a44cc19d89bdb.js +1 -0
  65. qalita/_frontend/.next/static/chunks/bb05964d928aa166.js +3 -0
  66. qalita/_frontend/.next/static/chunks/ecbb64dc112ad516.js +1 -0
  67. qalita/_frontend/.next/static/chunks/turbopack-9fc8bcb3a9806c66.js +4 -0
  68. qalita/_frontend/node_modules/@next/env/package.json +1 -1
  69. qalita/_frontend/node_modules/next/dist/build/index.js +10 -4
  70. qalita/_frontend/node_modules/next/dist/build/swc/index.js +1 -1
  71. qalita/_frontend/node_modules/next/dist/build/webpack-config.js +3 -3
  72. qalita/_frontend/node_modules/next/dist/client/components/segment-cache/lru.js +2 -0
  73. qalita/_frontend/node_modules/next/dist/compiled/next-server/app-page-turbo-experimental.runtime.prod.js +1 -1
  74. qalita/_frontend/node_modules/next/dist/compiled/next-server/app-page-turbo.runtime.prod.js +1 -1
  75. qalita/_frontend/node_modules/next/dist/server/config-shared.js +4 -0
  76. qalita/_frontend/node_modules/next/dist/server/dev/hot-reloader-turbopack.js +1 -1
  77. qalita/_frontend/node_modules/next/dist/server/dev/hot-reloader-webpack.js +1 -1
  78. qalita/_frontend/node_modules/next/dist/server/lib/app-info-log.js +1 -1
  79. qalita/_frontend/node_modules/next/dist/server/lib/start-server.js +1 -1
  80. qalita/_frontend/node_modules/next/dist/server/web/adapter.js +1 -1
  81. qalita/_frontend/node_modules/next/dist/shared/lib/errors/canary-only-config-error.js +1 -1
  82. qalita/_frontend/node_modules/next/dist/telemetry/anonymous-meta.js +1 -1
  83. qalita/_frontend/node_modules/next/dist/telemetry/events/version.js +2 -2
  84. qalita/_frontend/node_modules/next/package.json +15 -15
  85. qalita/_frontend/package.json +4 -4
  86. qalita/_frontend/server.js +1 -1
  87. qalita/commands/worker.py +11 -10
  88. qalita/commands/worker_grpc.py +110 -0
  89. qalita/grpc/client.py +42 -2
  90. qalita/grpc/protos/qalita.proto +26 -0
  91. qalita/grpc/protos/qalita_pb2.py +80 -76
  92. qalita/grpc/protos/qalita_pb2_grpc.py +1 -1
  93. qalita/internal/action_executor.py +896 -0
  94. qalita/internal/utils.py +1 -1
  95. {qalita-2.9.0.dist-info → qalita-2.9.2.dist-info}/METADATA +3 -3
  96. {qalita-2.9.0.dist-info → qalita-2.9.2.dist-info}/RECORD +102 -101
  97. qalita/_frontend/.next/static/chunks/0b082245f106d665.js +0 -1
  98. qalita/_frontend/.next/static/chunks/58689c96b0676c41.js +0 -1
  99. qalita/_frontend/.next/static/chunks/acc5da18ff20daa1.js +0 -3
  100. qalita/_frontend/.next/static/chunks/bdc8a8e7721f5675.js +0 -2
  101. qalita/_frontend/.next/static/chunks/e0df86cbf44bbf9f.js +0 -1
  102. qalita/_frontend/.next/static/chunks/e6ce59ba40b863f2.js +0 -1
  103. qalita/_frontend/.next/static/chunks/ec4b1f1e3cd3ae43.js +0 -1
  104. qalita/_frontend/.next/static/chunks/turbopack-d21156d03715fafa.js +0 -4
  105. /qalita/_frontend/.next/static/{rObHWX45g5zAJ7RtwthzS → SlJmHVnRND1B7HlzvPJuC}/_buildManifest.js +0 -0
  106. /qalita/_frontend/.next/static/{rObHWX45g5zAJ7RtwthzS → SlJmHVnRND1B7HlzvPJuC}/_clientMiddlewareManifest.json +0 -0
  107. /qalita/_frontend/.next/static/{rObHWX45g5zAJ7RtwthzS → SlJmHVnRND1B7HlzvPJuC}/_ssgManifest.js +0 -0
  108. {qalita-2.9.0.dist-info → qalita-2.9.2.dist-info}/WHEEL +0 -0
  109. {qalita-2.9.0.dist-info → qalita-2.9.2.dist-info}/entry_points.txt +0 -0
  110. {qalita-2.9.0.dist-info → qalita-2.9.2.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ b/qalita/internal/action_executor.py
@@ -0,0 +1,896 @@
+"""
+# QALITA (c) COPYRIGHT 2025 - ALL RIGHTS RESERVED -
+Action Executor module for Studio Agent integration.
+
+This module provides an abstraction layer that translates LLM agent commands
+into concrete operations on data sources (SQL queries, file manipulations, etc.).
+"""
+
+import json
+import time
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+
+from qalita.internal.utils import logger
+from qalita.internal.data_preview import (
+    DataPreviewResult,
+    _dataframe_to_preview,
+    _error_result,
+    DEFAULT_ROW_LIMIT,
+)
+
+
+@dataclass
+class ActionResult:
+    """Result of an action execution."""
+
+    ok: bool
+    action_type: str
+    error: Optional[str] = None
+    result_json: Optional[str] = None  # Structured result as JSON string
+    data: Optional[DataPreviewResult] = None  # Tabular data if applicable
+    execution_time_ms: int = 0
+
+
+# Supported action types
+ACTION_TYPES = {
+    "query": "Execute a SQL query on a database source",
+    "read_data": "Read data from a file or database source",
+    "filter": "Filter data based on conditions",
+    "aggregate": "Perform aggregation on data",
+    "describe": "Get metadata about a source (schema, columns, row count)",
+    "sample": "Get a random sample of data",
+    "count": "Count rows in a source or query result",
+    "distinct": "Get distinct values from a column",
+    "head": "Get first N rows from a source",
+    "tail": "Get last N rows from a source",
+}
+
+
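This dictionary doubles as a catalog of what the agent may ask for. A minimal sketch of how it might be surfaced to the LLM (the describe_actions helper below is illustrative, not part of the package):

def describe_actions(action_types: dict) -> str:
    # One "- name: description" line per supported action.
    return "\n".join(f"- {name}: {desc}" for name, desc in action_types.items())

print(describe_actions({"head": "Get first N rows from a source"}))
# - head: Get first N rows from a source
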
+class ActionExecutor:
+    """
+    Executes actions requested by the LLM agent.
+
+    This class provides a unified interface for executing various data operations
+    on different source types (databases, files, etc.).
+    """
+
+    def __init__(self):
+        """Initialize the action executor."""
+        self._engines: Dict[int, Any] = {}  # Cache for database engines
+
+    def execute(
+        self,
+        action_type: str,
+        source_config: dict,
+        params: dict,
+        timeout_seconds: Optional[int] = None,
+    ) -> ActionResult:
+        """
+        Execute an action on a data source.
+
+        Args:
+            action_type: Type of action to execute (query, read_data, etc.)
+            source_config: Source configuration dict with 'type' and 'config' keys
+            params: Action parameters (specific to each action type)
+            timeout_seconds: Optional timeout for the action
+
+        Returns:
+            ActionResult with the execution result
+        """
+        start_time = time.time()
+
+        if action_type not in ACTION_TYPES:
+            return ActionResult(
+                ok=False,
+                action_type=action_type,
+                error=f"Unknown action type: {action_type}. Supported: {list(ACTION_TYPES.keys())}",
+            )
+
+        handlers = {
+            "query": self._execute_query,
+            "read_data": self._read_data,
+            "filter": self._filter_data,
+            "aggregate": self._aggregate_data,
+            "describe": self._describe_source,
+            "sample": self._sample_data,
+            "count": self._count_rows,
+            "distinct": self._get_distinct,
+            "head": self._get_head,
+            "tail": self._get_tail,
+        }
+
+        handler = handlers.get(action_type)
+        if not handler:
+            return ActionResult(
+                ok=False,
+                action_type=action_type,
+                error=f"Handler not implemented for action: {action_type}",
+            )
+
+        try:
+            result = handler(source_config, params)
+            result.execution_time_ms = int((time.time() - start_time) * 1000)
+            return result
+        except Exception as e:
+            logger.error(f"Error executing action {action_type}: {e}")
+            return ActionResult(
+                ok=False,
+                action_type=action_type,
+                error=str(e),
+                execution_time_ms=int((time.time() - start_time) * 1000),
+            )
+
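As a usage sketch, dispatching an action against a hypothetical SQLite source (the path, table, and config shape are placeholders, not values from the package):

from qalita.internal.action_executor import ActionExecutor

executor = ActionExecutor()
source_config = {"type": "sqlite", "config": {"database": "/tmp/example.db"}}  # hypothetical source
result = executor.execute("head", source_config, {"table": "users", "n": 5})
if result.ok:
    print(result.result_json, f"({result.execution_time_ms} ms)")
else:
    print("action failed:", result.error)

Note that unknown action types and handler exceptions are both converted into an ok=False ActionResult rather than raising, so callers only ever branch on result.ok.
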
+    def _get_database_engine(self, source_config: dict) -> Any:
+        """Get or create a SQLAlchemy engine for a database source."""
+        from sqlalchemy import create_engine
+
+        config = source_config.get("config", {})
+        source_type = source_config.get("type", "").lower()
+
+        connection_string = config.get("connection_string")
+        if not connection_string:
+            db_type_map = {
+                "postgresql": "postgresql",
+                "mysql": "mysql",
+                "mssql": "mssql+pymssql",
+                "oracle": "oracle+oracledb",
+                "sqlite": "sqlite",
+            }
+
+            dialect = db_type_map.get(source_type)
+            if not dialect:
+                raise ValueError(f"Unsupported database type: {source_type}")
+
+            if source_type == "sqlite":
+                database_path = config.get("database", ":memory:")
+                connection_string = f"sqlite:///{database_path}"
+            elif source_type == "oracle":
+                connection_string = (
+                    f"{dialect}://{config['username']}:{config['password']}"
+                    f"@{config['host']}:{config['port']}/?service_name={config['database']}"
+                )
+            else:
+                connection_string = (
+                    f"{dialect}://{config['username']}:{config['password']}"
+                    f"@{config['host']}:{config['port']}/{config['database']}"
+                )
+
+        return create_engine(connection_string)
+
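An explicit connection_string in the config bypasses URL construction entirely; otherwise the generic branch builds a dialect://user:password@host:port/database URL. With the hypothetical PostgreSQL config below, the resulting SQLAlchemy URL would be:

config = {"username": "qalita", "password": "s3cret", "host": "db.local", "port": 5432, "database": "warehouse"}
url = f"postgresql://{config['username']}:{config['password']}@{config['host']}:{config['port']}/{config['database']}"
print(url)  # postgresql://qalita:s3cret@db.local:5432/warehouse
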
+    def _is_database_source(self, source_config: dict) -> bool:
+        """Check if the source is a database."""
+        source_type = source_config.get("type", "").lower()
+        return source_type in ("postgresql", "mysql", "mssql", "oracle", "sqlite")
+
+    def _is_file_source(self, source_config: dict) -> bool:
+        """Check if the source is a file."""
+        source_type = source_config.get("type", "").lower()
+        return source_type in ("file", "csv", "excel", "parquet", "json", "folder")
+
+    def _execute_query(self, source_config: dict, params: dict) -> ActionResult:
+        """Execute a SQL query on a database source."""
+        if not self._is_database_source(source_config):
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query action only supported for database sources, not {source_config.get('type')}",
+            )
+
+        sql = params.get("sql")
+        if not sql:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="SQL query is required for 'query' action",
+            )
+
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+
+        # Add LIMIT if not present (for safety)
+        sql_lower = sql.strip().lower()
+        if "limit" not in sql_lower and not sql_lower.startswith(("insert", "update", "delete", "create", "drop", "alter")):
+            sql = f"{sql.rstrip(';')} LIMIT {limit}"
+
+        try:
+            engine = self._get_database_engine(source_config)
+            with engine.connect() as conn:
+                df = pd.read_sql(sql, conn)
+
+            preview = _dataframe_to_preview(df, limit)
+            return ActionResult(
+                ok=True,
+                action_type="query",
+                data=preview,
+                result_json=json.dumps({"rows_returned": len(df), "columns": list(df.columns)}),
+            )
+        except Exception as e:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query execution failed: {str(e)}",
+            )
+
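The LIMIT guard is a plain substring test: it appends a row cap only when the statement contains no "limit" token anywhere and does not start with a DML/DDL keyword. A standalone sketch of its effect (the real DEFAULT_ROW_LIMIT lives in qalita.internal.data_preview; 200 here is an assumed value):

def with_limit(sql: str, limit: int = 200) -> str:
    sql_lower = sql.strip().lower()
    if "limit" not in sql_lower and not sql_lower.startswith(
        ("insert", "update", "delete", "create", "drop", "alter")
    ):
        return f"{sql.rstrip(';')} LIMIT {limit}"
    return sql

print(with_limit("SELECT * FROM orders;"))  # SELECT * FROM orders LIMIT 200
print(with_limit("SELECT * FROM limits"))   # unchanged: "limit" occurs inside the table name

The appended clause is MySQL/PostgreSQL/SQLite syntax; MSSQL and Oracle would need TOP or FETCH FIRST instead, so those dialects rely on the query already being bounded.
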
+    def _read_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Read data from a source."""
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+        columns = params.get("columns")  # Optional list of columns to select
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="read_data",
+                    error="Table name is required for database sources",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            cols = ", ".join(columns) if columns else "*"
+            sql = f"SELECT {cols} FROM {qualified_table} LIMIT {limit}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, limit)
+                return ActionResult(
+                    ok=True,
+                    action_type="read_data",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df), "columns": list(df.columns)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="read_data",
+                    error=f"Failed to read data: {str(e)}",
+                )
+
+        elif self._is_file_source(source_config):
+            return self._read_file_data(source_config, params)
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="read_data",
+                error=f"Unsupported source type: {source_config.get('type')}",
+            )
+
+    def _read_file_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Read data from a file source."""
+        import os
+
+        config = source_config.get("config", {})
+        source_type = source_config.get("type", "").lower()
+        path = config.get("path")
+
+        if not path:
+            return ActionResult(
+                ok=False,
+                action_type="read_data",
+                error="File path not configured",
+            )
+
+        if not os.path.exists(path):
+            return ActionResult(
+                ok=False,
+                action_type="read_data",
+                error=f"File not found: {path}",
+            )
+
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+        columns = params.get("columns")
+
+        try:
+            if source_type in ("csv", "file") and path.endswith(".csv"):
+                usecols = columns if columns else None
+                df = pd.read_csv(path, nrows=limit, usecols=usecols, low_memory=False)
+            elif source_type == "excel" or path.endswith((".xlsx", ".xls")):
+                usecols = columns if columns else None
+                df = pd.read_excel(path, nrows=limit, usecols=usecols, engine="openpyxl")
+            elif source_type == "parquet" or path.endswith(".parquet"):
+                df = pd.read_parquet(path, columns=columns)
+                df = df.head(limit)
+            elif source_type == "json" or path.endswith(".json"):
+                df = pd.read_json(path)
+                if columns:
+                    df = df[columns]
+                df = df.head(limit)
+            else:
+                return ActionResult(
+                    ok=False,
+                    action_type="read_data",
+                    error=f"Unsupported file type: {source_type}",
+                )
+
+            preview = _dataframe_to_preview(df, limit)
+            return ActionResult(
+                ok=True,
+                action_type="read_data",
+                data=preview,
+                result_json=json.dumps({"rows_returned": len(df), "columns": list(df.columns)}),
+            )
+        except Exception as e:
+            return ActionResult(
+                ok=False,
+                action_type="read_data",
+                error=f"Failed to read file: {str(e)}",
+            )
+
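A file-source sketch showing the column projection and row cap (the path is a placeholder):

from qalita.internal.action_executor import ActionExecutor

csv_source = {"type": "csv", "config": {"path": "/data/customers.csv"}}  # hypothetical file
result = ActionExecutor().execute(
    "read_data", csv_source, {"columns": ["id", "country"], "limit": 50}
)
# For CSV/Excel the cap is pushed into the reader (nrows=50); for Parquet/JSON
# the file is loaded first and then truncated with df.head(50).
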
+    def _filter_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Filter data based on a condition."""
+        condition = params.get("condition")
+        if not condition:
+            return ActionResult(
+                ok=False,
+                action_type="filter",
+                error="Filter condition is required",
+            )
+
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="filter",
+                    error="Table name is required for database sources",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+            columns = params.get("columns")
+            cols = ", ".join(columns) if columns else "*"
+
+            sql = f"SELECT {cols} FROM {qualified_table} WHERE {condition} LIMIT {limit}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, limit)
+                return ActionResult(
+                    ok=True,
+                    action_type="filter",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df), "condition": condition}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="filter",
+                    error=f"Filter failed: {str(e)}",
+                )
+
+        elif self._is_file_source(source_config):
+            # First read the data
+            read_result = self._read_file_data(source_config, {"limit": 10000})  # Read more for filtering
+            if not read_result.ok or not read_result.data:
+                return ActionResult(
+                    ok=False,
+                    action_type="filter",
+                    error=read_result.error or "Failed to read data for filtering",
+                )
+
+            # Reconstruct dataframe and filter
+            try:
+                df = pd.DataFrame(
+                    [row for row in read_result.data.rows],
+                    columns=read_result.data.headers
+                )
+                # Use query for filtering
+                df_filtered = df.query(condition)
+                df_filtered = df_filtered.head(limit)
+
+                preview = _dataframe_to_preview(df_filtered, limit)
+                return ActionResult(
+                    ok=True,
+                    action_type="filter",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df_filtered), "condition": condition}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="filter",
+                    error=f"Filter failed: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="filter",
+                error=f"Unsupported source type: {source_config.get('type')}",
+            )
+
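Note the two condition dialects: database sources receive the condition verbatim as a SQL WHERE fragment, while file sources evaluate it with pandas DataFrame.query, which has its own syntax. A quick comparison (data is illustrative):

import pandas as pd

df = pd.DataFrame({"age": [15, 42, 67], "country": ["FR", "DE", "FR"]})
print(df.query("age > 18 and country == 'FR'"))  # pandas syntax: and/or, ==
# The equivalent condition for a database source would be the SQL fragment:
#   age > 18 AND country = 'FR'
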
+    def _aggregate_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Perform aggregation on data."""
+        group_by = params.get("group_by")  # Column(s) to group by
+        agg_func = params.get("agg_func", "count")  # Aggregation function
+        agg_column = params.get("agg_column")  # Column to aggregate
+
+        if not group_by:
+            return ActionResult(
+                ok=False,
+                action_type="aggregate",
+                error="group_by column is required for aggregation",
+            )
+
+        limit = params.get("limit", DEFAULT_ROW_LIMIT)
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="aggregate",
+                    error="Table name is required for database sources",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            # Build SQL aggregation
+            group_cols = group_by if isinstance(group_by, list) else [group_by]
+            group_str = ", ".join(group_cols)
+
+            if agg_func.upper() == "COUNT":
+                agg_expr = "COUNT(*) as count"
+            elif agg_column:
+                agg_expr = f"{agg_func.upper()}({agg_column}) as {agg_func.lower()}_{agg_column}"
+            else:
+                agg_expr = "COUNT(*) as count"
+
+            sql = f"SELECT {group_str}, {agg_expr} FROM {qualified_table} GROUP BY {group_str} LIMIT {limit}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, limit)
+                return ActionResult(
+                    ok=True,
+                    action_type="aggregate",
+                    data=preview,
+                    result_json=json.dumps({
+                        "groups": len(df),
+                        "group_by": group_by,
+                        "agg_func": agg_func,
+                    }),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="aggregate",
+                    error=f"Aggregation failed: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="aggregate",
+                error="Aggregation currently only supported for database sources",
+            )
+
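A worked example of the SQL the builder above produces for hypothetical params (the orders table and its columns are placeholders):

params = {"group_by": "country", "agg_func": "avg", "agg_column": "amount"}
agg_expr = f"{params['agg_func'].upper()}({params['agg_column']}) as {params['agg_func'].lower()}_{params['agg_column']}"
sql = f"SELECT {params['group_by']}, {agg_expr} FROM orders GROUP BY {params['group_by']} LIMIT 200"
print(sql)
# SELECT country, AVG(amount) as avg_amount FROM orders GROUP BY country LIMIT 200
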
+    def _describe_source(self, source_config: dict, params: dict) -> ActionResult:
+        """Get metadata about a source."""
+        source_type = source_config.get("type", "").lower()
+        config = source_config.get("config", {})
+
+        metadata = {
+            "source_type": source_type,
+            "name": source_config.get("name", "unknown"),
+        }
+
+        if self._is_database_source(source_config):
+            try:
+                from sqlalchemy import inspect
+
+                engine = self._get_database_engine(source_config)
+                inspector = inspect(engine)
+
+                # Get schema info
+                schema = config.get("schema")
+                tables = inspector.get_table_names(schema=schema)
+
+                metadata["tables"] = tables
+                metadata["schema"] = schema
+
+                # Get column info for specified table or first table
+                table = params.get("table") or config.get("table") or (tables[0] if tables else None)
+                if table:
+                    columns = inspector.get_columns(table, schema=schema)
+                    metadata["table"] = table
+                    metadata["columns"] = [
+                        {"name": col["name"], "type": str(col["type"])}
+                        for col in columns
+                    ]
+
+                    # Get row count
+                    with engine.connect() as conn:
+                        from sqlalchemy import text
+                        qualified_table = f"{schema}.{table}" if schema else table
+                        result = conn.execute(text(f"SELECT COUNT(*) FROM {qualified_table}"))
+                        metadata["row_count"] = result.scalar()
+
+                return ActionResult(
+                    ok=True,
+                    action_type="describe",
+                    result_json=json.dumps(metadata),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="describe",
+                    error=f"Failed to describe database source: {str(e)}",
+                )
+
+        elif self._is_file_source(source_config):
+            import os
+
+            path = config.get("path")
+            if not path:
+                return ActionResult(
+                    ok=False,
+                    action_type="describe",
+                    error="File path not configured",
+                )
+
+            if not os.path.exists(path):
+                return ActionResult(
+                    ok=False,
+                    action_type="describe",
+                    error=f"File not found: {path}",
+                )
+
+            metadata["path"] = path
+            metadata["file_size_bytes"] = os.path.getsize(path)
+
+            try:
+                # Read a small sample to get column info
+                if source_type in ("csv", "file") and path.endswith(".csv"):
+                    df = pd.read_csv(path, nrows=5, low_memory=False)
+                elif source_type == "excel" or path.endswith((".xlsx", ".xls")):
+                    df = pd.read_excel(path, nrows=5, engine="openpyxl")
+                elif source_type == "parquet" or path.endswith(".parquet"):
+                    df = pd.read_parquet(path)
+                    df = df.head(5)
+                elif source_type == "json" or path.endswith(".json"):
+                    df = pd.read_json(path)
+                    df = df.head(5)
+                else:
+                    df = None
+
+                if df is not None:
+                    metadata["columns"] = [
+                        {"name": col, "type": str(df[col].dtype)}
+                        for col in df.columns
+                    ]
+                    # Try to get total row count
+                    if source_type in ("csv", "file") and path.endswith(".csv"):
+                        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
+                            metadata["row_count"] = sum(1 for _ in f) - 1  # Exclude header
+                    elif source_type == "parquet" or path.endswith(".parquet"):
+                        metadata["row_count"] = len(pd.read_parquet(path))
+
+                return ActionResult(
+                    ok=True,
+                    action_type="describe",
+                    result_json=json.dumps(metadata),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="describe",
+                    error=f"Failed to describe file source: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="describe",
+                error=f"Describe not supported for source type: {source_type}",
+            )
+
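For a database source, a successful describe returns result_json shaped roughly like the dict below (keys taken from the code above, values illustrative):

example_metadata = {
    "source_type": "postgresql",
    "name": "warehouse",
    "tables": ["customers", "orders"],
    "schema": "public",
    "table": "orders",
    "columns": [{"name": "id", "type": "INTEGER"}, {"name": "amount", "type": "NUMERIC"}],
    "row_count": 123456,
}
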
+    def _sample_data(self, source_config: dict, params: dict) -> ActionResult:
+        """Get a random sample of data."""
+        n = params.get("n", 10)  # Number of samples
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="sample",
+                    error="Table name is required",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+            source_type = source_config.get("type", "").lower()
+
+            # Different databases have different random sampling syntax
+            if source_type == "postgresql":
+                sql = f"SELECT * FROM {qualified_table} ORDER BY RANDOM() LIMIT {n}"
+            elif source_type == "mysql":
+                sql = f"SELECT * FROM {qualified_table} ORDER BY RAND() LIMIT {n}"
+            else:
+                sql = f"SELECT * FROM {qualified_table} LIMIT {n}"  # Fallback
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, n)
+                return ActionResult(
+                    ok=True,
+                    action_type="sample",
+                    data=preview,
+                    result_json=json.dumps({"samples": len(df)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="sample",
+                    error=f"Sampling failed: {str(e)}",
+                )
+
+        elif self._is_file_source(source_config):
+            # Read all data and sample
+            read_result = self._read_file_data(source_config, {"limit": 10000})
+            if not read_result.ok or not read_result.data:
+                return ActionResult(
+                    ok=False,
+                    action_type="sample",
+                    error=read_result.error or "Failed to read data for sampling",
+                )
+
+            try:
+                df = pd.DataFrame(
+                    [row for row in read_result.data.rows],
+                    columns=read_result.data.headers
+                )
+                df_sample = df.sample(n=min(n, len(df)))
+
+                preview = _dataframe_to_preview(df_sample, n)
+                return ActionResult(
+                    ok=True,
+                    action_type="sample",
+                    data=preview,
+                    result_json=json.dumps({"samples": len(df_sample)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="sample",
+                    error=f"Sampling failed: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="sample",
+                error=f"Sampling not supported for source type: {source_config.get('type')}",
+            )
+
+    def _count_rows(self, source_config: dict, params: dict) -> ActionResult:
+        """Count rows in a source."""
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+            condition = params.get("condition")
+
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="count",
+                    error="Table name is required",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            sql = f"SELECT COUNT(*) as count FROM {qualified_table}"
+            if condition:
+                sql += f" WHERE {condition}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    from sqlalchemy import text
+                    result = conn.execute(text(sql))
+                    count = result.scalar()
+
+                return ActionResult(
+                    ok=True,
+                    action_type="count",
+                    result_json=json.dumps({"count": count, "table": table}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="count",
+                    error=f"Count failed: {str(e)}",
+                )
+
+        else:
+            # Use describe for file sources
+            describe_result = self._describe_source(source_config, params)
+            if describe_result.ok and describe_result.result_json:
+                metadata = json.loads(describe_result.result_json)
+                if "row_count" in metadata:
+                    return ActionResult(
+                        ok=True,
+                        action_type="count",
+                        result_json=json.dumps({"count": metadata["row_count"]}),
+                    )
+
+            return ActionResult(
+                ok=False,
+                action_type="count",
+                error="Could not determine row count",
+            )
+
+    def _get_distinct(self, source_config: dict, params: dict) -> ActionResult:
+        """Get distinct values from a column."""
+        column = params.get("column")
+        if not column:
+            return ActionResult(
+                ok=False,
+                action_type="distinct",
+                error="Column name is required for distinct action",
+            )
+
+        limit = params.get("limit", 100)
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="distinct",
+                    error="Table name is required",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            sql = f"SELECT DISTINCT {column} FROM {qualified_table} LIMIT {limit}"
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                values = df[column].tolist()
+                return ActionResult(
+                    ok=True,
+                    action_type="distinct",
+                    result_json=json.dumps({
+                        "column": column,
+                        "distinct_count": len(values),
+                        "values": values[:limit],
+                    }),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="distinct",
+                    error=f"Distinct failed: {str(e)}",
+                )
+
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="distinct",
+                error="Distinct currently only supported for database sources",
+            )
+
+    def _get_head(self, source_config: dict, params: dict) -> ActionResult:
+        """Get first N rows from a source."""
+        n = params.get("n", 10)
+        params["limit"] = n
+        return self._read_data(source_config, params)
+
+    def _get_tail(self, source_config: dict, params: dict) -> ActionResult:
+        """Get last N rows from a source."""
+        n = params.get("n", 10)
+
+        if self._is_database_source(source_config):
+            config = source_config.get("config", {})
+            table = params.get("table") or config.get("table") or config.get("default_table")
+
+            if not table:
+                return ActionResult(
+                    ok=False,
+                    action_type="tail",
+                    error="Table name is required",
+                )
+
+            schema = config.get("schema")
+            qualified_table = f"{schema}.{table}" if schema else table
+
+            # This requires knowing the order - use a subquery with DESC ordering
+            sql = f"""
+                SELECT * FROM (
+                    SELECT * FROM {qualified_table} ORDER BY 1 DESC LIMIT {n}
+                ) sub ORDER BY 1 ASC
+            """
+
+            try:
+                engine = self._get_database_engine(source_config)
+                with engine.connect() as conn:
+                    df = pd.read_sql(sql, conn)
+
+                preview = _dataframe_to_preview(df, n)
+                return ActionResult(
+                    ok=True,
+                    action_type="tail",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="tail",
+                    error=f"Tail failed: {str(e)}",
+                )
+
+        else:
+            # For files, read all and take tail
+            read_result = self._read_file_data(source_config, {"limit": 10000})
+            if not read_result.ok or not read_result.data:
+                return ActionResult(
+                    ok=False,
+                    action_type="tail",
+                    error=read_result.error or "Failed to read data",
+                )
+
+            try:
+                df = pd.DataFrame(
+                    [row for row in read_result.data.rows],
+                    columns=read_result.data.headers
+                )
+                df_tail = df.tail(n)
+
+                preview = _dataframe_to_preview(df_tail, n)
+                return ActionResult(
+                    ok=True,
+                    action_type="tail",
+                    data=preview,
+                    result_json=json.dumps({"rows_returned": len(df_tail)}),
+                )
+            except Exception as e:
+                return ActionResult(
+                    ok=False,
+                    action_type="tail",
+                    error=f"Tail failed: {str(e)}",
+                )
+
+
+# Singleton instance
+_executor: Optional[ActionExecutor] = None
+
+
+def get_action_executor() -> ActionExecutor:
+    """Get the singleton ActionExecutor instance."""
+    global _executor
+    if _executor is None:
+        _executor = ActionExecutor()
+    return _executor
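
The module exposes a process-wide singleton accessor; a final end-to-end sketch (the source values are placeholders):

from qalita.internal.action_executor import get_action_executor

executor = get_action_executor()  # same instance on every call
assert executor is get_action_executor()
meta = executor.execute(
    "describe", {"type": "sqlite", "config": {"database": "/tmp/example.db"}}, {}
)
print(meta.ok, meta.result_json or meta.error)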