qalita 2.6.3__py3-none-any.whl → 2.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qalita/__main__.py +29 -19
- qalita/_frontend/.next/BUILD_ID +1 -1
- qalita/_frontend/.next/build-manifest.json +7 -7
- qalita/_frontend/.next/prerender-manifest.json +3 -3
- qalita/_frontend/.next/server/app/_global-error/page/build-manifest.json +5 -5
- qalita/_frontend/.next/server/app/_global-error/page.js +1 -1
- qalita/_frontend/.next/server/app/_global-error/page.js.nft.json +1 -1
- qalita/_frontend/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/_global-error.html +2 -2
- qalita/_frontend/.next/server/app/_global-error.rsc +7 -7
- qalita/_frontend/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/_global-error.segments/_full.segment.rsc +7 -7
- qalita/_frontend/.next/server/app/_global-error.segments/_head.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/_global-error.segments/_index.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_not-found/page/build-manifest.json +5 -5
- qalita/_frontend/.next/server/app/_not-found/page.js +1 -1
- qalita/_frontend/.next/server/app/_not-found/page.js.nft.json +1 -1
- qalita/_frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/_not-found.html +1 -1
- qalita/_frontend/.next/server/app/_not-found.rsc +14 -12
- qalita/_frontend/.next/server/app/_not-found.segments/_full.segment.rsc +14 -12
- qalita/_frontend/.next/server/app/_not-found.segments/_head.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/_not-found.segments/_index.segment.rsc +6 -4
- qalita/_frontend/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/_not-found.segments/_not-found.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/_not-found.segments/_tree.segment.rsc +3 -2
- qalita/_frontend/.next/server/app/page/build-manifest.json +5 -5
- qalita/_frontend/.next/server/app/page.js +1 -1
- qalita/_frontend/.next/server/app/page.js.nft.json +1 -1
- qalita/_frontend/.next/server/app/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/sources/add/page/build-manifest.json +5 -5
- qalita/_frontend/.next/server/app/sources/add/page.js +1 -1
- qalita/_frontend/.next/server/app/sources/add/page.js.nft.json +1 -1
- qalita/_frontend/.next/server/app/sources/add/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/sources/add.html +1 -1
- qalita/_frontend/.next/server/app/sources/add.rsc +18 -16
- qalita/_frontend/.next/server/app/sources/add.segments/_full.segment.rsc +18 -16
- qalita/_frontend/.next/server/app/sources/add.segments/_head.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/sources/add.segments/_index.segment.rsc +6 -4
- qalita/_frontend/.next/server/app/sources/add.segments/_tree.segment.rsc +3 -2
- qalita/_frontend/.next/server/app/sources/add.segments/sources/add/__PAGE__.segment.rsc +4 -4
- qalita/_frontend/.next/server/app/sources/add.segments/sources/add.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/sources/add.segments/sources.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/sources/edit/[id]/page/build-manifest.json +5 -5
- qalita/_frontend/.next/server/app/sources/edit/[id]/page.js +1 -1
- qalita/_frontend/.next/server/app/sources/edit/[id]/page.js.nft.json +1 -1
- qalita/_frontend/.next/server/app/sources/edit/[id]/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/sources/page/build-manifest.json +5 -5
- qalita/_frontend/.next/server/app/sources/page.js +1 -1
- qalita/_frontend/.next/server/app/sources/page.js.nft.json +1 -1
- qalita/_frontend/.next/server/app/sources/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/sources.html +1 -1
- qalita/_frontend/.next/server/app/sources.rsc +18 -16
- qalita/_frontend/.next/server/app/sources.segments/_full.segment.rsc +18 -16
- qalita/_frontend/.next/server/app/sources.segments/_head.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/sources.segments/_index.segment.rsc +6 -4
- qalita/_frontend/.next/server/app/sources.segments/_tree.segment.rsc +3 -2
- qalita/_frontend/.next/server/app/sources.segments/sources/__PAGE__.segment.rsc +4 -4
- qalita/_frontend/.next/server/app/sources.segments/sources.segment.rsc +3 -3
- qalita/_frontend/.next/server/chunks/[root-of-the-server]__e868c9e1._.js +1 -1
- qalita/_frontend/.next/server/chunks/[root-of-the-server]__ebaae723._.js +1 -1
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__17f2c9b6._.js +1 -1
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__1d5b5394._.js +3 -0
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__21824174._.js +3 -0
- qalita/_frontend/.next/server/chunks/ssr/{[root-of-the-server]__b9356576._.js → [root-of-the-server]__336e4c46._.js} +2 -2
- qalita/_frontend/.next/server/chunks/ssr/{[root-of-the-server]__c507bbfe._.js → [root-of-the-server]__7876511a._.js} +2 -2
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__be91267c._.js +3 -0
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__d15765f1._.js +3 -0
- qalita/_frontend/.next/server/chunks/ssr/{_3b4a232c._.js → _404f6e81._.js} +4 -4
- qalita/_frontend/.next/server/chunks/ssr/{_cd257a0c._.js → _6a67f6f0._.js} +4 -4
- qalita/_frontend/.next/server/chunks/ssr/_cafb65ac._.js +3 -0
- qalita/_frontend/.next/server/chunks/ssr/_cb7b44d6._.js +1 -1
- qalita/_frontend/.next/server/chunks/ssr/_d44c43ed._.js +3 -0
- qalita/_frontend/.next/server/chunks/ssr/components_DashboardContent_tsx_c3635665._.js +1 -1
- qalita/_frontend/.next/server/chunks/ssr/node_modules_next_dist_server_route-modules_app-page_vendored_a443a6bf._.js +3 -0
- qalita/_frontend/.next/server/middleware-build-manifest.js +5 -5
- qalita/_frontend/.next/server/pages/404.html +1 -1
- qalita/_frontend/.next/server/pages/500.html +2 -2
- qalita/_frontend/.next/server/server-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/server-reference-manifest.json +1 -1
- qalita/_frontend/.next/static/chunks/02a64570f0a14789.js +1 -0
- qalita/_frontend/.next/static/chunks/{7340adf74ff47ec0.js → 0b082245f106d665.js} +1 -1
- qalita/_frontend/.next/static/chunks/27b3ba70c7ef50a8.js +1 -0
- qalita/_frontend/.next/static/chunks/517e9b74d1a3c0ce.js +1 -0
- qalita/_frontend/.next/static/chunks/58689c96b0676c41.js +1 -0
- qalita/_frontend/.next/static/chunks/{236f7e5abd6f09ff.js → 89ba62a8ba9b79ce.js} +2 -2
- qalita/_frontend/.next/static/chunks/acc5da18ff20daa1.js +3 -0
- qalita/_frontend/.next/static/chunks/bdc8a8e7721f5675.js +2 -0
- qalita/_frontend/.next/static/chunks/e0df86cbf44bbf9f.js +1 -0
- qalita/_frontend/.next/static/chunks/e4c3a252774ab7fd.css +1 -0
- qalita/_frontend/.next/static/chunks/e6ce59ba40b863f2.js +1 -0
- qalita/_frontend/.next/static/chunks/{30ea11065999f7ac.js → ec4b1f1e3cd3ae43.js} +1 -1
- qalita/_frontend/.next/static/chunks/{turbopack-25186fc8e1264445.js → turbopack-d21156d03715fafa.js} +1 -1
- qalita/_frontend/node_modules/@swc/helpers/package.json +225 -2
- qalita/_frontend/node_modules/next/node_modules/@swc/helpers/package.json +471 -0
- qalita/_frontend/package.json +12 -1
- qalita/commands/pack.py +61 -8
- qalita/commands/worker.py +46 -20
- qalita/commands/worker_grpc.py +941 -0
- qalita/grpc/__init__.py +8 -0
- qalita/grpc/client.py +693 -0
- qalita/grpc/protos/__init__.py +4 -0
- qalita/grpc/protos/qalita.proto +391 -0
- qalita/grpc/protos/qalita_pb2.py +112 -0
- qalita/grpc/protos/qalita_pb2_grpc.py +588 -0
- qalita/internal/data_preview.py +565 -0
- qalita/internal/request.py +4 -3
- qalita/internal/utils.py +1 -1
- qalita/web/app.py +6 -2
- qalita/web/blueprints/dashboard.py +12 -44
- qalita/web/blueprints/helpers.py +119 -46
- qalita/web/blueprints/sources.py +5 -99
- qalita/web/blueprints/workers.py +6 -6
- {qalita-2.6.3.dist-info → qalita-2.8.1.dist-info}/METADATA +7 -1
- {qalita-2.6.3.dist-info → qalita-2.8.1.dist-info}/RECORD +124 -111
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__345b6cae._.js +0 -3
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__7213ba1d._.js +0 -3
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__9130e1f5._.js +0 -3
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__e2a7729d._.js +0 -3
- qalita/_frontend/.next/server/chunks/ssr/app_layout_tsx_271801d7._.js +0 -3
- qalita/_frontend/.next/static/chunks/0f84739db4a8acc7.js +0 -1
- qalita/_frontend/.next/static/chunks/1107bdca1eff6d34.css +0 -1
- qalita/_frontend/.next/static/chunks/4b0c5de8d4cc313f.js +0 -1
- qalita/_frontend/.next/static/chunks/4dd28bc3f722184a.js +0 -2
- qalita/_frontend/.next/static/chunks/711d597b816a80c1.js +0 -1
- qalita/_frontend/.next/static/chunks/bb29c2be4df20a40.js +0 -1
- qalita/_frontend/.next/static/chunks/ecf559101be0ae12.js +0 -3
- /qalita/_frontend/.next/static/{N9MqNrf23ZZkbbSW2aXkt → 8__jz-coq8Cacgz6IFGzi}/_buildManifest.js +0 -0
- /qalita/_frontend/.next/static/{N9MqNrf23ZZkbbSW2aXkt → 8__jz-coq8Cacgz6IFGzi}/_clientMiddlewareManifest.json +0 -0
- /qalita/_frontend/.next/static/{N9MqNrf23ZZkbbSW2aXkt → 8__jz-coq8Cacgz6IFGzi}/_ssgManifest.js +0 -0
- /qalita/_frontend/node_modules/{@swc → next/node_modules/@swc}/helpers/cjs/_interop_require_default.cjs +0 -0
- /qalita/_frontend/node_modules/{@swc → next/node_modules/@swc}/helpers/cjs/_interop_require_wildcard.cjs +0 -0
- {qalita-2.6.3.dist-info → qalita-2.8.1.dist-info}/WHEEL +0 -0
- {qalita-2.6.3.dist-info → qalita-2.8.1.dist-info}/entry_points.txt +0 -0
- {qalita-2.6.3.dist-info → qalita-2.8.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
"""
|
|
2
|
+
# QALITA (c) COPYRIGHT 2025 - ALL RIGHTS RESERVED -
|
|
3
|
+
Data Preview module for Studio integration.
|
|
4
|
+
|
|
5
|
+
This module provides data preview functionality for various source types,
|
|
6
|
+
used by the gRPC worker to respond to DataPreviewRequest from the platform.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import json
|
|
11
|
+
import mimetypes
|
|
12
|
+
import os
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from qalita.internal.utils import logger
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class DataPreviewResult:
    """Outcome of one preview request.

    ``ok`` and ``data_type`` are always set; the remaining fields carry the
    payload for a particular ``data_type`` and otherwise keep their defaults.
    """

    # True when the preview was produced, False when it failed.
    ok: bool
    # Kind of payload: table, image, pdf, text, json or error.
    data_type: str
    # Human-readable failure description.
    error: Optional[str] = None

    # Table payload: column names, stringified cells, and the reported row count.
    headers: list[str] = field(default_factory=list)
    rows: list[list[str]] = field(default_factory=list)
    total_rows: Optional[int] = None

    # Text / JSON payload.
    content: Optional[str] = None

    # Binary payload (image, pdf): base64 text plus its MIME type.
    binary_base64: Optional[str] = None
    mime_type: Optional[str] = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Maximum raw file size for binary content (5 MB, checked before base64 encoding)
|
|
43
|
+
MAX_BINARY_SIZE = 5 * 1024 * 1024
|
|
44
|
+
# Maximum content size for text/json (1MB)
|
|
45
|
+
MAX_TEXT_SIZE = 1 * 1024 * 1024
|
|
46
|
+
# Default row limit
|
|
47
|
+
DEFAULT_ROW_LIMIT = 1000
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _error_result(message: str) -> DataPreviewResult:
    """Wrap *message* in a failed ``DataPreviewResult`` of type ``error``."""
    failure = DataPreviewResult(ok=False, data_type="error", error=message)
    return failure
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _dataframe_to_preview(
    df: pd.DataFrame,
    limit: int = DEFAULT_ROW_LIMIT,
    total_rows: Optional[int] = None,
) -> DataPreviewResult:
    """Convert a pandas DataFrame into a table preview.

    Args:
        df: Data to preview.
        limit: Maximum number of rows copied into the result.
        total_rows: Row count to report; ``len(df)`` is used when omitted.

    Returns:
        A ``table`` result whose headers and cells are all strings (missing
        values become empty strings), or an ``error`` result if the
        conversion raises.
    """

    def _cell_to_text(value) -> str:
        # pd.isna() returns an array for list-like cells (e.g. nested
        # columns), which has no single truth value, so only scalars are
        # tested for missingness.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        return str(value)

    try:
        headers = [str(col) for col in df.columns.tolist()]

        # itertuples() keeps each column's own dtype; iterating row Series
        # would upcast mixed numeric rows and render integers as "1.0".
        rows = [
            [_cell_to_text(value) for value in record]
            for record in df.head(limit).itertuples(index=False, name=None)
        ]

        return DataPreviewResult(
            ok=True,
            data_type="table",
            headers=headers,
            rows=rows,
            total_rows=total_rows if total_rows is not None else len(df),
        )
    except Exception as e:
        logger.error(f"Error converting DataFrame to preview: {e}")
        return _error_result(f"Failed to convert data: {str(e)}")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def preview_csv(
    file_path: str,
    limit: int = DEFAULT_ROW_LIMIT,
    encoding: str = "utf-8",
) -> DataPreviewResult:
    """Preview the first rows of a CSV file.

    Args:
        file_path: Path of the CSV file.
        limit: Maximum number of data rows to load.
        encoding: Text encoding used both for counting and for parsing.

    Returns:
        A ``table`` result, or an ``error`` result when the file is missing
        or cannot be parsed.
    """
    try:
        if not os.path.exists(file_path):
            return _error_result(f"File not found: {file_path}")

        # Stream the file to count physical lines without loading the data;
        # the context manager guarantees the handle is closed.
        with open(file_path, encoding=encoding, errors="ignore") as handle:
            line_count = sum(1 for _ in handle)
        # One line is the header. This is a line count, so quoted fields that
        # span several lines make it an over-estimate.
        total_rows = max(0, line_count - 1)

        # Parse only the rows that will be shown.
        df = pd.read_csv(
            file_path,
            nrows=limit,
            low_memory=False,
            encoding=encoding,
            on_bad_lines="warn",
        )

        return _dataframe_to_preview(df, limit, total_rows)
    except Exception as e:
        logger.error(f"Error previewing CSV file {file_path}: {e}")
        return _error_result(f"Failed to read CSV: {str(e)}")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def preview_excel(
    file_path: str,
    limit: int = DEFAULT_ROW_LIMIT,
    sheet_name: Optional[str] = None,
) -> DataPreviewResult:
    """Preview the first rows of one worksheet of an Excel workbook.

    Args:
        file_path: Path of the workbook.
        limit: Maximum number of rows to load for display.
        sheet_name: Worksheet to read; the first sheet is used when falsy.

    Returns:
        A ``table`` result, or an ``error`` result on any failure.
    """
    try:
        if not os.path.exists(file_path):
            return _error_result(f"File not found: {file_path}")

        target_sheet = sheet_name or 0

        # Rows that will actually be displayed.
        preview_frame = pd.read_excel(
            file_path,
            sheet_name=target_sheet,
            nrows=limit,
            engine="openpyxl",
        )

        # Second pass over the first column only, used purely to count rows.
        first_column = pd.read_excel(
            file_path,
            sheet_name=target_sheet,
            engine="openpyxl",
            usecols=[0],
        )

        return _dataframe_to_preview(preview_frame, limit, len(first_column))
    except Exception as e:
        logger.error(f"Error previewing Excel file {file_path}: {e}")
        return _error_result(f"Failed to read Excel: {str(e)}")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def preview_parquet(
    file_path: str,
    limit: int = DEFAULT_ROW_LIMIT,
) -> DataPreviewResult:
    """Preview the first rows of a Parquet file.

    Args:
        file_path: Path of the Parquet file.
        limit: Maximum number of rows to include in the preview.

    Returns:
        A ``table`` result, or an ``error`` result on any failure.
    """
    try:
        if os.path.exists(file_path):
            # The whole file is loaded; the row limit is applied afterwards
            # when the frame is converted.
            frame = pd.read_parquet(file_path)
            row_count = len(frame)
            return _dataframe_to_preview(frame, limit, row_count)

        return _error_result(f"File not found: {file_path}")
    except Exception as e:
        logger.error(f"Error previewing Parquet file {file_path}: {e}")
        return _error_result(f"Failed to read Parquet: {str(e)}")
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def preview_json(file_path: str) -> DataPreviewResult:
    """Preview a JSON file as text.

    Content that fits within ``MAX_TEXT_SIZE`` characters is pretty-printed
    when it parses as JSON and returned raw otherwise. Longer content is cut
    at ``MAX_TEXT_SIZE`` characters and returned raw with a truncation marker
    appended.

    Args:
        file_path: Path of the JSON file (read as UTF-8).

    Returns:
        A ``json`` result, or an ``error`` result when the file is missing or
        cannot be read.
    """
    try:
        if not os.path.exists(file_path):
            return _error_result(f"File not found: {file_path}")

        # read() counts characters, not bytes, so request one character more
        # than the cap to learn whether anything was really left unread.
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read(MAX_TEXT_SIZE + 1)
        truncated = len(content) > MAX_TEXT_SIZE

        if truncated:
            # A cut-off document is shown raw: pretty-printing a prefix that
            # happens to parse would hide the fact that data is missing.
            content = content[:MAX_TEXT_SIZE] + "\n\n... [truncated - file too large] ..."
        else:
            try:
                content = json.dumps(json.loads(content), indent=2, ensure_ascii=False)
            except json.JSONDecodeError:
                pass  # Keep raw content if not valid JSON

        return DataPreviewResult(
            ok=True,
            data_type="json",
            content=content,
        )
    except Exception as e:
        logger.error(f"Error previewing JSON file {file_path}: {e}")
        return _error_result(f"Failed to read JSON: {str(e)}")
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def preview_text(file_path: str) -> DataPreviewResult:
    """Preview a text file.

    At most ``MAX_TEXT_SIZE`` characters are returned; a truncation marker is
    appended only when the file really holds more than that.

    Args:
        file_path: Path of the text file (read as UTF-8, undecodable bytes
            are replaced).

    Returns:
        A ``text`` result, or an ``error`` result when the file is missing or
        cannot be read.
    """
    try:
        if not os.path.exists(file_path):
            return _error_result(f"File not found: {file_path}")

        # read() counts characters, not bytes, so request one character more
        # than the cap to learn whether anything was really left unread.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            content = f.read(MAX_TEXT_SIZE + 1)

        if len(content) > MAX_TEXT_SIZE:
            content = content[:MAX_TEXT_SIZE] + "\n\n... [truncated - file too large] ..."

        return DataPreviewResult(
            ok=True,
            data_type="text",
            content=content,
        )
    except Exception as e:
        logger.error(f"Error previewing text file {file_path}: {e}")
        return _error_result(f"Failed to read text file: {str(e)}")
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def preview_image(file_path: str) -> DataPreviewResult:
    """Return an image file (PNG, JPG, GIF, WebP) as a base64 preview.

    Args:
        file_path: Path of the image file.

    Returns:
        An ``image`` result carrying the base64 payload and MIME type, or an
        ``error`` result when the file is missing, larger than
        ``MAX_BINARY_SIZE`` bytes, or unreadable.
    """
    try:
        if not os.path.exists(file_path):
            return _error_result(f"File not found: {file_path}")

        size_in_bytes = os.path.getsize(file_path)
        if size_in_bytes > MAX_BINARY_SIZE:
            return _error_result(f"Image too large for preview ({size_in_bytes} bytes)")

        # Prefer the system MIME table; fall back to a fixed extension map.
        guessed_type = mimetypes.guess_type(file_path)[0]
        if guessed_type:
            mime_type = guessed_type
        else:
            fallback_types = {
                ".png": "image/png",
                ".jpg": "image/jpeg",
                ".jpeg": "image/jpeg",
                ".gif": "image/gif",
                ".webp": "image/webp",
            }
            suffix = os.path.splitext(file_path)[1].lower()
            mime_type = fallback_types.get(suffix, "application/octet-stream")

        with open(file_path, "rb") as image_file:
            raw_bytes = image_file.read()

        encoded = base64.b64encode(raw_bytes).decode("utf-8")

        return DataPreviewResult(
            ok=True,
            data_type="image",
            binary_base64=encoded,
            mime_type=mime_type,
        )
    except Exception as e:
        logger.error(f"Error previewing image file {file_path}: {e}")
        return _error_result(f"Failed to read image: {str(e)}")
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def preview_pdf(file_path: str) -> DataPreviewResult:
    """Return a PDF file as a base64 preview.

    Args:
        file_path: Path of the PDF file.

    Returns:
        A ``pdf`` result carrying the base64 payload, or an ``error`` result
        when the file is missing, larger than ``MAX_BINARY_SIZE`` bytes, or
        unreadable.
    """
    try:
        if not os.path.exists(file_path):
            return _error_result(f"File not found: {file_path}")

        pdf_size = os.path.getsize(file_path)
        if pdf_size > MAX_BINARY_SIZE:
            return _error_result(f"PDF too large for preview ({pdf_size} bytes)")

        with open(file_path, "rb") as pdf_file:
            raw_bytes = pdf_file.read()

        encoded = base64.b64encode(raw_bytes).decode("utf-8")

        return DataPreviewResult(
            ok=True,
            data_type="pdf",
            binary_base64=encoded,
            mime_type="application/pdf",
        )
    except Exception as e:
        logger.error(f"Error previewing PDF file {file_path}: {e}")
        return _error_result(f"Failed to read PDF: {str(e)}")
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _limit_preview_sql(sql: str, limit: int, dialect_name: str) -> str:
    """Return *sql* capped to *limit* rows in syntax *dialect_name* accepts.

    A statement that already contains a row-limiting keyword for its dialect
    is returned unchanged.
    """
    import re

    def mentions(*keywords: str) -> bool:
        # Whole-word match, so identifiers such as "credit_limits" do not
        # count as an existing LIMIT clause.
        pattern = r"\b(?:" + "|".join(keywords) + r")\b"
        return re.search(pattern, sql, re.IGNORECASE) is not None

    statement = sql.strip().rstrip(";").rstrip()
    row_cap = int(limit)

    if dialect_name == "mssql":
        # T-SQL has no LIMIT clause; cap through TOP on a derived table.
        if mentions("top", "fetch"):
            return sql
        return f"SELECT TOP {row_cap} * FROM ({statement}) AS qalita_preview"
    if dialect_name == "oracle":
        # Oracle has no LIMIT clause; ROWNUM works on all versions.
        if mentions("rownum", "fetch"):
            return sql
        return f"SELECT * FROM ({statement}) WHERE ROWNUM <= {row_cap}"
    if mentions("limit"):
        return sql
    return f"{statement} LIMIT {row_cap}"


def preview_database(
    config: dict,
    limit: int = DEFAULT_ROW_LIMIT,
    query: Optional[str] = None,
    table: Optional[str] = None,
) -> DataPreviewResult:
    """Preview rows from a SQL database through SQLAlchemy.

    Args:
        config: Connection settings. Either a ready ``connection_string``, or
            ``type`` plus ``username``/``password``/``host``/``port``/
            ``database`` (only ``database`` for SQLite). An optional
            ``schema`` qualifies ``table``.
        limit: Maximum number of rows to fetch.
        query: Custom SQL to run; takes precedence over ``table``.
        table: Table to read when no query is given.

    Returns:
        A ``table`` result, or an ``error`` result when the database type is
        unsupported, neither ``query`` nor ``table`` is given, or anything
        raises while connecting or querying.
    """
    try:
        from urllib.parse import quote

        from sqlalchemy import create_engine, text

        db_type = config.get("type", "").lower()

        connection_string = config.get("connection_string")
        if not connection_string:
            # Build the URL from its components.
            db_type_map = {
                "postgresql": "postgresql",
                "mysql": "mysql",
                "mssql": "mssql+pymssql",
                "oracle": "oracle+oracledb",
                "sqlite": "sqlite",
            }

            dialect = db_type_map.get(db_type)
            if not dialect:
                return _error_result(f"Unsupported database type: {db_type}")

            if db_type == "sqlite":
                database_path = config.get("database", ":memory:")
                connection_string = f"sqlite:///{database_path}"
            else:
                # Percent-encode credentials so characters such as "@", ":"
                # or "/" in a password cannot corrupt the URL.
                username = quote(str(config["username"]), safe="")
                password = quote(str(config["password"]), safe="")
                authority = (
                    f"{dialect}://{username}:{password}"
                    f"@{config['host']}:{config['port']}"
                )
                if db_type == "oracle":
                    connection_string = f"{authority}/?service_name={config['database']}"
                else:
                    connection_string = f"{authority}/{config['database']}"

        engine = create_engine(connection_string)
        try:
            qualified_table = None
            if query:
                sql = query
            elif table:
                # NOTE(review): schema and table are interpolated verbatim and
                # identifiers cannot be bound as parameters; confirm they only
                # ever come from trusted source configuration.
                schema = config.get("schema")
                qualified_table = f"{schema}.{table}" if schema else table
                sql = f"SELECT * FROM {qualified_table}"
            else:
                return _error_result("No table or query specified for database preview")

            # The engine's dialect is authoritative even when only a raw
            # connection string was supplied.
            sql = _limit_preview_sql(sql, limit, engine.dialect.name)

            with engine.connect() as conn:
                # The total is only known for whole-table previews, and it is
                # best-effort: a failed count must not block the preview.
                total_rows = None
                if qualified_table is not None:
                    try:
                        count_result = conn.execute(
                            text(f"SELECT COUNT(*) FROM {qualified_table}")
                        )
                        total_rows = count_result.scalar()
                    except Exception:
                        total_rows = None

                df = pd.read_sql(sql, conn)
        finally:
            # Release pooled connections; the engine is used for this call only.
            engine.dispose()

        return _dataframe_to_preview(df, limit, total_rows)
    except Exception as e:
        logger.error(f"Error previewing database: {e}")
        return _error_result(f"Failed to query database: {str(e)}")
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def preview_s3(
    config: dict,
    limit: int = DEFAULT_ROW_LIMIT,
) -> DataPreviewResult:
    """Preview a tabular object stored in S3.

    Args:
        config: Either ``path`` (an ``s3://`` URL) or ``bucket`` plus ``key``
            (the object key). ``secret``, ``token`` and ``client_kwargs`` are
            forwarded as storage options; ``key`` is forwarded as well, but
            only when ``path`` is given.
        limit: Maximum number of rows to include in the preview.

    Returns:
        A ``table`` result, or an ``error`` result when no path can be
        derived, the extension is unsupported, or reading fails.
    """
    try:
        path = config.get("path")
        key_is_object_key = False
        if not path:
            bucket = config.get("bucket")
            object_key = config.get("key")
            if bucket and object_key:
                path = f"s3://{bucket}/{object_key}"
                key_is_object_key = True

        if not path:
            return _error_result("S3 path not configured")

        # "key" in the storage options is the access-key credential. In the
        # bucket/key form the same config entry names the object instead, so
        # it must not be sent as a credential.
        # NOTE(review): that form leaves no way to pass an access key id;
        # confirm the intended config schema.
        option_names = ["secret", "token", "client_kwargs"]
        if not key_is_object_key:
            option_names.insert(0, "key")
        storage_options = {
            name: config[name] for name in option_names if name in config
        } or None

        # The reader is chosen from the object's extension.
        ext = os.path.splitext(path)[1].lower()

        if ext == ".csv":
            df = pd.read_csv(path, storage_options=storage_options, nrows=limit)
            return _dataframe_to_preview(df, limit)
        elif ext == ".parquet":
            df = pd.read_parquet(path, storage_options=storage_options)
            total_rows = len(df)
            return _dataframe_to_preview(df.head(limit), limit, total_rows)
        elif ext in (".xlsx", ".xls"):
            # Credentials are needed here as for every other format.
            df = pd.read_excel(
                path,
                nrows=limit,
                engine="openpyxl",
                storage_options=storage_options,
            )
            return _dataframe_to_preview(df, limit)
        elif ext == ".json":
            df = pd.read_json(path, storage_options=storage_options)
            return _dataframe_to_preview(df.head(limit), limit, len(df))
        else:
            return _error_result(f"Unsupported file type in S3: {ext}")
    except Exception as e:
        logger.error(f"Error previewing S3 source: {e}")
        return _error_result(f"Failed to read S3: {str(e)}")
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def preview_source(
    source_config: dict,
    limit: int = DEFAULT_ROW_LIMIT,
    query: Optional[str] = None,
) -> DataPreviewResult:
    """
    Preview data from a source configuration.

    This is the main entry point for data preview. It dispatches on the
    source type to the matching ``preview_*`` function.

    Args:
        source_config: Source configuration dict with 'type' and 'config' keys
        limit: Maximum number of rows to return
        query: Optional SQL query for database sources

    Returns:
        DataPreviewResult with the preview data, or an error result when the
        type is unsupported or a required path is missing
    """
    import glob

    source_type = source_config.get("type", "").lower()
    config = source_config.get("config", {})

    logger.info(f"Previewing source type: {source_type}")

    # Single-file sources: type -> (message when 'path' is missing, previewer).
    file_previewers = {
        "file": ("File path not configured", lambda p: preview_csv(p, limit)),
        "csv": ("File path not configured", lambda p: preview_csv(p, limit)),
        "excel": (
            "Excel path not configured",
            lambda p: preview_excel(p, limit, config.get("sheet_name")),
        ),
        "parquet": ("Parquet path not configured", lambda p: preview_parquet(p, limit)),
        "json": ("JSON path not configured", preview_json),
        "text": ("Text file path not configured", preview_text),
        "image": ("Image path not configured", preview_image),
        "pdf": ("PDF path not configured", preview_pdf),
    }
    if source_type in file_previewers:
        missing_path_message, previewer = file_previewers[source_type]
        path = config.get("path")
        if not path:
            return _error_result(missing_path_message)
        return previewer(path)

    # Database sources
    if source_type in ("postgresql", "mysql", "mssql", "oracle", "sqlite"):
        db_config = {**config, "type": source_type}
        table = config.get("table") or config.get("default_table")
        return preview_database(db_config, limit, query, table)

    # Cloud storage sources
    if source_type == "s3":
        return preview_s3(config, limit)

    if source_type in ("gcs", "azure_blob"):
        return _error_result(f"Preview for {source_type} not yet implemented")

    # Folder - preview the first supported data file
    if source_type == "folder":
        path = config.get("path")
        if not path or not os.path.isdir(path):
            return _error_result("Folder path not configured or not accessible")

        # Patterns are tried in priority order.
        folder_previewers = (
            ("*.csv", lambda p: preview_csv(p, limit)),
            ("*.xlsx", lambda p: preview_excel(p, limit)),
            ("*.parquet", lambda p: preview_parquet(p, limit)),
            ("*.json", preview_json),
        )
        # Escape the folder so brackets or wildcards in its name are literal.
        folder = glob.escape(path)
        for pattern, previewer in folder_previewers:
            # glob order is arbitrary; sort so the same file is picked each time.
            matches = sorted(glob.glob(os.path.join(folder, pattern)))
            if matches:
                return previewer(matches[0])

        return _error_result("No supported data files found in folder")

    return _error_result(f"Unsupported source type: {source_type}")
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def detect_preview_type(file_path: str) -> str:
    """
    Map a file path to a preview type using its extension (case-insensitive).

    Returns one of: csv, excel, parquet, json, text, image, pdf, unknown
    """
    extensions_by_type = {
        "csv": (".csv",),
        "excel": (".xlsx", ".xls"),
        "parquet": (".parquet", ".pq"),
        "json": (".json",),
        "text": (".txt", ".log", ".md", ".yml", ".yaml", ".xml"),
        "image": (".png", ".jpg", ".jpeg", ".gif", ".webp"),
        "pdf": (".pdf",),
    }

    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    for preview_type, extensions in extensions_by_type.items():
        if suffix in extensions:
            return preview_type
    return "unknown"
|
qalita/internal/request.py
CHANGED
|
@@ -36,7 +36,7 @@ def send_api_request(
|
|
|
36
36
|
continue
|
|
37
37
|
k, v = line.split('=', 1)
|
|
38
38
|
k = k.strip().upper(); v = v.strip().strip('"').strip("'")
|
|
39
|
-
if k in ("QALITA_URL", "URL"):
|
|
39
|
+
if k in ("QALITA_WORKER_ENDPOINT", "QALITA_AGENT_ENDPOINT", "AGENT_ENDPOINT", "QALITA_URL", "URL"):
|
|
40
40
|
base_url = v
|
|
41
41
|
break
|
|
42
42
|
except Exception:
|
|
@@ -110,7 +110,7 @@ def send_request(
|
|
|
110
110
|
continue
|
|
111
111
|
k, v = line.split('=', 1)
|
|
112
112
|
k = k.strip(); v = v.strip().strip('"').strip("'")
|
|
113
|
-
if k.upper() in ("QALITA_TOKEN", "TOKEN") and not token:
|
|
113
|
+
if k.upper() in ("QALITA_WORKER_TOKEN", "QALITA_AGENT_TOKEN", "QALITA_TOKEN", "TOKEN") and not token:
|
|
114
114
|
token = v
|
|
115
115
|
except Exception:
|
|
116
116
|
pass
|
|
@@ -171,7 +171,8 @@ def send_request(
|
|
|
171
171
|
f"Retrying {current_retry+1}/{total_retry} in {grace_period} seconds..."
|
|
172
172
|
)
|
|
173
173
|
time.sleep(grace_period)
|
|
174
|
-
|
|
174
|
+
# Use __wrapped__ to bypass the @pass_config decorator and pass config directly
|
|
175
|
+
r = send_request.__wrapped__(
|
|
175
176
|
config,
|
|
176
177
|
request,
|
|
177
178
|
mode,
|
qalita/internal/utils.py
CHANGED
qalita/web/app.py
CHANGED
|
@@ -25,7 +25,13 @@ def create_app(config_obj) -> Flask:
|
|
|
25
25
|
import sys
|
|
26
26
|
if sys.version_info < (3, 13):
|
|
27
27
|
# Only try gevent on Python < 3.13 due to zope.interface compatibility issues
|
|
28
|
+
# But first verify gevent is actually installed and working
|
|
29
|
+
import gevent # noqa: F401
|
|
30
|
+
from gevent import monkey # noqa: F401
|
|
28
31
|
async_mode = "gevent"
|
|
32
|
+
except ImportError:
|
|
33
|
+
# gevent not installed, use threading
|
|
34
|
+
pass
|
|
29
35
|
except Exception:
|
|
30
36
|
pass
|
|
31
37
|
|
|
@@ -112,8 +118,6 @@ def get_socketio():
|
|
|
112
118
|
def run_dashboard_ui(config_obj, host: str = "localhost", port: int = 7070):
|
|
113
119
|
app = create_app(config_obj)
|
|
114
120
|
socketio = get_socketio()
|
|
115
|
-
url = f"http://{host}:{port}"
|
|
116
|
-
print(f"QALITA CLI UI is running. Open {url}")
|
|
117
121
|
|
|
118
122
|
# Use gevent for production WebSocket support
|
|
119
123
|
if socketio:
|