qalita-2.9.2-py3-none-any.whl → qalita-2.10.1-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- qalita/_frontend/.next/BUILD_ID +1 -1
- qalita/_frontend/.next/build-manifest.json +2 -2
- qalita/_frontend/.next/prerender-manifest.json +3 -3
- qalita/_frontend/.next/server/app/_global-error.html +2 -2
- qalita/_frontend/.next/server/app/_global-error.rsc +1 -1
- qalita/_frontend/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/_not-found.html +1 -1
- qalita/_frontend/.next/server/app/_not-found.rsc +2 -2
- qalita/_frontend/.next/server/app/_not-found.segments/_full.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_not-found.segments/_index.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/sources/add/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/sources/add.html +1 -1
- qalita/_frontend/.next/server/app/sources/add.rsc +3 -3
- qalita/_frontend/.next/server/app/sources/add.segments/_full.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/sources/add.segments/_head.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/sources/add.segments/_index.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/sources/add.segments/_tree.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/sources/add.segments/sources/add/__PAGE__.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/sources/add.segments/sources/add.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/sources/add.segments/sources.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/sources/edit/[id]/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/sources/page_client-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/app/sources.html +1 -1
- qalita/_frontend/.next/server/app/sources.rsc +3 -3
- qalita/_frontend/.next/server/app/sources.segments/_full.segment.rsc +3 -3
- qalita/_frontend/.next/server/app/sources.segments/_head.segment.rsc +1 -1
- qalita/_frontend/.next/server/app/sources.segments/_index.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/sources.segments/_tree.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/sources.segments/sources/__PAGE__.segment.rsc +2 -2
- qalita/_frontend/.next/server/app/sources.segments/sources.segment.rsc +1 -1
- qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__be91267c._.js +1 -1
- qalita/_frontend/.next/server/chunks/ssr/_cb7b44d6._.js +1 -1
- qalita/_frontend/.next/server/chunks/ssr/_d44c43ed._.js +1 -1
- qalita/_frontend/.next/server/chunks/ssr/components_DashboardContent_tsx_c3635665._.js +1 -1
- qalita/_frontend/.next/server/pages/404.html +1 -1
- qalita/_frontend/.next/server/pages/500.html +2 -2
- qalita/_frontend/.next/server/server-reference-manifest.js +1 -1
- qalita/_frontend/.next/server/server-reference-manifest.json +1 -1
- qalita/_frontend/.next/static/chunks/1e6a98e93c470083.css +1 -0
- qalita/_frontend/.next/static/chunks/499b7099996cc9f9.js +1 -0
- qalita/_frontend/.next/static/chunks/89c689b5748e28ed.js +1 -0
- qalita/_frontend/.next/static/chunks/ba22289f779d638e.js +1 -0
- qalita/_frontend/.next/static/chunks/dde1c328f398837e.js +1 -0
- qalita/_frontend/.next/static/chunks/facd124df217e016.js +1 -0
- qalita/commands/source.py +166 -2
- qalita/commands/worker.py +3 -3
- qalita/commands/worker_grpc.py +3 -3
- qalita/grpc/client.py +227 -32
- qalita/internal/action_executor.py +124 -11
- qalita/internal/config.py +7 -0
- qalita/internal/utils.py +1 -1
- {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/METADATA +2 -1
- {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/RECORD +69 -68
- qalita/_frontend/.next/static/chunks/02a64570f0a14789.js +0 -1
- qalita/_frontend/.next/static/chunks/27b3ba70c7ef50a8.js +0 -1
- qalita/_frontend/.next/static/chunks/517e9b74d1a3c0ce.js +0 -1
- qalita/_frontend/.next/static/chunks/6c99da4248e4fcfc.js +0 -1
- qalita/_frontend/.next/static/chunks/e4c3a252774ab7fd.css +0 -1
- /qalita/_frontend/.next/static/{SlJmHVnRND1B7HlzvPJuC → ymL1t781xjzJd1EX5euFe}/_buildManifest.js +0 -0
- /qalita/_frontend/.next/static/{SlJmHVnRND1B7HlzvPJuC → ymL1t781xjzJd1EX5euFe}/_clientMiddlewareManifest.json +0 -0
- /qalita/_frontend/.next/static/{SlJmHVnRND1B7HlzvPJuC → ymL1t781xjzJd1EX5euFe}/_ssgManifest.js +0 -0
- {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/WHEEL +0 -0
- {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/entry_points.txt +0 -0
- {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/licenses/LICENSE +0 -0
qalita/grpc/client.py
CHANGED
@@ -26,6 +26,7 @@ class GrpcClient:
     - Keep-alive management
     - Bidirectional streaming support
     - Thread-safe connection state
+    - Stability detection before resetting reconnection counter
     """

     def __init__(
@@ -36,6 +37,7 @@ class GrpcClient:
         max_reconnect_attempts: int = 10,
         initial_reconnect_delay: float = 1.0,
         max_reconnect_delay: float = 60.0,
+        stability_threshold_seconds: float = 30.0,
     ):
         """
         Initialize the gRPC client.
@@ -47,6 +49,7 @@ class GrpcClient:
             max_reconnect_attempts: Maximum reconnection attempts (0 = unlimited)
             initial_reconnect_delay: Initial delay between reconnection attempts
             max_reconnect_delay: Maximum delay between reconnection attempts
+            stability_threshold_seconds: Time the connection must be stable before resetting attempts counter
         """
         self._url = url
         self._token = token
@@ -54,6 +57,7 @@ class GrpcClient:
         self._max_reconnect_attempts = max_reconnect_attempts
         self._initial_reconnect_delay = initial_reconnect_delay
         self._max_reconnect_delay = max_reconnect_delay
+        self._stability_threshold_seconds = stability_threshold_seconds

         # Connection state - set before parsing URL
         self._use_secure_channel = False
@@ -66,12 +70,21 @@ class GrpcClient:
         self._stub: Optional[qalita_pb2_grpc.WorkerServiceStub] = None
         self._connected = False
         self._reconnect_attempts = 0
+        self._current_reconnect_delay = initial_reconnect_delay
+        self._last_successful_stream_start: Optional[datetime] = None
+        self._stream_healthy = False

         # Stream state
         self._stream_call = None
         self._outgoing_queue: asyncio.Queue = asyncio.Queue()
         self._stream_active = False

+        # Stream health monitoring
+        self._last_message_received: Optional[datetime] = None
+        self._last_message_sent: Optional[datetime] = None
+        self._stream_health_timeout = 45.0  # Consider stream dead if no response in 45s
+        self._force_reconnect = False
+
         # Callbacks
         self._on_job_received: Optional[Callable] = None
         self._on_routine_received: Optional[Callable] = None
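
The new parameter slots in alongside the existing backoff knobs. A minimal construction sketch, not taken from the package's own worker wiring: the endpoint and token are placeholders, and any other arguments the class may require are omitted.

    from qalita.grpc.client import GrpcClient

    client = GrpcClient(
        url="https://platform.example.com",   # placeholder endpoint
        token="<worker-token>",               # placeholder credential
        max_reconnect_attempts=10,            # 0 = unlimited
        initial_reconnect_delay=1.0,          # seconds; doubles after each failed cycle
        max_reconnect_delay=60.0,             # backoff ceiling
        stability_threshold_seconds=30.0,     # new in 2.10.x: how long the stream must
    )                                         # stay healthy before counters reset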
@@ -129,10 +142,22 @@ class GrpcClient:
         """
         Establish connection to the gRPC server.

+        Note: This method does NOT reset _reconnect_attempts. The counter is only
+        reset after the stream has been stable for _stability_threshold_seconds.
+
         Returns:
             True if connection successful, False otherwise
         """
         try:
+            # Close any existing channel first
+            if self._channel:
+                try:
+                    await self._channel.close()
+                except Exception:
+                    pass
+                self._channel = None
+                self._stub = None
+
             # Channel options for long-running streams
             channel_options = [
                 ('grpc.keepalive_time_ms', 30000),
@@ -142,6 +167,10 @@ class GrpcClient:
                 ('grpc.http2.max_pings_without_data', 0),
                 ('grpc.max_receive_message_length', 50 * 1024 * 1024),
                 ('grpc.max_send_message_length', 50 * 1024 * 1024),
+                # Additional options for better connection resilience
+                ('grpc.initial_reconnect_backoff_ms', 1000),
+                ('grpc.max_reconnect_backoff_ms', 60000),
+                ('grpc.enable_retries', 1),
             ]

             # Create channel - secure for production, insecure for local dev
@@ -160,7 +189,7 @@ class GrpcClient:

             self._stub = qalita_pb2_grpc.WorkerServiceStub(self._channel)
             self._connected = True
-            self._reconnect_attempts = 0
+            # Note: Do NOT reset _reconnect_attempts here - only reset after stable stream

             logger.info(f"Connected to gRPC server at {self._grpc_target}")
             return True
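
The three added options are standard grpc-core channel arguments, applied at the transport layer underneath the application-level backoff. A standalone sketch of a channel built with them (placeholder target; grpc.aio accepts the same option tuples):

    import grpc

    channel = grpc.aio.insecure_channel(
        "localhost:50051",  # placeholder target
        options=[
            ("grpc.keepalive_time_ms", 30000),            # ping the server every 30s
            ("grpc.initial_reconnect_backoff_ms", 1000),  # transport backoff floor
            ("grpc.max_reconnect_backoff_ms", 60000),     # transport backoff ceiling
            ("grpc.enable_retries", 1),                   # allow transparent retries
        ],
    )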
@@ -197,31 +226,102 @@ class GrpcClient:
         """
         Attempt to reconnect with exponential backoff.

+        The reconnection counter persists across reconnection cycles. It only resets
+        when the connection has been stable (stream healthy for _stability_threshold_seconds).
+
         Returns:
             True if reconnection successful, False if max attempts exceeded
         """
-
+        self._reconnect_attempts += 1
+        self._stream_healthy = False
+
+        # Check if max attempts exceeded
+        if self._max_reconnect_attempts > 0 and self._reconnect_attempts > self._max_reconnect_attempts:
+            logger.error(
+                f"Max reconnection attempts exceeded ({self._reconnect_attempts}/{self._max_reconnect_attempts}). "
+                f"Will continue trying with max backoff delay."
+            )
+            # Don't return False - keep trying but with max delay
+            # In production, we want the worker to eventually recover

-
-
-
-        self.
-
-
-
+        logger.warning(
+            f"Reconnection attempt {self._reconnect_attempts}"
+            f"{f'/{self._max_reconnect_attempts}' if self._max_reconnect_attempts > 0 else ''} "
+            f"(delay: {self._current_reconnect_delay:.1f}s)"
+        )
+
+        # Wait before attempting reconnection (exponential backoff)
+        await asyncio.sleep(self._current_reconnect_delay)
+
+        # Attempt to connect
+        if await self.connect():
+            # Increase delay for next attempt (in case this stream also fails quickly)
+            self._current_reconnect_delay = min(
+                self._current_reconnect_delay * 2,
+                self._max_reconnect_delay
             )
-
-            await asyncio.sleep(delay)
-
-            if await self.connect():
-                return True
-
-            # Exponential backoff
-            delay = min(delay * 2, self._max_reconnect_delay)
+            return True

-
+        # Connection failed, increase delay for next attempt
+        self._current_reconnect_delay = min(
+            self._current_reconnect_delay * 2,
+            self._max_reconnect_delay
+        )
         return False

+    def _mark_stream_stable(self) -> None:
+        """
+        Mark the stream as stable and reset reconnection counters.
+
+        Called when the stream has been healthy for _stability_threshold_seconds.
+        """
+        if not self._stream_healthy:
+            logger.info("Stream connection is now stable - resetting reconnection counters")
+            self._stream_healthy = True
+            self._reconnect_attempts = 0
+            self._current_reconnect_delay = self._initial_reconnect_delay
+
+    async def _check_stream_health(self) -> None:
+        """
+        Check if the stream is actually working by comparing sent vs received timestamps.
+
+        If we've been sending messages but haven't received any response (ack or other)
+        for _stream_health_timeout seconds, the stream is probably dead and we should reconnect.
+        """
+        now = datetime.now(timezone.utc)
+
+        # Need both timestamps to make a comparison
+        if not self._last_message_sent:
+            return
+
+        # Calculate time since last message sent and received
+        time_since_sent = (now - self._last_message_sent).total_seconds()
+
+        if self._last_message_received:
+            time_since_received = (now - self._last_message_received).total_seconds()
+        else:
+            # Never received anything - use time since stream started
+            if self._last_successful_stream_start:
+                time_since_received = (now - self._last_successful_stream_start).total_seconds()
+            else:
+                return
+
+        # If we've been sending but not receiving for too long, stream is dead
+        if time_since_received > self._stream_health_timeout:
+            logger.warning(
+                f"Stream appears dead: last sent {time_since_sent:.1f}s ago, "
+                f"last received {time_since_received:.1f}s ago (timeout: {self._stream_health_timeout}s)"
+            )
+            logger.warning("Forcing reconnection due to unresponsive stream...")
+            self._force_reconnect = True
+
+            # Cancel the stream call to force the error path
+            if self._stream_call:
+                try:
+                    self._stream_call.cancel()
+                except Exception as e:
+                    logger.debug(f"Error cancelling stream for forced reconnect: {e}")
+
     # =========================================================================
     # Unary RPCs
     # =========================================================================
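
The schedule this produces is easiest to see in isolation. A small sketch of the same arithmetic (not part of the package):

    def next_delay(current: float, max_delay: float = 60.0) -> float:
        """Double the delay after each failed cycle, clamped at the ceiling."""
        return min(current * 2, max_delay)

    delay = 1.0
    for attempt in range(1, 9):
        print(f"attempt {attempt}: wait {delay:.0f}s")
        delay = next_delay(delay)
    # Prints 1s, 2s, 4s, 8s, 16s, 32s, 60s, 60s. Only a stream that stays
    # healthy for stability_threshold_seconds resets the delay back to 1s.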
@@ -646,12 +746,14 @@ class GrpcClient:
     async def _outgoing_messages(self) -> AsyncIterator[qalita_pb2.WorkerMessage]:
         """Generator for outgoing stream messages."""
         logger.info("Outgoing messages generator started")
-        while self._stream_active:
+        while self._stream_active and not self._force_reconnect:
             try:
                 # Use get_nowait in a loop with sleep to avoid blocking gRPC
                 try:
                     msg = self._outgoing_queue.get_nowait()
-
+                    msg_type = msg.WhichOneof('payload')
+                    logger.debug(f"Yielding message type: {msg_type}")
+                    self._last_message_sent = datetime.now(timezone.utc)
                     yield msg
                 except asyncio.QueueEmpty:
                     # No message available, yield control briefly
@@ -672,7 +774,9 @@ class GrpcClient:
         - Keep-alive signals (sent every 10 seconds)
         - Incoming job assignments
         - Incoming routine triggers
-        - Automatic reconnection on failure
+        - Automatic reconnection on failure with exponential backoff
+        - Stability detection to reset reconnection counters
+        - Dead stream detection (sending but not receiving)
         """
         if not self._connected:
             if not await self.connect():
@@ -681,14 +785,30 @@ class GrpcClient:
         # Recreate queue in async context to ensure proper event loop binding
         self._outgoing_queue = asyncio.Queue()
         self._stream_active = True
+        self._stream_healthy = False
+        self._last_successful_stream_start = None
+        self._last_message_received = None
+        self._last_message_sent = None
+        self._force_reconnect = False

         async def keep_alive_loop():
-            """Send keep-alive every 10 seconds."""
+            """Send keep-alive every 10 seconds and monitor stream health."""
             logger.info(f"Keep-alive loop started, worker_id={self._worker_id}")
-            while self._stream_active:
+            while self._stream_active and not self._force_reconnect:
                 try:
                     logger.debug(f"Sending keep-alive for worker {self._worker_id}")
                     await self.send_keep_alive()
+
+                    # Check if stream has been healthy long enough to reset counters
+                    if (self._last_successful_stream_start and
+                            not self._stream_healthy):
+                        elapsed = (datetime.now(timezone.utc) - self._last_successful_stream_start).total_seconds()
+                        if elapsed >= self._stability_threshold_seconds:
+                            self._mark_stream_stable()
+
+                    # Health check: detect dead stream (sending but not receiving)
+                    await self._check_stream_health()
+
                     await asyncio.sleep(10)
                 except asyncio.CancelledError:
                     logger.info("Keep-alive loop cancelled")
@@ -696,15 +816,28 @@ class GrpcClient:
                 except Exception as e:
                     logger.error(f"Keep-alive error: {e}")

-        async def
-            """
+        async def process_single_stream() -> bool:
+            """
+            Process incoming stream messages for one connection attempt.
+
+            Returns:
+                True if stream ended gracefully (should not reconnect)
+                False if stream had an error (should attempt reconnection)
+            """
             try:
                 self._stream_call = self._stub.Connect(
                     self._outgoing_messages(),
                     metadata=self.metadata,
                 )

+                # Mark the time when stream successfully started
+                self._last_successful_stream_start = datetime.now(timezone.utc)
+                logger.info("Stream established successfully")
+
                 async for msg in self._stream_call:
+                    # Each message received confirms the stream is working
+                    self._last_message_received = datetime.now(timezone.utc)
+
                     if msg.HasField('job_assignment'):
                         job = msg.job_assignment.job
                         logger.info(f"Received job assignment: {job.id}")
@@ -737,26 +870,88 @@ class GrpcClient:

                     elif msg.HasField('ack'):
                         logger.debug(f"Received ack: {msg.ack.message_type}")
+                        # Ack received means stream is working, check stability
+                        if (self._last_successful_stream_start and
+                                not self._stream_healthy):
+                            elapsed = (datetime.now(timezone.utc) - self._last_successful_stream_start).total_seconds()
+                            if elapsed >= self._stability_threshold_seconds:
+                                self._mark_stream_stable()

                     elif msg.HasField('error'):
                         logger.error(f"Server error: {msg.error.code} - {msg.error.message}")
+
+                # Stream ended normally (server closed it gracefully)
+                logger.info("Stream ended normally")
+                return False  # Still try to reconnect for continuous operation
+
+            except asyncio.CancelledError:
+                # CancelledError is a BaseException in Python 3.8+, must catch explicitly
+                if self._force_reconnect:
+                    logger.info("Stream cancelled due to forced reconnect (dead stream detection)")
+                    return False  # Reconnect
+                else:
+                    logger.info("Stream cancelled by client")
+                    return True  # Don't reconnect if we intentionally cancelled it

             except grpc.aio.AioRpcError as e:
                 if e.code() == grpc.StatusCode.CANCELLED:
-
+                    if self._force_reconnect:
+                        logger.info("Stream cancelled due to forced reconnect (dead stream detection)")
+                        return False  # Reconnect
+                    else:
+                        logger.info("Stream cancelled by client")
+                        return True  # Don't reconnect if we intentionally cancelled it
                 else:
-
-
-                    if self.
-
+                    # Calculate how long the stream was alive
+                    stream_duration = 0
+                    if self._last_successful_stream_start:
+                        stream_duration = (datetime.now(timezone.utc) - self._last_successful_stream_start).total_seconds()
+
+                    logger.error(
+                        f"Stream error after {stream_duration:.1f}s: {e.code()} - {e.details()}"
+                    )
+                    return False  # Should attempt reconnection
+
+            except Exception as e:
+                logger.error(f"Unexpected stream error: {e}")
+                return False  # Should attempt reconnection

-        #
+        # Main stream loop with reconnection handling
         keep_alive_task = asyncio.create_task(keep_alive_loop())

         try:
-
+            while self._stream_active:
+                # Reset state before starting/restarting stream
+                self._force_reconnect = False
+                self._last_message_received = None
+                self._last_message_sent = None
+
+                # Process the stream
+                should_stop = await process_single_stream()
+
+                if should_stop or not self._stream_active:
+                    break
+
+                # Stream failed, attempt reconnection
+                self._last_successful_stream_start = None
+
+                # Recreate the outgoing queue to clear any stale messages
+                self._outgoing_queue = asyncio.Queue()
+
+                # Attempt reconnection (this handles backoff)
+                if not await self._reconnect():
+                    # _reconnect now always returns True after sleeping and connecting
+                    # It only returns False if connect() itself fails
+                    # In that case, keep trying
+                    logger.warning("Reconnection failed, will retry...")
+                    continue
+
+                # Reconnected successfully, loop will start a new stream
+                logger.info("Reconnected, restarting stream...")
+
         finally:
             self._stream_active = False
+            self._force_reconnect = True  # Stop the outgoing generator
             keep_alive_task.cancel()
             try:
                 await keep_alive_task
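
The dead-stream rule above reduces to one comparison, evaluated from the keep-alive loop: if messages keep going out but nothing (not even an ack) has arrived within the 45-second window, the stream call is cancelled to force a reconnect. A sketch of that predicate with fabricated timestamps:

    from datetime import datetime, timedelta, timezone

    def stream_is_dead(last_received: datetime, timeout: float = 45.0) -> bool:
        """True when nothing has arrived within the timeout window."""
        age = (datetime.now(timezone.utc) - last_received).total_seconds()
        return age > timeout

    now = datetime.now(timezone.utc)
    print(stream_is_dead(now - timedelta(seconds=50)))  # True  -> force reconnect
    print(stream_is_dead(now - timedelta(seconds=5)))   # False -> still healthy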
qalita/internal/action_executor.py
CHANGED

@@ -36,7 +36,7 @@ class ActionResult:

 # Supported action types
 ACTION_TYPES = {
-    "query": "Execute a SQL query on
+    "query": "Execute a SQL query on any data source (database or file via pandasql)",
     "read_data": "Read data from a file or database source",
     "filter": "Filter data based on conditions",
     "aggregate": "Perform aggregation on data",
@@ -48,6 +48,14 @@ ACTION_TYPES = {
     "tail": "Get last N rows from a source",
 }

+# Check if pandasql is available for file SQL queries
+_PANDASQL_AVAILABLE = False
+try:
+    import pandasql
+    _PANDASQL_AVAILABLE = True
+except ImportError:
+    pass
+

 class ActionExecutor:
     """
@@ -171,14 +179,7 @@ class ActionExecutor:
         return source_type in ("file", "csv", "excel", "parquet", "json", "folder")

     def _execute_query(self, source_config: dict, params: dict) -> ActionResult:
-        """Execute a SQL query on
-        if not self._is_database_source(source_config):
-            return ActionResult(
-                ok=False,
-                action_type="query",
-                error=f"Query action only supported for database sources, not {source_config.get('type')}",
-            )
-
+        """Execute a SQL query on any data source (database or file via pandasql)."""
         sql = params.get("sql")
         if not sql:
             return ActionResult(
@@ -189,11 +190,32 @@ class ActionExecutor:

         limit = params.get("limit", DEFAULT_ROW_LIMIT)

-        #
+        # Security: reject modification queries
         sql_lower = sql.strip().lower()
-        if
+        if sql_lower.startswith(("insert", "update", "delete", "create", "drop", "alter", "truncate")):
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="Modification queries (INSERT, UPDATE, DELETE, etc.) are not allowed",
+            )
+
+        # Add LIMIT if not present (for safety)
+        if "limit" not in sql_lower:
             sql = f"{sql.rstrip(';')} LIMIT {limit}"

+        if self._is_database_source(source_config):
+            return self._execute_database_query(source_config, sql, limit)
+        elif self._is_file_source(source_config):
+            return self._execute_file_query(source_config, sql, limit)
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query action not supported for source type: {source_config.get('type')}",
+            )
+
+    def _execute_database_query(self, source_config: dict, sql: str, limit: int) -> ActionResult:
+        """Execute a SQL query on a database source."""
         try:
             engine = self._get_database_engine(source_config)
             with engine.connect() as conn:
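
The read-only guard above is a prefix check plus LIMIT injection, not a full SQL parser. The same logic in isolation (the 1000-row default here is illustrative; the package uses its own DEFAULT_ROW_LIMIT):

    def guard_sql(sql: str, limit: int = 1000) -> str:
        """Reject modification statements and cap result size."""
        lowered = sql.strip().lower()
        if lowered.startswith(("insert", "update", "delete",
                               "create", "drop", "alter", "truncate")):
            raise ValueError("Modification queries are not allowed")
        if "limit" not in lowered:
            sql = f"{sql.rstrip(';')} LIMIT {limit}"
        return sql

    print(guard_sql("SELECT * FROM users"))           # LIMIT 1000 appended
    print(guard_sql("SELECT id FROM users LIMIT 5"))  # left unchanged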
@@ -213,6 +235,97 @@ class ActionExecutor:
                 error=f"Query execution failed: {str(e)}",
             )

+    def _execute_file_query(self, source_config: dict, sql: str, limit: int) -> ActionResult:
+        """Execute a SQL query on a file source using pandasql."""
+        import os
+
+        if not _PANDASQL_AVAILABLE:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="SQL queries on file sources require 'pandasql'. Install with: pip install pandasql",
+            )
+
+        config = source_config.get("config", {})
+        source_type = source_config.get("type", "").lower()
+        path = config.get("path")
+
+        if not path:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="File path not configured",
+            )
+
+        if not os.path.exists(path):
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"File not found: {path}",
+            )
+
+        try:
+            # Load the file into a DataFrame
+            source_data = self._load_file_to_dataframe(path, source_type)
+
+            if source_data is None:
+                return ActionResult(
+                    ok=False,
+                    action_type="query",
+                    error=f"Unsupported file type: {source_type}",
+                )
+
+            # Execute the SQL query using pandasql
+            # The user must use 'source_data' as the table name in their query
+            from pandasql import sqldf
+
+            # Create a local namespace for pandasql
+            local_env = {"source_data": source_data}
+            result_df = sqldf(sql, local_env)
+
+            # Apply limit if result is larger
+            if len(result_df) > limit:
+                result_df = result_df.head(limit)
+
+            preview = _dataframe_to_preview(result_df, limit)
+            return ActionResult(
+                ok=True,
+                action_type="query",
+                data=preview,
+                result_json=json.dumps({
+                    "rows_returned": len(result_df),
+                    "columns": list(result_df.columns),
+                    "source_rows": len(source_data),
+                }),
+            )
+        except Exception as e:
+            error_msg = str(e)
+            # Provide helpful hint if table name is wrong
+            if "no such table" in error_msg.lower():
+                error_msg += ". Hint: Use 'source_data' as the table name, e.g., SELECT * FROM source_data"
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query execution failed: {error_msg}",
+            )
+
+    def _load_file_to_dataframe(self, path: str, source_type: str) -> Optional[pd.DataFrame]:
+        """Load a file into a pandas DataFrame for SQL querying."""
+        try:
+            if source_type in ("csv", "file") or path.endswith(".csv"):
+                return pd.read_csv(path, low_memory=False)
+            elif source_type == "excel" or path.endswith((".xlsx", ".xls")):
+                return pd.read_excel(path, engine="openpyxl")
+            elif source_type == "parquet" or path.endswith(".parquet"):
+                return pd.read_parquet(path)
+            elif source_type == "json" or path.endswith(".json"):
+                return pd.read_json(path)
+            else:
+                return None
+        except Exception as e:
+            logger.error(f"Failed to load file {path}: {e}")
+            return None
+
     def _read_data(self, source_config: dict, params: dict) -> ActionResult:
         """Read data from a source."""
         limit = params.get("limit", DEFAULT_ROW_LIMIT)
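
End to end, the new file path means SQLite-dialect SQL over an in-memory DataFrame, with the frame exposed under the fixed name source_data (as the error hint above says). A usage sketch with made-up data:

    import pandas as pd
    from pandasql import sqldf

    source_data = pd.DataFrame(
        {"country": ["FR", "DE", "FR"], "amount": [10, 20, 30]}
    )
    result = sqldf(
        "SELECT country, SUM(amount) AS total FROM source_data GROUP BY country",
        {"source_data": source_data},
    )
    print(result)  # DE -> 20, FR -> 40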
qalita/internal/config.py
CHANGED

@@ -51,6 +51,13 @@ class Config(object):
             logger.info(f"Loading source configuration from [{abs_path}]")
             with open(abs_path, "r") as file:
                 self.config = yaml.safe_load(file)
+                # Handle empty file or invalid YAML that returns None
+                if self.config is None:
+                    logger.warning(
+                        f"Configuration file [{abs_path}] is empty, initializing with default structure."
+                    )
+                    self.config = {"version": 1, "sources": []}
+                    self.save_source_config()
             return self.config
         except FileNotFoundError:
             logger.warning(
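
The guard exists because yaml.safe_load returns None for an empty file, which would crash any later subscript of self.config. A two-line illustration of the failure mode and the default shape used above:

    import yaml

    print(yaml.safe_load(""))                                   # None, not {}
    print(yaml.safe_load("") or {"version": 1, "sources": []})  # default structure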
{qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: qalita
-Version: 2.9.2
+Version: 2.10.1
 Summary: QALITA Platform Command Line Interface
 Author-email: QALITA SAS <contact@qalita.io>
 License-File: LICENSE
@@ -34,6 +34,7 @@ Requires-Dist: loguru>=0.7.0
 Requires-Dist: openpyxl>=3.1.5
 Requires-Dist: oracledb>=2.5.0
 Requires-Dist: pandas>=2.0.0
+Requires-Dist: pandasql>=0.7.3
 Requires-Dist: paramiko>=3.4.0
 Requires-Dist: psycopg2-binary>=2.9.9
 Requires-Dist: pyarrow>=14.0.0