qalita 2.9.2-py3-none-any.whl → 2.10.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. qalita/_frontend/.next/BUILD_ID +1 -1
  2. qalita/_frontend/.next/build-manifest.json +2 -2
  3. qalita/_frontend/.next/prerender-manifest.json +3 -3
  4. qalita/_frontend/.next/server/app/_global-error.html +2 -2
  5. qalita/_frontend/.next/server/app/_global-error.rsc +1 -1
  6. qalita/_frontend/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +1 -1
  7. qalita/_frontend/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  8. qalita/_frontend/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  9. qalita/_frontend/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  10. qalita/_frontend/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  11. qalita/_frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  12. qalita/_frontend/.next/server/app/_not-found.html +1 -1
  13. qalita/_frontend/.next/server/app/_not-found.rsc +2 -2
  14. qalita/_frontend/.next/server/app/_not-found.segments/_full.segment.rsc +2 -2
  15. qalita/_frontend/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  16. qalita/_frontend/.next/server/app/_not-found.segments/_index.segment.rsc +2 -2
  17. qalita/_frontend/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  18. qalita/_frontend/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  19. qalita/_frontend/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
  20. qalita/_frontend/.next/server/app/page_client-reference-manifest.js +1 -1
  21. qalita/_frontend/.next/server/app/sources/add/page_client-reference-manifest.js +1 -1
  22. qalita/_frontend/.next/server/app/sources/add.html +1 -1
  23. qalita/_frontend/.next/server/app/sources/add.rsc +3 -3
  24. qalita/_frontend/.next/server/app/sources/add.segments/_full.segment.rsc +3 -3
  25. qalita/_frontend/.next/server/app/sources/add.segments/_head.segment.rsc +1 -1
  26. qalita/_frontend/.next/server/app/sources/add.segments/_index.segment.rsc +2 -2
  27. qalita/_frontend/.next/server/app/sources/add.segments/_tree.segment.rsc +2 -2
  28. qalita/_frontend/.next/server/app/sources/add.segments/sources/add/__PAGE__.segment.rsc +2 -2
  29. qalita/_frontend/.next/server/app/sources/add.segments/sources/add.segment.rsc +1 -1
  30. qalita/_frontend/.next/server/app/sources/add.segments/sources.segment.rsc +1 -1
  31. qalita/_frontend/.next/server/app/sources/edit/[id]/page_client-reference-manifest.js +1 -1
  32. qalita/_frontend/.next/server/app/sources/page_client-reference-manifest.js +1 -1
  33. qalita/_frontend/.next/server/app/sources.html +1 -1
  34. qalita/_frontend/.next/server/app/sources.rsc +3 -3
  35. qalita/_frontend/.next/server/app/sources.segments/_full.segment.rsc +3 -3
  36. qalita/_frontend/.next/server/app/sources.segments/_head.segment.rsc +1 -1
  37. qalita/_frontend/.next/server/app/sources.segments/_index.segment.rsc +2 -2
  38. qalita/_frontend/.next/server/app/sources.segments/_tree.segment.rsc +2 -2
  39. qalita/_frontend/.next/server/app/sources.segments/sources/__PAGE__.segment.rsc +2 -2
  40. qalita/_frontend/.next/server/app/sources.segments/sources.segment.rsc +1 -1
  41. qalita/_frontend/.next/server/chunks/ssr/[root-of-the-server]__be91267c._.js +1 -1
  42. qalita/_frontend/.next/server/chunks/ssr/_cb7b44d6._.js +1 -1
  43. qalita/_frontend/.next/server/chunks/ssr/_d44c43ed._.js +1 -1
  44. qalita/_frontend/.next/server/chunks/ssr/components_DashboardContent_tsx_c3635665._.js +1 -1
  45. qalita/_frontend/.next/server/pages/404.html +1 -1
  46. qalita/_frontend/.next/server/pages/500.html +2 -2
  47. qalita/_frontend/.next/server/server-reference-manifest.js +1 -1
  48. qalita/_frontend/.next/server/server-reference-manifest.json +1 -1
  49. qalita/_frontend/.next/static/chunks/1e6a98e93c470083.css +1 -0
  50. qalita/_frontend/.next/static/chunks/499b7099996cc9f9.js +1 -0
  51. qalita/_frontend/.next/static/chunks/89c689b5748e28ed.js +1 -0
  52. qalita/_frontend/.next/static/chunks/ba22289f779d638e.js +1 -0
  53. qalita/_frontend/.next/static/chunks/dde1c328f398837e.js +1 -0
  54. qalita/_frontend/.next/static/chunks/facd124df217e016.js +1 -0
  55. qalita/commands/source.py +166 -2
  56. qalita/commands/worker.py +3 -3
  57. qalita/commands/worker_grpc.py +3 -3
  58. qalita/grpc/client.py +227 -32
  59. qalita/internal/action_executor.py +124 -11
  60. qalita/internal/config.py +7 -0
  61. qalita/internal/utils.py +1 -1
  62. {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/METADATA +2 -1
  63. {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/RECORD +69 -68
  64. qalita/_frontend/.next/static/chunks/02a64570f0a14789.js +0 -1
  65. qalita/_frontend/.next/static/chunks/27b3ba70c7ef50a8.js +0 -1
  66. qalita/_frontend/.next/static/chunks/517e9b74d1a3c0ce.js +0 -1
  67. qalita/_frontend/.next/static/chunks/6c99da4248e4fcfc.js +0 -1
  68. qalita/_frontend/.next/static/chunks/e4c3a252774ab7fd.css +0 -1
  69. /qalita/_frontend/.next/static/{SlJmHVnRND1B7HlzvPJuC → ymL1t781xjzJd1EX5euFe}/_buildManifest.js +0 -0
  70. /qalita/_frontend/.next/static/{SlJmHVnRND1B7HlzvPJuC → ymL1t781xjzJd1EX5euFe}/_clientMiddlewareManifest.json +0 -0
  71. /qalita/_frontend/.next/static/{SlJmHVnRND1B7HlzvPJuC → ymL1t781xjzJd1EX5euFe}/_ssgManifest.js +0 -0
  72. {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/WHEEL +0 -0
  73. {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/entry_points.txt +0 -0
  74. {qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/licenses/LICENSE +0 -0
qalita/grpc/client.py CHANGED
@@ -26,6 +26,7 @@ class GrpcClient:
     - Keep-alive management
     - Bidirectional streaming support
     - Thread-safe connection state
+    - Stability detection before resetting reconnection counter
     """
 
     def __init__(
@@ -36,6 +37,7 @@ class GrpcClient:
         max_reconnect_attempts: int = 10,
         initial_reconnect_delay: float = 1.0,
         max_reconnect_delay: float = 60.0,
+        stability_threshold_seconds: float = 30.0,
     ):
         """
         Initialize the gRPC client.
@@ -47,6 +49,7 @@ class GrpcClient:
             max_reconnect_attempts: Maximum reconnection attempts (0 = unlimited)
             initial_reconnect_delay: Initial delay between reconnection attempts
             max_reconnect_delay: Maximum delay between reconnection attempts
+            stability_threshold_seconds: Time the connection must be stable before resetting attempts counter
         """
         self._url = url
         self._token = token
@@ -54,6 +57,7 @@ class GrpcClient:
         self._max_reconnect_attempts = max_reconnect_attempts
         self._initial_reconnect_delay = initial_reconnect_delay
         self._max_reconnect_delay = max_reconnect_delay
+        self._stability_threshold_seconds = stability_threshold_seconds
 
         # Connection state - set before parsing URL
         self._use_secure_channel = False
@@ -66,12 +70,21 @@ class GrpcClient:
         self._stub: Optional[qalita_pb2_grpc.WorkerServiceStub] = None
         self._connected = False
         self._reconnect_attempts = 0
+        self._current_reconnect_delay = initial_reconnect_delay
+        self._last_successful_stream_start: Optional[datetime] = None
+        self._stream_healthy = False
 
         # Stream state
         self._stream_call = None
         self._outgoing_queue: asyncio.Queue = asyncio.Queue()
         self._stream_active = False
 
+        # Stream health monitoring
+        self._last_message_received: Optional[datetime] = None
+        self._last_message_sent: Optional[datetime] = None
+        self._stream_health_timeout = 45.0  # Consider stream dead if no response in 45s
+        self._force_reconnect = False
+
         # Callbacks
         self._on_job_received: Optional[Callable] = None
         self._on_routine_received: Optional[Callable] = None
@@ -129,10 +142,22 @@ class GrpcClient:
         """
         Establish connection to the gRPC server.
 
+        Note: This method does NOT reset _reconnect_attempts. The counter is only
+        reset after the stream has been stable for _stability_threshold_seconds.
+
         Returns:
             True if connection successful, False otherwise
         """
         try:
+            # Close any existing channel first
+            if self._channel:
+                try:
+                    await self._channel.close()
+                except Exception:
+                    pass
+                self._channel = None
+                self._stub = None
+
             # Channel options for long-running streams
             channel_options = [
                 ('grpc.keepalive_time_ms', 30000),
@@ -142,6 +167,10 @@ class GrpcClient:
                 ('grpc.http2.max_pings_without_data', 0),
                 ('grpc.max_receive_message_length', 50 * 1024 * 1024),
                 ('grpc.max_send_message_length', 50 * 1024 * 1024),
+                # Additional options for better connection resilience
+                ('grpc.initial_reconnect_backoff_ms', 1000),
+                ('grpc.max_reconnect_backoff_ms', 60000),
+                ('grpc.enable_retries', 1),
             ]
 
             # Create channel - secure for production, insecure for local dev
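
These are standard grpc-python channel arguments rather than anything QALITA-specific; a channel tuned the same way can be built as in the minimal sketch below (the "localhost:50051" target is a placeholder, not the package's default):

    import grpc

    channel_options = [
        ("grpc.keepalive_time_ms", 30000),          # ping the server every 30s
        ("grpc.http2.max_pings_without_data", 0),   # allow pings on an idle stream
        ("grpc.initial_reconnect_backoff_ms", 1000),
        ("grpc.max_reconnect_backoff_ms", 60000),
        ("grpc.enable_retries", 1),
    ]

    # Placeholder target for illustration only.
    channel = grpc.aio.insecure_channel("localhost:50051", options=channel_options)
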
@@ -160,7 +189,7 @@ class GrpcClient:
 
             self._stub = qalita_pb2_grpc.WorkerServiceStub(self._channel)
             self._connected = True
-            self._reconnect_attempts = 0
+            # Note: Do NOT reset _reconnect_attempts here - only reset after stable stream
 
             logger.info(f"Connected to gRPC server at {self._grpc_target}")
             return True
@@ -197,31 +226,102 @@ class GrpcClient:
         """
         Attempt to reconnect with exponential backoff.
 
+        The reconnection counter persists across reconnection cycles. It only resets
+        when the connection has been stable (stream healthy for _stability_threshold_seconds).
+
         Returns:
             True if reconnection successful, False if max attempts exceeded
         """
-        delay = self._initial_reconnect_delay
+        self._reconnect_attempts += 1
+        self._stream_healthy = False
+
+        # Check if max attempts exceeded
+        if self._max_reconnect_attempts > 0 and self._reconnect_attempts > self._max_reconnect_attempts:
+            logger.error(
+                f"Max reconnection attempts exceeded ({self._reconnect_attempts}/{self._max_reconnect_attempts}). "
+                f"Will continue trying with max backoff delay."
+            )
+            # Don't return False - keep trying but with max delay
+            # In production, we want the worker to eventually recover
 
-        while (self._max_reconnect_attempts == 0 or
-               self._reconnect_attempts < self._max_reconnect_attempts):
-
-            self._reconnect_attempts += 1
-            logger.warning(
-                f"Reconnection attempt {self._reconnect_attempts}"
-                f"{f'/{self._max_reconnect_attempts}' if self._max_reconnect_attempts > 0 else ''}"
+        logger.warning(
+            f"Reconnection attempt {self._reconnect_attempts}"
+            f"{f'/{self._max_reconnect_attempts}' if self._max_reconnect_attempts > 0 else ''} "
+            f"(delay: {self._current_reconnect_delay:.1f}s)"
+        )
+
+        # Wait before attempting reconnection (exponential backoff)
+        await asyncio.sleep(self._current_reconnect_delay)
+
+        # Attempt to connect
+        if await self.connect():
+            # Increase delay for next attempt (in case this stream also fails quickly)
+            self._current_reconnect_delay = min(
+                self._current_reconnect_delay * 2,
+                self._max_reconnect_delay
             )
-
-            await asyncio.sleep(delay)
-
-            if await self.connect():
-                return True
-
-            # Exponential backoff
-            delay = min(delay * 2, self._max_reconnect_delay)
+            return True
 
-        logger.error("Max reconnection attempts exceeded")
+        # Connection failed, increase delay for next attempt
+        self._current_reconnect_delay = min(
+            self._current_reconnect_delay * 2,
+            self._max_reconnect_delay
+        )
         return False
 
+    def _mark_stream_stable(self) -> None:
+        """
+        Mark the stream as stable and reset reconnection counters.
+
+        Called when the stream has been healthy for _stability_threshold_seconds.
+        """
+        if not self._stream_healthy:
+            logger.info("Stream connection is now stable - resetting reconnection counters")
+            self._stream_healthy = True
+            self._reconnect_attempts = 0
+            self._current_reconnect_delay = self._initial_reconnect_delay
+
+    async def _check_stream_health(self) -> None:
+        """
+        Check if the stream is actually working by comparing sent vs received timestamps.
+
+        If we've been sending messages but haven't received any response (ack or other)
+        for _stream_health_timeout seconds, the stream is probably dead and we should reconnect.
+        """
+        now = datetime.now(timezone.utc)
+
+        # Need both timestamps to make a comparison
+        if not self._last_message_sent:
+            return
+
+        # Calculate time since last message sent and received
+        time_since_sent = (now - self._last_message_sent).total_seconds()
+
+        if self._last_message_received:
+            time_since_received = (now - self._last_message_received).total_seconds()
+        else:
+            # Never received anything - use time since stream started
+            if self._last_successful_stream_start:
+                time_since_received = (now - self._last_successful_stream_start).total_seconds()
+            else:
+                return
+
+        # If we've been sending but not receiving for too long, stream is dead
+        if time_since_received > self._stream_health_timeout:
+            logger.warning(
+                f"Stream appears dead: last sent {time_since_sent:.1f}s ago, "
+                f"last received {time_since_received:.1f}s ago (timeout: {self._stream_health_timeout}s)"
+            )
+            logger.warning("Forcing reconnection due to unresponsive stream...")
+            self._force_reconnect = True
+
+            # Cancel the stream call to force the error path
+            if self._stream_call:
+                try:
+                    self._stream_call.cancel()
+                except Exception as e:
+                    logger.debug(f"Error cancelling stream for forced reconnect: {e}")
+
     # =========================================================================
     # Unary RPCs
     # =========================================================================
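
The net effect of the rewritten _reconnect and the new _mark_stream_stable is a backoff delay that persists across stream restarts and only resets once a stream survives the stability threshold. A compact standalone model of that policy (class and method names here are illustrative, not the package's API):

    from datetime import datetime, timezone

    class ReconnectPolicy:
        """Toy model of the persistent-backoff behavior above."""

        def __init__(self, initial=1.0, maximum=60.0, stability=30.0):
            self.initial, self.maximum, self.stability = initial, maximum, stability
            self.attempts = 0
            self.delay = initial
            self.stream_start = None

        def next_delay(self) -> float:
            """On each drop: count the attempt, return the wait, then grow the backoff."""
            self.attempts += 1
            wait = self.delay
            self.delay = min(self.delay * 2, self.maximum)
            return wait

        def on_stream_started(self) -> None:
            self.stream_start = datetime.now(timezone.utc)

        def maybe_mark_stable(self) -> None:
            """Reset counters only once the stream has stayed healthy long enough."""
            if self.stream_start is None:
                return
            elapsed = (datetime.now(timezone.utc) - self.stream_start).total_seconds()
            if elapsed >= self.stability:
                self.attempts = 0
                self.delay = self.initial

    # A connection that flaps every few seconds never reaches the 30s
    # threshold, so waits keep growing: 1, 2, 4, ... capped at 60.
    policy = ReconnectPolicy()
    print([policy.next_delay() for _ in range(8)])
    # [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 60.0, 60.0]
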
@@ -646,12 +746,14 @@ class GrpcClient:
     async def _outgoing_messages(self) -> AsyncIterator[qalita_pb2.WorkerMessage]:
         """Generator for outgoing stream messages."""
         logger.info("Outgoing messages generator started")
-        while self._stream_active:
+        while self._stream_active and not self._force_reconnect:
             try:
                 # Use get_nowait in a loop with sleep to avoid blocking gRPC
                 try:
                     msg = self._outgoing_queue.get_nowait()
-                    logger.debug(f"Yielding message type: {msg.WhichOneof('payload')}")
+                    msg_type = msg.WhichOneof('payload')
+                    logger.debug(f"Yielding message type: {msg_type}")
+                    self._last_message_sent = datetime.now(timezone.utc)
                     yield msg
                 except asyncio.QueueEmpty:
                     # No message available, yield control briefly
@@ -672,7 +774,9 @@ class GrpcClient:
         - Keep-alive signals (sent every 10 seconds)
         - Incoming job assignments
         - Incoming routine triggers
-        - Automatic reconnection on failure
+        - Automatic reconnection on failure with exponential backoff
+        - Stability detection to reset reconnection counters
+        - Dead stream detection (sending but not receiving)
         """
         if not self._connected:
             if not await self.connect():
@@ -681,14 +785,30 @@ class GrpcClient:
         # Recreate queue in async context to ensure proper event loop binding
         self._outgoing_queue = asyncio.Queue()
         self._stream_active = True
+        self._stream_healthy = False
+        self._last_successful_stream_start = None
+        self._last_message_received = None
+        self._last_message_sent = None
+        self._force_reconnect = False
 
         async def keep_alive_loop():
-            """Send keep-alive every 10 seconds."""
+            """Send keep-alive every 10 seconds and monitor stream health."""
             logger.info(f"Keep-alive loop started, worker_id={self._worker_id}")
-            while self._stream_active:
+            while self._stream_active and not self._force_reconnect:
                 try:
                     logger.debug(f"Sending keep-alive for worker {self._worker_id}")
                     await self.send_keep_alive()
+
+                    # Check if stream has been healthy long enough to reset counters
+                    if (self._last_successful_stream_start and
+                            not self._stream_healthy):
+                        elapsed = (datetime.now(timezone.utc) - self._last_successful_stream_start).total_seconds()
+                        if elapsed >= self._stability_threshold_seconds:
+                            self._mark_stream_stable()
+
+                    # Health check: detect dead stream (sending but not receiving)
+                    await self._check_stream_health()
+
                     await asyncio.sleep(10)
                 except asyncio.CancelledError:
                     logger.info("Keep-alive loop cancelled")
@@ -696,15 +816,28 @@ class GrpcClient:
                 except Exception as e:
                     logger.error(f"Keep-alive error: {e}")
 
-        async def process_stream():
-            """Process incoming stream messages."""
+        async def process_single_stream() -> bool:
+            """
+            Process incoming stream messages for one connection attempt.
+
+            Returns:
+                True if stream ended gracefully (should not reconnect)
+                False if stream had an error (should attempt reconnection)
+            """
             try:
                 self._stream_call = self._stub.Connect(
                     self._outgoing_messages(),
                     metadata=self.metadata,
                 )
 
+                # Mark the time when stream successfully started
+                self._last_successful_stream_start = datetime.now(timezone.utc)
+                logger.info("Stream established successfully")
+
                 async for msg in self._stream_call:
+                    # Each message received confirms the stream is working
+                    self._last_message_received = datetime.now(timezone.utc)
+
                     if msg.HasField('job_assignment'):
                         job = msg.job_assignment.job
                         logger.info(f"Received job assignment: {job.id}")
@@ -737,26 +870,88 @@ class GrpcClient:
 
                     elif msg.HasField('ack'):
                         logger.debug(f"Received ack: {msg.ack.message_type}")
+                        # Ack received means stream is working, check stability
+                        if (self._last_successful_stream_start and
+                                not self._stream_healthy):
+                            elapsed = (datetime.now(timezone.utc) - self._last_successful_stream_start).total_seconds()
+                            if elapsed >= self._stability_threshold_seconds:
+                                self._mark_stream_stable()
 
                     elif msg.HasField('error'):
                         logger.error(f"Server error: {msg.error.code} - {msg.error.message}")
+
+                # Stream ended normally (server closed it gracefully)
+                logger.info("Stream ended normally")
+                return False  # Still try to reconnect for continuous operation
+
+            except asyncio.CancelledError:
+                # CancelledError is a BaseException in Python 3.8+, must catch explicitly
+                if self._force_reconnect:
+                    logger.info("Stream cancelled due to forced reconnect (dead stream detection)")
+                    return False  # Reconnect
+                else:
+                    logger.info("Stream cancelled by client")
+                    return True  # Don't reconnect if we intentionally cancelled it
 
             except grpc.aio.AioRpcError as e:
                 if e.code() == grpc.StatusCode.CANCELLED:
-                    logger.info("Stream cancelled")
+                    if self._force_reconnect:
+                        logger.info("Stream cancelled due to forced reconnect (dead stream detection)")
+                        return False  # Reconnect
+                    else:
+                        logger.info("Stream cancelled by client")
+                        return True  # Don't reconnect if we intentionally cancelled it
                 else:
-                    logger.error(f"Stream error: {e.code()} - {e.details()}")
-                    # Attempt reconnection
-                    if self._stream_active and await self._reconnect():
-                        await process_stream()
+                    # Calculate how long the stream was alive
+                    stream_duration = 0
+                    if self._last_successful_stream_start:
+                        stream_duration = (datetime.now(timezone.utc) - self._last_successful_stream_start).total_seconds()
+
+                    logger.error(
+                        f"Stream error after {stream_duration:.1f}s: {e.code()} - {e.details()}"
+                    )
+                    return False  # Should attempt reconnection
+
+            except Exception as e:
+                logger.error(f"Unexpected stream error: {e}")
+                return False  # Should attempt reconnection
 
-        # Run keep-alive and stream processing concurrently
+        # Main stream loop with reconnection handling
         keep_alive_task = asyncio.create_task(keep_alive_loop())
 
         try:
-            await process_stream()
+            while self._stream_active:
+                # Reset state before starting/restarting stream
+                self._force_reconnect = False
+                self._last_message_received = None
+                self._last_message_sent = None
+
+                # Process the stream
+                should_stop = await process_single_stream()
+
+                if should_stop or not self._stream_active:
+                    break
+
+                # Stream failed, attempt reconnection
+                self._last_successful_stream_start = None
+
+                # Recreate the outgoing queue to clear any stale messages
+                self._outgoing_queue = asyncio.Queue()
+
+                # Attempt reconnection (this handles backoff)
+                if not await self._reconnect():
+                    # _reconnect now always returns True after sleeping and connecting
+                    # It only returns False if connect() itself fails
+                    # In that case, keep trying
+                    logger.warning("Reconnection failed, will retry...")
+                    continue
+
+                # Reconnected successfully, loop will start a new stream
+                logger.info("Reconnected, restarting stream...")
+
         finally:
             self._stream_active = False
+            self._force_reconnect = True  # Stop the outgoing generator
             keep_alive_task.cancel()
             try:
                 await keep_alive_task
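
The dead-stream detection above boils down to a timestamp comparison: if messages have been going out but nothing (not even an ack) has come back within the timeout, the stream is presumed dead. A standalone illustration of that rule (assuming the same 45-second timeout; this helper is not part of the package):

    from datetime import datetime, timedelta, timezone
    from typing import Optional

    STREAM_HEALTH_TIMEOUT = 45.0  # seconds, mirroring _stream_health_timeout

    def stream_is_dead(last_sent: Optional[datetime],
                       last_received: Optional[datetime],
                       stream_start: Optional[datetime],
                       now: datetime) -> bool:
        """True when we've been sending but heard nothing back for too long."""
        if last_sent is None:
            return False  # nothing sent yet, nothing to judge
        reference = last_received or stream_start
        if reference is None:
            return False
        return (now - reference).total_seconds() > STREAM_HEALTH_TIMEOUT

    now = datetime.now(timezone.utc)
    # Keep-alives went out for a minute but no ack ever arrived: dead.
    print(stream_is_dead(now, None, now - timedelta(seconds=60), now))  # True
    # An ack 5 seconds ago keeps the stream alive.
    print(stream_is_dead(now, now - timedelta(seconds=5), None, now))   # False
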
qalita/internal/action_executor.py CHANGED
@@ -36,7 +36,7 @@ class ActionResult:
 
 # Supported action types
 ACTION_TYPES = {
-    "query": "Execute a SQL query on a database source",
+    "query": "Execute a SQL query on any data source (database or file via pandasql)",
     "read_data": "Read data from a file or database source",
     "filter": "Filter data based on conditions",
     "aggregate": "Perform aggregation on data",
@@ -48,6 +48,14 @@ ACTION_TYPES = {
     "tail": "Get last N rows from a source",
 }
 
+# Check if pandasql is available for file SQL queries
+_PANDASQL_AVAILABLE = False
+try:
+    import pandasql
+    _PANDASQL_AVAILABLE = True
+except ImportError:
+    pass
+
 
 class ActionExecutor:
     """
@@ -171,14 +179,7 @@ class ActionExecutor:
         return source_type in ("file", "csv", "excel", "parquet", "json", "folder")
 
     def _execute_query(self, source_config: dict, params: dict) -> ActionResult:
-        """Execute a SQL query on a database source."""
-        if not self._is_database_source(source_config):
-            return ActionResult(
-                ok=False,
-                action_type="query",
-                error=f"Query action only supported for database sources, not {source_config.get('type')}",
-            )
-
+        """Execute a SQL query on any data source (database or file via pandasql)."""
         sql = params.get("sql")
         if not sql:
             return ActionResult(
@@ -189,11 +190,32 @@
 
         limit = params.get("limit", DEFAULT_ROW_LIMIT)
 
-        # Add LIMIT if not present (for safety)
+        # Security: reject modification queries
         sql_lower = sql.strip().lower()
-        if "limit" not in sql_lower and not sql_lower.startswith(("insert", "update", "delete", "create", "drop", "alter")):
+        if sql_lower.startswith(("insert", "update", "delete", "create", "drop", "alter", "truncate")):
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="Modification queries (INSERT, UPDATE, DELETE, etc.) are not allowed",
+            )
+
+        # Add LIMIT if not present (for safety)
+        if "limit" not in sql_lower:
             sql = f"{sql.rstrip(';')} LIMIT {limit}"
 
+        if self._is_database_source(source_config):
+            return self._execute_database_query(source_config, sql, limit)
+        elif self._is_file_source(source_config):
+            return self._execute_file_query(source_config, sql, limit)
+        else:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query action not supported for source type: {source_config.get('type')}",
+            )
+
+    def _execute_database_query(self, source_config: dict, sql: str, limit: int) -> ActionResult:
+        """Execute a SQL query on a database source."""
         try:
             engine = self._get_database_engine(source_config)
             with engine.connect() as conn:
@@ -213,6 +235,97 @@
                 error=f"Query execution failed: {str(e)}",
             )
 
+    def _execute_file_query(self, source_config: dict, sql: str, limit: int) -> ActionResult:
+        """Execute a SQL query on a file source using pandasql."""
+        import os
+
+        if not _PANDASQL_AVAILABLE:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="SQL queries on file sources require 'pandasql'. Install with: pip install pandasql",
+            )
+
+        config = source_config.get("config", {})
+        source_type = source_config.get("type", "").lower()
+        path = config.get("path")
+
+        if not path:
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error="File path not configured",
+            )
+
+        if not os.path.exists(path):
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"File not found: {path}",
+            )
+
+        try:
+            # Load the file into a DataFrame
+            source_data = self._load_file_to_dataframe(path, source_type)
+
+            if source_data is None:
+                return ActionResult(
+                    ok=False,
+                    action_type="query",
+                    error=f"Unsupported file type: {source_type}",
+                )
+
+            # Execute the SQL query using pandasql
+            # The user must use 'source_data' as the table name in their query
+            from pandasql import sqldf
+
+            # Create a local namespace for pandasql
+            local_env = {"source_data": source_data}
+            result_df = sqldf(sql, local_env)
+
+            # Apply limit if result is larger
+            if len(result_df) > limit:
+                result_df = result_df.head(limit)
+
+            preview = _dataframe_to_preview(result_df, limit)
+            return ActionResult(
+                ok=True,
+                action_type="query",
+                data=preview,
+                result_json=json.dumps({
+                    "rows_returned": len(result_df),
+                    "columns": list(result_df.columns),
+                    "source_rows": len(source_data),
+                }),
+            )
+        except Exception as e:
+            error_msg = str(e)
+            # Provide helpful hint if table name is wrong
+            if "no such table" in error_msg.lower():
+                error_msg += ". Hint: Use 'source_data' as the table name, e.g., SELECT * FROM source_data"
+            return ActionResult(
+                ok=False,
+                action_type="query",
+                error=f"Query execution failed: {error_msg}",
+            )
+
+    def _load_file_to_dataframe(self, path: str, source_type: str) -> Optional[pd.DataFrame]:
+        """Load a file into a pandas DataFrame for SQL querying."""
+        try:
+            if source_type in ("csv", "file") or path.endswith(".csv"):
+                return pd.read_csv(path, low_memory=False)
+            elif source_type == "excel" or path.endswith((".xlsx", ".xls")):
+                return pd.read_excel(path, engine="openpyxl")
+            elif source_type == "parquet" or path.endswith(".parquet"):
+                return pd.read_parquet(path)
+            elif source_type == "json" or path.endswith(".json"):
+                return pd.read_json(path)
+            else:
+                return None
+        except Exception as e:
+            logger.error(f"Failed to load file {path}: {e}")
+            return None
+
     def _read_data(self, source_config: dict, params: dict) -> ActionResult:
         """Read data from a source."""
         limit = params.get("limit", DEFAULT_ROW_LIMIT)
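
With this change, query actions against CSV, Excel, Parquet, and JSON sources are routed through pandasql, which runs the SQL against an in-memory SQLite copy of the DataFrame. A minimal standalone usage sketch (the people.csv file and its columns are made up for illustration; 'source_data' is the table name the executor actually exposes):

    import pandas as pd
    from pandasql import sqldf

    # Hypothetical input file; any DataFrame works the same way.
    source_data = pd.read_csv("people.csv")

    # pandasql resolves table names from the mapping passed as its second
    # argument, so the query must refer to the DataFrame as 'source_data'.
    result = sqldf(
        "SELECT city, COUNT(*) AS n FROM source_data GROUP BY city LIMIT 500",
        {"source_data": source_data},
    )
    print(result)
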
qalita/internal/config.py CHANGED
@@ -51,6 +51,13 @@ class Config(object):
             logger.info(f"Loading source configuration from [{abs_path}]")
             with open(abs_path, "r") as file:
                 self.config = yaml.safe_load(file)
+            # Handle empty file or invalid YAML that returns None
+            if self.config is None:
+                logger.warning(
+                    f"Configuration file [{abs_path}] is empty, initializing with default structure."
+                )
+                self.config = {"version": 1, "sources": []}
+                self.save_source_config()
             return self.config
         except FileNotFoundError:
             logger.warning(
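
This guard matters because yaml.safe_load returns None for an empty document instead of raising, so downstream code expecting a dict would otherwise crash on attribute access. A quick standalone illustration of the failure mode and the fix:

    import yaml

    print(yaml.safe_load(""))             # None, not {}
    print(yaml.safe_load("sources: []"))  # {'sources': []}

    # The new guard substitutes a default structure in the None case:
    config = yaml.safe_load("") or {"version": 1, "sources": []}
    print(config)  # {'version': 1, 'sources': []}
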
qalita/internal/utils.py CHANGED
@@ -224,7 +224,7 @@ def safe_path_check(path: str) -> str:  # lgtm[py/path-injection]
 
 
 def get_version():
-    return "2.9.2"
+    return "2.10.1"
 
 
 def make_tarfile(output_filename, source_dir):
{qalita-2.9.2.dist-info → qalita-2.10.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: qalita
-Version: 2.9.2
+Version: 2.10.1
 Summary: QALITA Platform Command Line Interface
 Author-email: QALITA SAS <contact@qalita.io>
 License-File: LICENSE
@@ -34,6 +34,7 @@ Requires-Dist: loguru>=0.7.0
 Requires-Dist: openpyxl>=3.1.5
 Requires-Dist: oracledb>=2.5.0
 Requires-Dist: pandas>=2.0.0
+Requires-Dist: pandasql>=0.7.3
 Requires-Dist: paramiko>=3.4.0
 Requires-Dist: psycopg2-binary>=2.9.9
 Requires-Dist: pyarrow>=14.0.0