relationalai 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. relationalai/clients/snowflake.py +6 -1
  2. relationalai/clients/use_index_poller.py +349 -188
  3. relationalai/early_access/dsl/bindings/csv.py +2 -2
  4. relationalai/semantics/internal/internal.py +22 -4
  5. relationalai/semantics/lqp/executor.py +61 -12
  6. relationalai/semantics/lqp/intrinsics.py +23 -0
  7. relationalai/semantics/lqp/model2lqp.py +13 -4
  8. relationalai/semantics/lqp/passes.py +2 -3
  9. relationalai/semantics/lqp/primitives.py +12 -1
  10. relationalai/semantics/metamodel/builtins.py +8 -1
  11. relationalai/semantics/metamodel/factory.py +3 -2
  12. relationalai/semantics/reasoners/graph/core.py +54 -2
  13. relationalai/semantics/reasoners/optimization/solvers_dev.py +20 -1
  14. relationalai/semantics/reasoners/optimization/solvers_pb.py +24 -3
  15. relationalai/semantics/rel/compiler.py +5 -17
  16. relationalai/semantics/rel/executor.py +2 -2
  17. relationalai/semantics/rel/rel.py +6 -0
  18. relationalai/semantics/rel/rel_utils.py +8 -1
  19. relationalai/semantics/rel/rewrite/extract_common.py +153 -242
  20. relationalai/semantics/sql/compiler.py +120 -39
  21. relationalai/semantics/sql/executor/duck_db.py +21 -0
  22. relationalai/semantics/sql/rewrite/denormalize.py +4 -6
  23. relationalai/semantics/sql/rewrite/recursive_union.py +23 -3
  24. relationalai/semantics/sql/sql.py +27 -0
  25. relationalai/semantics/std/__init__.py +2 -1
  26. relationalai/semantics/std/datetime.py +4 -0
  27. relationalai/semantics/std/re.py +83 -0
  28. relationalai/semantics/std/strings.py +1 -1
  29. relationalai/tools/cli_controls.py +445 -60
  30. relationalai/util/format.py +78 -1
  31. {relationalai-0.11.3.dist-info → relationalai-0.11.4.dist-info}/METADATA +3 -2
  32. {relationalai-0.11.3.dist-info → relationalai-0.11.4.dist-info}/RECORD +35 -33
  33. {relationalai-0.11.3.dist-info → relationalai-0.11.4.dist-info}/WHEEL +0 -0
  34. {relationalai-0.11.3.dist-info → relationalai-0.11.4.dist-info}/entry_points.txt +0 -0
  35. {relationalai-0.11.3.dist-info → relationalai-0.11.4.dist-info}/licenses/LICENSE +0 -0
@@ -11,7 +11,7 @@ import threading
11
11
  import time
12
12
  from dataclasses import dataclass
13
13
  from pathlib import Path
14
- from typing import Any, Callable, List, Sequence, TextIO, cast
14
+ from typing import Any, Callable, Dict, List, Sequence, TextIO, cast
15
15
 
16
16
  # Third-party imports
17
17
  import rich
@@ -26,6 +26,7 @@ from wcwidth import wcwidth
26
26
 
27
27
  # Local imports
28
28
  from relationalai import debugging
29
+ from relationalai.util.format import format_duration
29
30
  from ..environments import (
30
31
  HexEnvironment,
31
32
  JupyterEnvironment,
@@ -38,18 +39,64 @@ from ..environments import (
38
39
  # Constants
39
40
  #--------------------------------------------------
40
41
 
42
+ # Display symbols
43
+ ARROW = "➜"
44
+ CHECK_MARK = "✓"
45
+ SUCCESS_ICON = "✅"
46
+ FAIL_ICON = "❌"
47
+
48
+ # Spinner animation frames
49
+ SPINNER_FRAMES = ["▰▱▱▱", "▰▰▱▱", "▰▰▰▱", "▰▰▰▰", "▱▰▰▰", "▱▱▰▰", "▱▱▱▰", "▱▱▱▱"]
50
+
51
+ # Terminal display constants
52
+ DEFAULT_TERMINAL_WIDTH = 80
53
+ SEPARATOR_WIDTH = 40
54
+
55
+ # Task progress constants
56
+ INITIALIZATION_COMPLETED_TEXT = "Parallel init finished in"
57
+ MIN_CATEGORY_DURATION_SECONDS = 0.25 # Only show categories with duration > 250ms
58
+
59
+ # Task category constants
60
+ TASK_CATEGORY_INDEXING = "indexing"
61
+ TASK_CATEGORY_PROVISIONING = "provisioning"
62
+ TASK_CATEGORY_CHANGE_TRACKING = "change_tracking"
63
+ TASK_CATEGORY_CACHE = "cache"
64
+ TASK_CATEGORY_RELATIONS = "relations"
65
+ TASK_CATEGORY_STATUS = "status"
66
+ TASK_CATEGORY_VALIDATION = "validation"
67
+ TASK_CATEGORY_OTHER = "other"
68
+
69
+ # Default summary categories
70
+ DEFAULT_SUMMARY_CATEGORIES = {
71
+ TASK_CATEGORY_INDEXING: "Indexing",
72
+ TASK_CATEGORY_PROVISIONING: "Provisioning",
73
+ TASK_CATEGORY_CHANGE_TRACKING: "Change tracking",
74
+ TASK_CATEGORY_RELATIONS: "Relations",
75
+ TASK_CATEGORY_STATUS: "Status",
76
+ TASK_CATEGORY_VALIDATION: "Validation",
77
+ TASK_CATEGORY_OTHER: "Other"
78
+ }
79
+
80
+ # Parallel task categories (for duration calculation)
81
+ PARALLEL_TASK_CATEGORIES = {
82
+ TASK_CATEGORY_INDEXING,
83
+ TASK_CATEGORY_PROVISIONING,
84
+ TASK_CATEGORY_VALIDATION,
85
+ TASK_CATEGORY_CHANGE_TRACKING
86
+ }
87
+
88
+ # Prompt constants
41
89
  REFETCH = "[REFETCH LIST]"
42
90
  MANUAL_ENTRY = "[MANUAL ENTRY]"
43
91
 
44
- # TaskProgress timing constants
92
+ # Timing constants
45
93
  HIGHLIGHT_DURATION = 2.0
46
94
  COMPLETION_DISPLAY_DURATION = 8.0
47
95
  TIMER_CHECK_INTERVAL = 0.1
48
96
  SPINNER_UPDATE_INTERVAL = 0.15
49
-
50
- # Display symbols
51
- SUCCESS_ICON = "✅"
52
- FAIL_ICON = "❌"
97
+ INITIAL_DISPLAY_DELAY = 0.25
98
+ BRIEF_PAUSE = 0.1
99
+ LIVE_REFRESH_RATE = 10
53
100
 
54
101
  #--------------------------------------------------
55
102
  # Style
@@ -159,7 +206,7 @@ def _enumerate_choices(choices: inquirer_utils.InquirerPyListChoices) -> inquire
159
206
  else:
160
207
  return _enumerate_static_choices(choices)
161
208
 
162
- def _fuzzy(message:str, choices:inquirer_utils.InquirerPyListChoices, default:str|None = None, multiselect=False, show_index=False, **kwargs) -> str|list[str]:
209
+ def _fuzzy(message:str, choices:inquirer_utils.InquirerPyListChoices, default:str|None = None, multiselect=False, show_index=False, **kwargs) -> str|list[str]|None:
163
210
  if show_index:
164
211
  choices = _enumerate_choices(choices)
165
212
 
@@ -452,7 +499,7 @@ class Spinner(LineClearingMixin):
452
499
  self.message = message
453
500
  self.finished_message = finished_message
454
501
  self.failed_message = failed_message
455
- self.spinner_generator = itertools.cycle(["▰▱▱▱", "▰▰▱▱", "▰▰▰▱", "▰▰▰▰", "▱▰▰▰", "▱▱▰▰", "▱▱▱▰", "▱▱▱▱"])
502
+ self.spinner_generator = itertools.cycle(SPINNER_FRAMES)
456
503
  self.is_snowflake_notebook = isinstance(runtime_env, SnowbookEnvironment)
457
504
  self.is_hex = isinstance(runtime_env, HexEnvironment)
458
505
  self.is_jupyter = isinstance(runtime_env, JupyterEnvironment)
@@ -565,12 +612,12 @@ class Spinner(LineClearingMixin):
565
612
  self.update(color="magenta", starting=True)
566
613
  # return control to the event loop briefly so stdout can be sure to flush:
567
614
  if self.delay:
568
- time.sleep(0.25)
615
+ time.sleep(INITIAL_DISPLAY_DELAY)
569
616
  self.reset_cursor()
570
617
  if not self.delay:
571
618
  return self
572
619
  self.busy = True
573
- threading.Thread(target=self.spinner_task).start()
620
+ threading.Thread(target=self.spinner_task, daemon=True).start()
574
621
  return self
575
622
 
576
623
  def __exit__(self, exception, value, _):
@@ -637,13 +684,23 @@ class SpanSpinner(Spinner):
637
684
  class TaskInfo:
638
685
  """Represents a single task with its state and metadata."""
639
686
  description: str
687
+ category: str = "other"
640
688
  completed: bool = False
641
689
  added_time: float = 0.0
690
+ completed_time: float = 0.0
691
+ hidden: bool = False
642
692
 
643
693
  def __post_init__(self):
644
694
  if self.added_time == 0.0:
645
695
  self.added_time = time.time()
646
696
 
697
+ def get_duration(self) -> float:
698
+ """Get the duration of this task in seconds."""
699
+ if not self.completed or self.completed_time == 0.0:
700
+ return 0.0
701
+
702
+ return self.completed_time - self.added_time
703
+
647
704
 
648
705
  class _TimerManager:
649
706
  """Manages all delayed operations for TaskProgress."""
@@ -670,6 +727,15 @@ class _TimerManager:
670
727
  self._operations[task_id] = ("delayed_removal", scheduled_time)
671
728
  self._start()
672
729
 
730
+ def schedule_task_hiding(self, task_id: str, delay: float | None = None):
731
+ """Schedule hiding of a completed task from display (but keep in data structure)."""
732
+ if delay is None:
733
+ delay = COMPLETION_DISPLAY_DURATION
734
+ scheduled_time = time.time() + delay
735
+ self._operations[task_id] = ("delayed_hiding", scheduled_time)
736
+ self._start()
737
+
738
+
673
739
  def _start(self):
674
740
  """Start the timer thread if not already running."""
675
741
  if self._thread is None or not self._thread.is_alive():
@@ -717,6 +783,14 @@ class _TimerManager:
717
783
  elif hasattr(self._progress, 'sub_tasks') and task_id in self._progress.sub_tasks:
718
784
  del self._progress.sub_tasks[task_id]
719
785
  # For NotebookTaskProgress, no special update needed
786
+ elif op_type == "delayed_hiding":
787
+ if hasattr(self._progress, '_tasks') and task_id in self._progress._tasks:
788
+ # Mark task as hidden but keep it in the data structure
789
+ self._progress._tasks[task_id].hidden = True
790
+ # For TaskProgress, invalidate cache and update display
791
+ if hasattr(self._progress, '_invalidate_cache'):
792
+ self._progress._invalidate_cache()
793
+ self._progress._update_display()
720
794
 
721
795
  def stop(self):
722
796
  """Stop the timer manager."""
@@ -736,11 +810,6 @@ class TaskProgress:
736
810
  - Consistent task ordering with active tasks displayed above completed ones
737
811
  """
738
812
 
739
- # Display symbols
740
- SPINNER_FRAMES = ["▰▱▱▱", "▰▰▱▱", "▰▰▰▱", "▰▰▰▰", "▱▰▰▰", "▱▱▰▰", "▱▱▱▰", "▱▱▱▱"]
741
- ARROW = "➜"
742
- CHECK_MARK = "✓"
743
-
744
813
  def __init__(
745
814
  self,
746
815
  description: str = "",
@@ -750,6 +819,7 @@ class TaskProgress:
750
819
  trailing_newline: bool = False,
751
820
  transient: bool = False,
752
821
  hide_on_completion: bool = False,
822
+ show_duration_summary: bool = True,
753
823
  ):
754
824
  # Public configuration
755
825
  self.description = description
@@ -759,6 +829,7 @@ class TaskProgress:
759
829
  self.trailing_newline = trailing_newline
760
830
  self.transient = transient
761
831
  self.hide_on_completion = hide_on_completion
832
+ self.show_duration_summary = show_duration_summary
762
833
 
763
834
  # Detect CI environment to avoid cursor control issues
764
835
  from ..environments import CIEnvironment
@@ -775,6 +846,10 @@ class TaskProgress:
775
846
  self._tasks = {} # task_id -> TaskInfo
776
847
  self._next_task_id = 1
777
848
 
849
+ # Overall process timing
850
+ self._process_start_time = None
851
+ self._process_end_time = None
852
+
778
853
  # Animation state
779
854
  self.spinner_index = 0
780
855
 
@@ -839,7 +914,7 @@ class TaskProgress:
839
914
  elif self.main_completed:
840
915
  main_line = Text(f"{SUCCESS_ICON} ", style="green") + Text(self.description, style="green")
841
916
  else:
842
- spinner_text = self.SPINNER_FRAMES[self.spinner_index]
917
+ spinner_text = SPINNER_FRAMES[self.spinner_index]
843
918
  main_line = Text(f"{spinner_text} ", style="magenta") + Text(self.description, style="magenta")
844
919
 
845
920
  # Build subtask lines
@@ -866,6 +941,9 @@ class TaskProgress:
866
941
  completed_tasks = []
867
942
 
868
943
  for task_id, task_info in self._tasks.items():
944
+ # Skip hidden tasks
945
+ if task_info.hidden:
946
+ continue
869
947
  if task_info.completed:
870
948
  completed_tasks.append((task_id, task_info))
871
949
  else:
@@ -873,23 +951,23 @@ class TaskProgress:
873
951
 
874
952
  # Render incomplete tasks first
875
953
  for task_id, task_info in incomplete_tasks:
876
- is_highlighted = (task_id in self._highlighted_tasks and
954
+ is_highlighted = (task_id in self._highlighted_tasks and
877
955
  current_time < self._highlighted_tasks[task_id])
878
956
 
879
957
  style = "yellow" if is_highlighted else "white"
880
- line = Text(f" {self.ARROW} ", style=style) + Text(task_info.description, style=style)
958
+ line = Text(f" {ARROW} ", style=style) + Text(task_info.description, style=style)
881
959
  subtask_lines.append(line)
882
960
 
883
961
  # Render completed tasks
884
962
  for task_id, task_info in completed_tasks:
885
- line = Text(f" {self.CHECK_MARK} ", style="green") + Text(task_info.description, style="green")
963
+ line = Text(f" {CHECK_MARK} ", style="green") + Text(task_info.description, style="green")
886
964
  subtask_lines.append(line)
887
965
 
888
966
  return subtask_lines
889
967
 
890
968
  def _advance_spinner(self):
891
969
  """Advance the spinner animation."""
892
- self.spinner_index = (self.spinner_index + 1) % len(self.SPINNER_FRAMES)
970
+ self.spinner_index = (self.spinner_index + 1) % len(SPINNER_FRAMES)
893
971
 
894
972
  def _invalidate_cache(self):
895
973
  """Invalidate the render cache to force re-rendering."""
@@ -901,12 +979,13 @@ class TaskProgress:
901
979
  if self.live:
902
980
  self.live.update(self._render_display())
903
981
 
904
- def add_sub_task(self, description: str, task_id: str | None = None) -> str:
982
+ def add_sub_task(self, description: str, task_id: str | None = None, category: str = "general") -> str:
905
983
  """Add a new sub-task and return its unique ID.
906
984
 
907
985
  Args:
908
986
  description: Description of the subtask
909
987
  task_id: Optional custom task ID, if not provided one will be generated
988
+ category: Category for this task (e.g., "indexing", "provisioning", "change_tracking")
910
989
 
911
990
  Returns:
912
991
  str: The task ID for this subtask
@@ -915,7 +994,7 @@ class TaskProgress:
915
994
  task_id = self._generate_task_id()
916
995
 
917
996
  if task_id not in self._tasks:
918
- self._tasks[task_id] = TaskInfo(description=description)
997
+ self._tasks[task_id] = TaskInfo(description=description, category=category)
919
998
  self._invalidate_cache()
920
999
  self._update_display()
921
1000
 
@@ -940,19 +1019,24 @@ class TaskProgress:
940
1019
  self._invalidate_cache()
941
1020
  self._update_display()
942
1021
 
943
- def complete_sub_task(self, task_id: str) -> None:
1022
+ def complete_sub_task(self, task_id: str, record_time: bool = True) -> None:
944
1023
  """Complete a sub-task by marking it as done."""
945
1024
  if task_id in self._tasks:
946
1025
  # Remove any highlighting when completing
947
1026
  if task_id in self._highlighted_tasks:
948
1027
  del self._highlighted_tasks[task_id]
949
1028
 
1029
+ # Record completion time (only if not already completed and record_time is True)
1030
+ if not self._tasks[task_id].completed and record_time:
1031
+ self._tasks[task_id].completed_time = time.time()
950
1032
  self._tasks[task_id].completed = True
1033
+
951
1034
  self._invalidate_cache()
952
1035
  self._update_display()
953
1036
 
954
- # Schedule removal after completion display duration
955
- self._timer_manager.schedule_task_removal(task_id)
1037
+ # Schedule hiding the task from display after a short delay
1038
+ # but keep it in the data structure for summary generation
1039
+ self._timer_manager.schedule_task_hiding(task_id)
956
1040
 
957
1041
  def remove_sub_task(self, task_id: str, animate: bool = True) -> None:
958
1042
  """Remove a sub-task by ID with optional completion animation."""
@@ -1006,13 +1090,136 @@ class TaskProgress:
1006
1090
  current_count = len(self._tasks)
1007
1091
  return f"› Active tasks: {current_count}"
1008
1092
 
1093
+ def get_task_duration(self, task_id: str) -> float:
1094
+ """Get the duration of a specific task in seconds."""
1095
+ if task_id in self._tasks:
1096
+ return self._tasks[task_id].get_duration()
1097
+ return 0.0
1098
+
1099
+
1100
+ def get_completed_tasks(self) -> dict[str, TaskInfo]:
1101
+ """Get all completed tasks with their timing information."""
1102
+ return {task_id: task_info for task_id, task_info in self._tasks.items() if task_info.completed}
1103
+
1104
+ def get_tasks_by_category(self, category: str) -> dict[str, TaskInfo]:
1105
+ """Get all tasks (completed and active) for a specific category."""
1106
+ return {task_id: task_info for task_id, task_info in self._tasks.items() if task_info.category == category}
1107
+
1108
+ def get_completed_tasks_by_category(self, category: str) -> dict[str, TaskInfo]:
1109
+ """Get all completed tasks for a specific category."""
1110
+ return {task_id: task_info for task_id, task_info in self._tasks.items()
1111
+ if task_info.category == category and task_info.completed}
1112
+
1113
+ def set_process_start_time(self) -> None:
1114
+ """Set the overall process start time."""
1115
+ self._process_start_time = time.time()
1116
+
1117
+ def set_process_end_time(self) -> None:
1118
+ """Set the overall process end time."""
1119
+ self._process_end_time = time.time()
1120
+
1121
+ def get_total_duration(self) -> float:
1122
+ """Get the total duration from first task added to last task completed."""
1123
+ if not self._tasks:
1124
+ return 0.0
1125
+
1126
+ completed_tasks = self.get_completed_tasks()
1127
+ if not completed_tasks:
1128
+ return 0.0
1129
+
1130
+ # Find earliest start time and latest completion time
1131
+ start_times = [task.added_time for task in self._tasks.values()]
1132
+ completion_times = [task.completed_time for task in completed_tasks.values() if task.completed_time > 0]
1133
+
1134
+ if not start_times or not completion_times:
1135
+ return 0.0
1136
+
1137
+ earliest_start = min(start_times)
1138
+ latest_completion = max(completion_times)
1139
+
1140
+ return latest_completion - earliest_start
1141
+
1142
+ def generate_summary(self, categories: dict[str, str] | None = None) -> str:
1143
+ """Generate a summary of completed tasks by category.
1144
+
1145
+ Args:
1146
+ categories: Optional dict mapping category names to display names.
1147
+ Defaults to standard UseIndexPoller categories.
1148
+
1149
+ Returns:
1150
+ Formatted summary string, or empty string if no meaningful tasks.
1151
+ """
1152
+ if categories is None:
1153
+ categories = DEFAULT_SUMMARY_CATEGORIES
1154
+
1155
+ # Get completed tasks by category and calculate durations
1156
+ category_durations = {}
1157
+ for category_name in categories:
1158
+ tasks = self.get_completed_tasks_by_category(category_name)
1159
+ category_durations[category_name] = _calculate_category_duration(category_name, tasks)
1160
+
1161
+ # If there's nothing meaningful to show, return empty string
1162
+ if not any(category_durations.values()):
1163
+ return ""
1164
+
1165
+ total_duration = self.get_total_duration()
1166
+
1167
+ # Build Rich table directly from data (not from formatted strings)
1168
+ try:
1169
+ from rich.console import Console
1170
+ from rich.table import Table
1171
+
1172
+ table = Table(show_header=False, box=None, padding=(0, 1))
1173
+ table.add_column("Operation", style="white")
1174
+ table.add_column("Duration", style="green", justify="right")
1175
+
1176
+ # Add total duration row
1177
+ if total_duration > 0:
1178
+ table.add_row(
1179
+ INITIALIZATION_COMPLETED_TEXT,
1180
+ format_duration(total_duration)
1181
+ )
1182
+
1183
+ # Add category rows
1184
+ for category_name, display_name in categories.items():
1185
+ duration = category_durations[category_name]
1186
+ if duration > MIN_CATEGORY_DURATION_SECONDS:
1187
+ table.add_row(
1188
+ f" {ARROW} {display_name}",
1189
+ format_duration(duration)
1190
+ )
1191
+
1192
+ # Add blank row for spacing
1193
+ table.add_row("", "")
1194
+
1195
+ console = Console()
1196
+ with console.capture() as capture:
1197
+ console.print(table)
1198
+ return capture.get()
1199
+
1200
+ except ImportError:
1201
+ # Fallback to simple text format
1202
+ lines = []
1203
+ if total_duration > 0:
1204
+ lines.append(f"{INITIALIZATION_COMPLETED_TEXT} {format_duration(total_duration)}")
1205
+
1206
+ for category_name, display_name in categories.items():
1207
+ duration = category_durations[category_name]
1208
+ if duration > MIN_CATEGORY_DURATION_SECONDS:
1209
+ lines.append(f" {ARROW} {display_name} {format_duration(duration)}")
1210
+
1211
+ if lines:
1212
+ lines.append("")
1213
+
1214
+ return "\n".join(lines)
1215
+
1009
1216
  def __enter__(self):
1010
1217
  if self.leading_newline:
1011
1218
  print()
1012
1219
 
1013
1220
  # Start the live display
1014
1221
  from rich.live import Live
1015
- self.live = Live(self._render_display(), console=self.console, refresh_per_second=10)
1222
+ self.live = Live(self._render_display(), console=self.console, refresh_per_second=LIVE_REFRESH_RATE)
1016
1223
  self.live.start()
1017
1224
 
1018
1225
  # Start spinner animation
@@ -1051,19 +1258,19 @@ class TaskProgress:
1051
1258
  # Clear all tasks and update main task to show failure state
1052
1259
  self._clear_all_tasks()
1053
1260
  self.main_failed = True
1054
-
1261
+
1055
1262
  # Update main task description to show failure message
1056
1263
  if self.failure_message:
1057
1264
  self.description = self.failure_message
1058
1265
  else:
1059
1266
  self.description = f"Failed: {exc_val}"
1060
-
1267
+
1061
1268
  # Update the display to show the failure state before stopping
1062
1269
  if self.live:
1063
1270
  self.live.update(self._render_display())
1064
1271
  # Brief pause to show the failure state
1065
- time.sleep(0.1)
1066
-
1272
+ time.sleep(BRIEF_PAUSE)
1273
+
1067
1274
  if self.trailing_newline:
1068
1275
  print()
1069
1276
  self._cleanup()
@@ -1071,6 +1278,11 @@ class TaskProgress:
1071
1278
  def _handle_success(self):
1072
1279
  """Handle success case in context manager exit."""
1073
1280
  self.main_completed = True
1281
+
1282
+ # Generate summary before clearing tasks (so we have the timing data)
1283
+ # Only generate if show_duration_summary flag is True
1284
+ summary = self.generate_summary() if self.show_duration_summary else ""
1285
+
1074
1286
  self._clear_all_tasks()
1075
1287
 
1076
1288
  # Update main task description to show success message
@@ -1083,6 +1295,12 @@ class TaskProgress:
1083
1295
  # Stop the live display
1084
1296
  self.live.stop()
1085
1297
 
1298
+ # Print summary if available
1299
+ if summary:
1300
+ print() # Blank line for separation
1301
+ print(summary, end="") # summary already has trailing newline
1302
+ print() # Add extra blank line after summary
1303
+
1086
1304
  if self.trailing_newline:
1087
1305
  print()
1088
1306
  self._cleanup()
@@ -1101,33 +1319,55 @@ class TaskProgress:
1101
1319
  if not self.is_ci and sys.stdout.isatty():
1102
1320
  print("\r\033[K", end="", flush=True)
1103
1321
 
1322
+ def _calculate_category_duration(category_name: str, tasks: Dict[str, TaskInfo]) -> float:
1323
+ """Calculate duration for a category based on task type (parallel vs sequential)."""
1324
+ if not tasks:
1325
+ return 0.0
1326
+
1327
+ if category_name in PARALLEL_TASK_CATEGORIES:
1328
+ # For parallel tasks, use time span (max completion - min start)
1329
+ category_start_times = [task_info.added_time for task_info in tasks.values()]
1330
+ category_completion_times = [
1331
+ task_info.completed_time for task_info in tasks.values()
1332
+ if task_info.completed_time > 0
1333
+ ]
1334
+ if category_start_times and category_completion_times:
1335
+ return max(category_completion_times) - min(category_start_times)
1336
+ else:
1337
+ return 0.0
1338
+ else:
1339
+ # For sequential tasks, sum individual durations
1340
+ return sum(task_info.get_duration() for task_info in tasks.values())
1341
+
1104
1342
 
1105
1343
  def create_progress(description: str = "", success_message: str = "", failure_message: str = "",
1106
- leading_newline: bool = False, trailing_newline: bool = False):
1344
+ leading_newline: bool = False, trailing_newline: bool = False, show_duration_summary: bool = True):
1107
1345
  """Factory function to create the appropriate progress component based on environment.
1108
1346
 
1109
- Automatically detects if we're in a Snowflake notebook or similar environment
1347
+ Automatically detects if we're in a notebook environment (Snowflake, Jupyter, etc.)
1110
1348
  and returns the appropriate progress class.
1111
1349
  """
1112
- from ..environments import runtime_env, SnowbookEnvironment
1350
+ from ..environments import runtime_env, SnowbookEnvironment, JupyterEnvironment
1113
1351
 
1114
- if isinstance(runtime_env, SnowbookEnvironment):
1115
- # Use NotebookTaskProgress for Snowflake notebooks
1352
+ if isinstance(runtime_env, (SnowbookEnvironment, JupyterEnvironment)):
1353
+ # Use NotebookTaskProgress for Snowflake and Jupyter notebooks
1116
1354
  return NotebookTaskProgress(
1117
1355
  description=description,
1118
1356
  success_message=success_message,
1119
1357
  failure_message=failure_message,
1120
1358
  leading_newline=leading_newline,
1121
- trailing_newline=trailing_newline
1359
+ trailing_newline=trailing_newline,
1360
+ show_duration_summary=show_duration_summary
1122
1361
  )
1123
1362
  else:
1124
- # Use TaskProgress for other environments
1363
+ # Use TaskProgress for other environments (terminal, CI, etc.)
1125
1364
  return TaskProgress(
1126
1365
  description=description,
1127
1366
  success_message=success_message,
1128
1367
  failure_message=failure_message,
1129
1368
  leading_newline=leading_newline,
1130
- trailing_newline=trailing_newline
1369
+ trailing_newline=trailing_newline,
1370
+ show_duration_summary=show_duration_summary
1131
1371
  )
1132
1372
 
1133
1373
 
@@ -1166,14 +1406,24 @@ class NotebookTaskProgress:
1166
1406
  failure_message: str = "",
1167
1407
  leading_newline: bool = False,
1168
1408
  trailing_newline: bool = False,
1409
+ show_duration_summary: bool = True,
1169
1410
  ):
1170
1411
  self.description = description
1171
1412
  self.success_message = success_message
1172
1413
  self.failure_message = failure_message
1173
1414
  self.leading_newline = leading_newline
1174
1415
  self.trailing_newline = trailing_newline
1416
+ self.show_duration_summary = show_duration_summary
1175
1417
 
1176
- self.spinner_generator = itertools.cycle(["▰▱▱▱", "▰▰▱▱", "▰▰▰▱", "▰▰▰▰", "▱▰▰▰", "▱▱▰▰", "▱▱▱▰", "▱▱▱▱"])
1418
+ # Task management - unified data structure
1419
+ self._tasks = {} # task_id -> TaskInfo
1420
+ self._next_task_id = 1
1421
+
1422
+ # Overall process timing
1423
+ self._process_start_time = None
1424
+ self._process_end_time = None
1425
+
1426
+ self.spinner_generator = itertools.cycle(SPINNER_FRAMES)
1177
1427
 
1178
1428
  # Environment detection for notebook environments only
1179
1429
  self.is_snowflake_notebook = isinstance(runtime_env, SnowbookEnvironment)
@@ -1188,11 +1438,15 @@ class NotebookTaskProgress:
1188
1438
  self._update_lock = threading.Lock()
1189
1439
 
1190
1440
  # Add sub-task support for TaskProgress compatibility
1191
- self._tasks = {} # Use same data structure as TaskProgress
1192
- self._next_task_id = 1
1441
+ # Note: _tasks and _next_task_id already initialized above (lines 1393-1394)
1193
1442
  self.main_completed = False
1194
1443
  self.spinner_thread = None
1195
1444
  self._current_subtask = ""
1445
+ self.busy = False # Initialize busy state
1446
+
1447
+ # Timer manager for delayed operations
1448
+ self._timer_manager = _TimerManager(self)
1449
+
1196
1450
 
1197
1451
  def _generate_task_id(self) -> str:
1198
1452
  """Generate a unique task ID."""
@@ -1219,7 +1473,7 @@ class NotebookTaskProgress:
1219
1473
  def get_message(self, starting=False):
1220
1474
  """Get the current message with spinner - notebook environments only."""
1221
1475
  # For notebook environments, use a reasonable default width
1222
- max_width = 80 # Default width for notebooks
1476
+ max_width = DEFAULT_TERMINAL_WIDTH
1223
1477
  try:
1224
1478
  max_width = shutil.get_terminal_size().columns
1225
1479
  except (OSError, AttributeError):
@@ -1264,7 +1518,7 @@ class NotebookTaskProgress:
1264
1518
  diff = width(self.last_message) - width(rich_string)
1265
1519
 
1266
1520
  sys.stdout.write("\r") # Move to beginning
1267
- sys.stdout.write(" " * 80) # Clear with spaces (same as Spinner)
1521
+ sys.stdout.write(" " * DEFAULT_TERMINAL_WIDTH) # Clear with spaces
1268
1522
  sys.stdout.write("\r") # Move back to beginning
1269
1523
 
1270
1524
  sys.stdout.write(message + (" " * diff)) # Write text directly
@@ -1298,12 +1552,13 @@ class NotebookTaskProgress:
1298
1552
  self._current_display = subtask_text
1299
1553
  # The spinner will now show the subtask instead of main task
1300
1554
 
1301
- def add_sub_task(self, description: str, task_id: str | None = None) -> str:
1555
+ def add_sub_task(self, description: str, task_id: str | None = None, category: str = "general") -> str:
1302
1556
  """Add a new sub-task and return its unique ID.
1303
1557
 
1304
1558
  Args:
1305
1559
  description: Description of the subtask
1306
1560
  task_id: Optional custom task ID, if not provided one will be generated
1561
+ category: Category for this task (e.g., "indexing", "provisioning", "change_tracking")
1307
1562
 
1308
1563
  Returns:
1309
1564
  str: The task ID for this subtask
@@ -1312,7 +1567,7 @@ class NotebookTaskProgress:
1312
1567
  task_id = self._generate_task_id()
1313
1568
 
1314
1569
  if task_id not in self._tasks:
1315
- self._tasks[task_id] = TaskInfo(description=description)
1570
+ self._tasks[task_id] = TaskInfo(description=description, category=category)
1316
1571
 
1317
1572
  # Show the subtask by updating the main task text
1318
1573
  self._update_subtask_display(description)
@@ -1326,9 +1581,12 @@ class NotebookTaskProgress:
1326
1581
  # Show the updated subtask by updating the main task text
1327
1582
  self._update_subtask_display(description)
1328
1583
 
1329
- def complete_sub_task(self, task_id: str) -> None:
1584
+ def complete_sub_task(self, task_id: str, record_time: bool = True) -> None:
1330
1585
  """Complete a sub-task by marking it as done."""
1331
1586
  if task_id in self._tasks:
1587
+ # Record completion time (only if not already completed and record_time is True)
1588
+ if not self._tasks[task_id].completed and record_time:
1589
+ self._tasks[task_id].completed_time = time.time()
1332
1590
  self._tasks[task_id].completed = True
1333
1591
 
1334
1592
  # Clear the subtask display when completed
@@ -1336,8 +1594,9 @@ class NotebookTaskProgress:
1336
1594
  self._current_display = ""
1337
1595
  # The spinner will now show the main task again
1338
1596
 
1339
- # Remove completed task immediately (no delay needed in notebooks)
1340
- del self._tasks[task_id]
1597
+ # Schedule hiding the task from display after a short delay
1598
+ # but keep it in the data structure for summary generation
1599
+ self._timer_manager.schedule_task_hiding(task_id)
1341
1600
 
1342
1601
  def remove_sub_task(self, task_id: str, animate: bool = True) -> None:
1343
1602
  """Remove a sub-task by ID."""
@@ -1406,17 +1665,18 @@ class NotebookTaskProgress:
1406
1665
  self._current_display = ""
1407
1666
 
1408
1667
  def __enter__(self):
1409
- if self.leading_newline:
1668
+ # Skip leading newline for Jupyter - it interferes with IPython display
1669
+ if self.leading_newline and not self.is_jupyter:
1410
1670
  rich.print()
1411
1671
  self.update(starting=True)
1412
1672
  # return control to the event loop briefly so stdout can be sure to flush:
1413
1673
  if self.delay:
1414
- time.sleep(0.25)
1674
+ time.sleep(INITIAL_DISPLAY_DELAY)
1415
1675
  self.reset_cursor()
1416
1676
  if not self.delay:
1417
1677
  return self
1418
1678
  self.busy = True
1419
- threading.Thread(target=self.spinner_task).start()
1679
+ threading.Thread(target=self.spinner_task, daemon=True).start()
1420
1680
  return self
1421
1681
 
1422
1682
  def __exit__(self, exc_type, exc_val, exc_tb):
@@ -1424,21 +1684,146 @@ class NotebookTaskProgress:
1424
1684
  if exc_type is not None:
1425
1685
  if self.failure_message is not None:
1426
1686
  self.update(f"{self.failure_message} {exc_val}", file=sys.stderr)
1427
- # Use rich.print with explicit newline to ensure proper formatting
1428
- rich.print(file=sys.stderr)
1687
+ # For non-Jupyter, add newline to ensure proper formatting
1688
+ # For Jupyter, IPython display handles formatting
1689
+ if not self.is_jupyter:
1690
+ rich.print(file=sys.stderr)
1429
1691
  return True
1430
1692
  return False
1431
1693
  if self.delay: # will be None for non-interactive environments
1432
1694
  time.sleep(self.delay)
1433
- self.reset_cursor()
1695
+
1696
+ # Generate summary BEFORE clearing the spinner line (so we have timing data)
1697
+ # Only generate if show_duration_summary flag is True
1698
+ summary = self.generate_summary() if self.show_duration_summary else ""
1699
+
1700
+ # Clear the spinner line completely
1701
+ self._clear_spinner_line()
1702
+
1434
1703
  if self.success_message != "":
1435
1704
  final_message = f"{SUCCESS_ICON} {self.success_message}"
1436
- self.update(final_message)
1437
- # Use rich.print with explicit newline to ensure proper formatting
1438
- rich.print()
1705
+ # For Jupyter, use update() to properly handle IPython display
1706
+ # For Snowflake, use print() to get a new line
1707
+ if self.is_jupyter:
1708
+ self.update(final_message)
1709
+ else:
1710
+ # Print the success message on a clean line
1711
+ print(final_message)
1439
1712
  elif self.success_message == "":
1440
- self.update("")
1441
- self.reset_cursor()
1442
- if self.trailing_newline:
1713
+ # When there's no success message, clear the display for notebooks
1714
+ # The summary will be printed below if available
1715
+ if self.is_jupyter:
1716
+ self.update("")
1717
+ # For non-Jupyter notebooks, _clear_spinner_line() already handled it
1718
+
1719
+ # Print summary if there are completed tasks
1720
+ if summary:
1721
+ # For all notebook environments: display was cleared above, now print summary
1722
+ print()
1723
+ print(summary.strip()) # Summary includes visual separator line
1724
+
1725
+ # Skip trailing newline for Jupyter - it interferes with IPython display
1726
+ if self.trailing_newline and not self.is_jupyter:
1443
1727
  rich.print()
1444
1728
  return True
1729
+
1730
def _clear_spinner_line(self):
    """Blank out the spinner line on stdout and move to a fresh line.

    Does nothing when ``self.is_jupyter`` is true; in that environment the
    IPython display is left to handle clearing.
    """
    if self.is_jupyter:
        return

    # Overwrite the whole line with spaces; fall back to the default width
    # when the terminal size cannot be queried.
    try:
        width = shutil.get_terminal_size().columns
    except (OSError, AttributeError):
        width = DEFAULT_TERMINAL_WIDTH

    blanks = " " * width
    # Carriage return, blanks, then CR+LF so later output starts on a clean line.
    sys.stdout.write(f"\r{blanks}\r\n")
    sys.stdout.flush()
1746
+
1747
def set_process_start_time(self) -> None:
    """Record the current ``time.time()`` value as the process start time."""
    now = time.time()
    self._process_start_time = now
1750
+
1751
def set_process_end_time(self) -> None:
    """Record the current ``time.time()`` value as the process end time."""
    now = time.time()
    self._process_end_time = now
1754
+
1755
def get_total_duration(self) -> float:
    """Return the span from the earliest task addition to the latest completion.

    The start is the smallest ``added_time`` over *all* tracked tasks; the
    end is the largest positive ``completed_time`` among completed tasks.

    Returns:
        The difference in seconds, or ``0.0`` when there are no tasks, no
        completed tasks, or no completed task with a positive
        ``completed_time``.
    """
    tracked = self._tasks
    if not tracked:
        return 0.0

    finished = self.get_completed_tasks()
    if not finished:
        return 0.0

    added_stamps = [info.added_time for info in tracked.values()]
    # A completed_time of 0 (or less) is treated as "never recorded".
    finish_stamps = [
        info.completed_time
        for info in finished.values()
        if info.completed_time > 0
    ]
    if not (added_stamps and finish_stamps):
        return 0.0

    return max(finish_stamps) - min(added_stamps)
1775
+
1776
def generate_summary(self, categories: dict[str, str] | None = None) -> str:
    """Build a text summary of completed-task durations, grouped by category.

    Args:
        categories: Mapping of category name to display label. When
            ``None``, ``DEFAULT_SUMMARY_CATEGORIES`` is used.

    Returns:
        A newline-terminated block: an optional total line, one line per
        category whose duration exceeds ``MIN_CATEGORY_DURATION_SECONDS``,
        and a closing separator line. Returns an empty string when no
        category has a truthy duration.
    """
    selected = DEFAULT_SUMMARY_CATEGORIES if categories is None else categories

    # Duration per category, computed from that category's completed tasks.
    durations = {
        name: _calculate_category_duration(
            name, self.get_completed_tasks_by_category(name)
        )
        for name in selected
    }

    # Nothing meaningful to report.
    if not any(durations.values()):
        return ""

    label_width = 30  # column width for the labels
    time_width = 10   # column width for the right-aligned time
    lines = []

    # The total goes first, laid out to line up with the category rows.
    overall = self.get_total_duration()
    if overall > 0:
        overall_text = format_duration(overall)
        lines.append(f" {INITIALIZATION_COMPLETED_TEXT:<{label_width-1}} {overall_text:>{time_width}}")

    # One arrow-prefixed row per category with a significant duration.
    for name, label in selected.items():
        elapsed = durations[name]
        if elapsed > MIN_CATEGORY_DURATION_SECONDS:
            elapsed_text = format_duration(elapsed)
            lines.append(f" {ARROW} {label:<{label_width-4}} {elapsed_text:>{time_width}}")

    # Visual separator closing the summary (for the Snowflake notebook environment).
    lines.append("─" * SEPARATOR_WIDTH)

    return "\n".join(lines) + "\n"
1821
+
1822
def get_completed_tasks(self) -> dict[str, TaskInfo]:
    """Return a new dict of the tracked tasks whose ``completed`` flag is truthy."""
    finished = {}
    for key, info in self._tasks.items():
        if info.completed:
            finished[key] = info
    return finished
1825
+
1826
def get_completed_tasks_by_category(self, category: str) -> dict[str, TaskInfo]:
    """Return a new dict of completed tasks whose ``category`` equals *category*."""
    matching = {}
    for key, info in self._tasks.items():
        # Category is checked first; the completed flag is only read on a match.
        if info.category == category and info.completed:
            matching[key] = info
    return matching