pomera-ai-commander 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +680 -0
- package/bin/pomera-ai-commander.js +62 -0
- package/core/__init__.py +66 -0
- package/core/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/__pycache__/app_context.cpython-313.pyc +0 -0
- package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
- package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
- package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
- package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/error_service.cpython-313.pyc +0 -0
- package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
- package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
- package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
- package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
- package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
- package/core/app_context.py +482 -0
- package/core/async_text_processor.py +422 -0
- package/core/backup_manager.py +656 -0
- package/core/backup_recovery_manager.py +1034 -0
- package/core/content_hash_cache.py +509 -0
- package/core/context_menu.py +313 -0
- package/core/data_validator.py +1067 -0
- package/core/database_connection_manager.py +745 -0
- package/core/database_curl_settings_manager.py +609 -0
- package/core/database_promera_ai_settings_manager.py +447 -0
- package/core/database_schema.py +412 -0
- package/core/database_schema_manager.py +396 -0
- package/core/database_settings_manager.py +1508 -0
- package/core/database_settings_manager_interface.py +457 -0
- package/core/dialog_manager.py +735 -0
- package/core/efficient_line_numbers.py +511 -0
- package/core/error_handler.py +747 -0
- package/core/error_service.py +431 -0
- package/core/event_consolidator.py +512 -0
- package/core/mcp/__init__.py +43 -0
- package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
- package/core/mcp/protocol.py +288 -0
- package/core/mcp/schema.py +251 -0
- package/core/mcp/server_stdio.py +299 -0
- package/core/mcp/tool_registry.py +2345 -0
- package/core/memory_efficient_text_widget.py +712 -0
- package/core/migration_manager.py +915 -0
- package/core/migration_test_suite.py +1086 -0
- package/core/migration_validator.py +1144 -0
- package/core/optimized_find_replace.py +715 -0
- package/core/optimized_pattern_engine.py +424 -0
- package/core/optimized_search_highlighter.py +553 -0
- package/core/performance_monitor.py +675 -0
- package/core/persistence_manager.py +713 -0
- package/core/progressive_stats_calculator.py +632 -0
- package/core/regex_pattern_cache.py +530 -0
- package/core/regex_pattern_library.py +351 -0
- package/core/search_operation_manager.py +435 -0
- package/core/settings_defaults_registry.py +1087 -0
- package/core/settings_integrity_validator.py +1112 -0
- package/core/settings_serializer.py +558 -0
- package/core/settings_validator.py +1824 -0
- package/core/smart_stats_calculator.py +710 -0
- package/core/statistics_update_manager.py +619 -0
- package/core/stats_config_manager.py +858 -0
- package/core/streaming_text_handler.py +723 -0
- package/core/task_scheduler.py +596 -0
- package/core/update_pattern_library.py +169 -0
- package/core/visibility_monitor.py +596 -0
- package/core/widget_cache.py +498 -0
- package/mcp.json +61 -0
- package/package.json +57 -0
- package/pomera.py +7483 -0
- package/pomera_mcp_server.py +144 -0
- package/tools/__init__.py +5 -0
- package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
- package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
- package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
- package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
- package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
- package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
- package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
- package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
- package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
- package/tools/ai_tools.py +2892 -0
- package/tools/ascii_art_generator.py +353 -0
- package/tools/base64_tools.py +184 -0
- package/tools/base_tool.py +511 -0
- package/tools/case_tool.py +309 -0
- package/tools/column_tools.py +396 -0
- package/tools/cron_tool.py +885 -0
- package/tools/curl_history.py +601 -0
- package/tools/curl_processor.py +1208 -0
- package/tools/curl_settings.py +503 -0
- package/tools/curl_tool.py +5467 -0
- package/tools/diff_viewer.py +1072 -0
- package/tools/email_extraction_tool.py +249 -0
- package/tools/email_header_analyzer.py +426 -0
- package/tools/extraction_tools.py +250 -0
- package/tools/find_replace.py +1751 -0
- package/tools/folder_file_reporter.py +1463 -0
- package/tools/folder_file_reporter_adapter.py +480 -0
- package/tools/generator_tools.py +1217 -0
- package/tools/hash_generator.py +256 -0
- package/tools/html_tool.py +657 -0
- package/tools/huggingface_helper.py +449 -0
- package/tools/jsonxml_tool.py +730 -0
- package/tools/line_tools.py +419 -0
- package/tools/list_comparator.py +720 -0
- package/tools/markdown_tools.py +562 -0
- package/tools/mcp_widget.py +1417 -0
- package/tools/notes_widget.py +973 -0
- package/tools/number_base_converter.py +373 -0
- package/tools/regex_extractor.py +572 -0
- package/tools/slug_generator.py +311 -0
- package/tools/sorter_tools.py +459 -0
- package/tools/string_escape_tool.py +393 -0
- package/tools/text_statistics_tool.py +366 -0
- package/tools/text_wrapper.py +431 -0
- package/tools/timestamp_converter.py +422 -0
- package/tools/tool_loader.py +710 -0
- package/tools/translator_tools.py +523 -0
- package/tools/url_link_extractor.py +262 -0
- package/tools/url_parser.py +205 -0
- package/tools/whitespace_tools.py +356 -0
- package/tools/word_frequency_counter.py +147 -0
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Progressive Statistics Calculator for Pomera AI Commander.
|
|
3
|
+
|
|
4
|
+
This module provides progressive statistics calculation for large text content
|
|
5
|
+
without blocking the UI. It implements chunked processing, cancellable calculations,
|
|
6
|
+
and progress indicators for long-running operations.
|
|
7
|
+
|
|
8
|
+
Requirements addressed:
|
|
9
|
+
- 5.1: Calculate statistics in chunks for text exceeding 50,000 characters
|
|
10
|
+
- 5.2: Yield control to UI thread periodically during calculations
|
|
11
|
+
- 5.3: Show processing indicator for calculations taking longer than 100ms
|
|
12
|
+
- 5.4: Cancel and restart calculations when user continues typing
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import time
|
|
16
|
+
import threading
|
|
17
|
+
import hashlib
|
|
18
|
+
from typing import Optional, Callable, Dict, Any, List
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from enum import Enum
|
|
21
|
+
import re
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CalculationStatus(Enum):
    """Lifecycle states of a progressive calculation."""
    PENDING = "pending"      # created but not yet started
    RUNNING = "running"      # chunks are actively being processed
    COMPLETED = "completed"  # finished successfully
    CANCELLED = "cancelled"  # stopped before completion at the caller's request
    FAILED = "failed"        # aborted by an error
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ProgressInfo:
    """Snapshot of one progressive calculation's progress, as delivered to
    progress callbacks and returned by get_calculation_progress()."""
    calculation_id: str
    status: CalculationStatus
    progress_percent: float = 0.0
    chunks_processed: int = 0
    total_chunks: int = 0
    elapsed_time_ms: float = 0.0
    estimated_remaining_ms: float = 0.0

    @property
    def is_complete(self) -> bool:
        """True once the calculation has reached a terminal state."""
        terminal = (CalculationStatus.COMPLETED,
                    CalculationStatus.CANCELLED,
                    CalculationStatus.FAILED)
        return self.status in terminal

    @property
    def should_show_indicator(self) -> bool:
        """True when the run has exceeded 100ms and warrants a visible indicator."""
        return self.elapsed_time_ms > 100.0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class TextStats:
    """Text statistics result.

    ``char_count`` holds the UTF-8 byte length of the analysed text (it is
    rendered as "Bytes" in the status string); the other counts are raw
    totals.
    """
    char_count: int = 0
    word_count: int = 0
    sentence_count: int = 0
    line_count: int = 0
    token_count: int = 0

    # Metadata
    content_hash: str = ""            # cheap identity hash of the source text
    calculation_time_ms: float = 0.0  # wall-clock time spent computing
    chunk_processed: bool = False     # True when produced by chunked processing
    processing_method: str = "full"   # "full", "fast" or "progressive"

    def to_status_string(self) -> str:
        """Convert to status bar string format."""
        formatted_bytes = self._format_bytes(self.char_count)
        return f"Bytes: {formatted_bytes} | Word: {self.word_count} | Sentence: {self.sentence_count} | Line: {self.line_count} | Tokens: {self.token_count}"

    def _format_bytes(self, byte_count: int) -> str:
        """Format byte count with K/M suffixes for readability.

        Values >= 1M get an "M" suffix, values >= 1K a "K" suffix; a trailing
        ".0" is stripped so e.g. 1000 renders as "1K" rather than "1.0K".

        Bug fix: previously the suffix was appended *before*
        ``rstrip('0').rstrip('.')`` ran, so the string always ended in
        'K'/'M' and the strip never removed anything; the ".0" now gets
        stripped before the suffix is attached, as intended.

        Args:
            byte_count: Raw byte count to format.

        Returns:
            Human-readable string such as "999", "1.5K" or "2M".
        """
        if byte_count >= 1000000:
            value = byte_count / 1000000
            suffix = "M"
        elif byte_count >= 1000:
            value = byte_count / 1000
            # A value that rounds up to 1000K is promoted to the M bucket.
            if round(value, 1) >= 1000:
                value /= 1000
                suffix = "M"
            else:
                suffix = "K"
        else:
            return str(byte_count)

        number = f"{value:.1f}".rstrip('0').rstrip('.')
        return f"{number}{suffix}"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass
class CalculationTask:
    """State for one in-flight calculation, with cooperative cancellation."""
    calculation_id: str
    text: str
    chunk_size: int
    callback: Optional[Callable]           # receives the final TextStats
    progress_callback: Optional[Callable]  # receives ProgressInfo updates
    start_time: float = field(default_factory=time.time)
    cancelled: bool = False

    def cancel(self):
        """Flag the task; the worker observes this at the next chunk boundary."""
        self.cancelled = True
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class ProgressiveStatsCalculator:
|
|
109
|
+
"""
|
|
110
|
+
Progressive statistics calculator for handling large text content without blocking UI.
|
|
111
|
+
|
|
112
|
+
This calculator implements chunked processing for text exceeding 50,000 characters,
|
|
113
|
+
yields control to the UI thread periodically, provides cancellable calculations,
|
|
114
|
+
and shows processing indicators for long-running operations.
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
def __init__(self, chunk_size: int = 10000, progress_indicator_threshold_ms: float = 100.0):
|
|
118
|
+
"""
|
|
119
|
+
Initialize the progressive statistics calculator.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
chunk_size: Size of text chunks for processing (default: 10,000 characters)
|
|
123
|
+
progress_indicator_threshold_ms: Threshold for showing progress indicator (default: 100ms)
|
|
124
|
+
"""
|
|
125
|
+
self.chunk_size = chunk_size
|
|
126
|
+
self.progress_indicator_threshold_ms = progress_indicator_threshold_ms
|
|
127
|
+
|
|
128
|
+
# Active calculations
|
|
129
|
+
self.active_calculations: Dict[str, CalculationTask] = {}
|
|
130
|
+
self.calculation_lock = threading.RLock()
|
|
131
|
+
|
|
132
|
+
# Regex patterns (compiled once for performance)
|
|
133
|
+
self.word_pattern = re.compile(r'\b\w+\b')
|
|
134
|
+
self.sentence_pattern = re.compile(r'[.!?]+')
|
|
135
|
+
|
|
136
|
+
# Statistics
|
|
137
|
+
self.stats = {
|
|
138
|
+
'total_calculations': 0,
|
|
139
|
+
'progressive_calculations': 0,
|
|
140
|
+
'cancelled_calculations': 0,
|
|
141
|
+
'completed_calculations': 0,
|
|
142
|
+
'total_processing_time_ms': 0.0
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
    def calculate_progressive(self,
                            text: str,
                            callback: Optional[Callable[[TextStats], None]] = None,
                            progress_callback: Optional[Callable[[ProgressInfo], None]] = None,
                            widget_id: Optional[str] = None) -> str:
        """
        Calculate statistics progressively for large text content.

        Text under 50,000 characters is computed synchronously and the result
        delivered to ``callback`` before this method returns; larger text is
        handed to a daemon worker thread, so callbacks then fire later from
        that thread.

        Args:
            text: Text content to analyze
            callback: Optional callback to receive final results
            progress_callback: Optional callback to receive progress updates
            widget_id: Optional widget identifier for tracking

        Returns:
            Calculation ID for tracking and cancellation
        """
        # Track total calculations
        self.stats['total_calculations'] += 1

        # Generate calculation ID
        calculation_id = self._generate_calculation_id(text, widget_id)

        # Cancel any existing calculation for this widget, so a newer
        # keystroke supersedes an in-flight computation (requirement 5.4).
        if widget_id:
            self.cancel_calculation_for_widget(widget_id)

        # Check if text is large enough to require progressive calculation
        # (50,000 characters is the threshold from requirement 5.1).
        text_length = len(text)

        if text_length < 50000:
            # Small text - calculate immediately on the caller's thread.
            stats = self._calculate_stats_fast(text)
            self.stats['completed_calculations'] += 1
            if callback:
                callback(stats)
            return calculation_id

        # Large text - use progressive calculation
        self.stats['progressive_calculations'] += 1

        # Create calculation task
        task = CalculationTask(
            calculation_id=calculation_id,
            text=text,
            chunk_size=self.chunk_size,
            callback=callback,
            progress_callback=progress_callback
        )

        with self.calculation_lock:
            self.active_calculations[calculation_id] = task

        # Start calculation in a background thread; daemon=True so a pending
        # calculation never blocks interpreter shutdown.
        thread = threading.Thread(
            target=self._calculate_progressive_impl,
            args=(task,),
            daemon=True
        )
        thread.start()

        return calculation_id
|
|
207
|
+
|
|
208
|
+
    def _calculate_progressive_impl(self, task: CalculationTask) -> None:
        """
        Worker-thread implementation of progressive calculation.

        Iterates ``task.text`` in fixed-size chunks, accumulating per-chunk
        counts, emitting progress callbacks, and honouring cancellation at
        chunk boundaries.  Runs on the daemon thread started by
        calculate_progressive(); all callbacks fire on this thread.

        Args:
            task: Calculation task to execute
        """
        start_time = time.time()
        text = task.text
        text_length = len(text)

        # Calculate number of chunks (ceiling division)
        total_chunks = (text_length + task.chunk_size - 1) // task.chunk_size

        # Initialize accumulators
        total_char_count = 0
        total_word_count = 0
        total_sentence_count = 0
        total_line_count = 0

        try:
            # Process text in chunks
            for chunk_idx in range(total_chunks):
                # Check if cancelled; cancellation is only observed between
                # chunks, so it takes effect at the next boundary (req 5.4).
                if task.cancelled:
                    self._handle_cancellation(task)
                    return

                # Calculate chunk boundaries
                start_pos = chunk_idx * task.chunk_size
                end_pos = min(start_pos + task.chunk_size, text_length)
                chunk = text[start_pos:end_pos]

                # Process chunk
                # NOTE(review): words/sentences spanning a chunk boundary are
                # counted per-fragment, so totals can drift slightly from a
                # single-pass count.
                chunk_stats = self._process_chunk(chunk, start_pos, end_pos, text_length)

                # Accumulate results
                total_char_count += chunk_stats['char_count']
                total_word_count += chunk_stats['word_count']
                total_sentence_count += chunk_stats['sentence_count']
                total_line_count += chunk_stats['line_count']

                # Calculate progress
                chunks_processed = chunk_idx + 1
                progress_percent = (chunks_processed / total_chunks) * 100.0
                elapsed_time_ms = (time.time() - start_time) * 1000.0

                # Estimate remaining time from the average chunk duration so far.
                if chunks_processed > 0:
                    avg_time_per_chunk = elapsed_time_ms / chunks_processed
                    remaining_chunks = total_chunks - chunks_processed
                    estimated_remaining_ms = avg_time_per_chunk * remaining_chunks
                else:
                    estimated_remaining_ms = 0.0

                # Send progress update only once the run exceeds the indicator
                # threshold (default 100ms, requirement 5.3).
                if task.progress_callback and elapsed_time_ms > self.progress_indicator_threshold_ms:
                    progress_info = ProgressInfo(
                        calculation_id=task.calculation_id,
                        status=CalculationStatus.RUNNING,
                        progress_percent=progress_percent,
                        chunks_processed=chunks_processed,
                        total_chunks=total_chunks,
                        elapsed_time_ms=elapsed_time_ms,
                        estimated_remaining_ms=estimated_remaining_ms
                    )
                    task.progress_callback(progress_info)

                # Yield control to UI thread periodically (every 2 chunks)
                if chunk_idx % 2 == 0:
                    time.sleep(0.001)  # Small sleep to yield control

            # Calculation complete
            calculation_time_ms = (time.time() - start_time) * 1000.0

            # Create final stats; token count is the rough 4-chars-per-token
            # estimate applied to the whole text, matching _calculate_stats_fast.
            stats = TextStats(
                char_count=total_char_count,
                word_count=total_word_count,
                sentence_count=total_sentence_count,
                line_count=total_line_count,
                token_count=max(1, round(text_length / 4)),
                content_hash=self._generate_content_hash(text),
                calculation_time_ms=calculation_time_ms,
                chunk_processed=True,
                processing_method="progressive"
            )

            # Send final result
            if task.callback:
                task.callback(stats)

            # Update statistics
            self.stats['completed_calculations'] += 1
            self.stats['total_processing_time_ms'] += calculation_time_ms

            # Send completion progress update
            if task.progress_callback:
                progress_info = ProgressInfo(
                    calculation_id=task.calculation_id,
                    status=CalculationStatus.COMPLETED,
                    progress_percent=100.0,
                    chunks_processed=total_chunks,
                    total_chunks=total_chunks,
                    elapsed_time_ms=calculation_time_ms,
                    estimated_remaining_ms=0.0
                )
                task.progress_callback(progress_info)

        except Exception as e:
            # Handle calculation error
            # NOTE(review): the error goes to stdout rather than the logging
            # framework and failures are not counted in self.stats — confirm
            # whether that is intentional.
            print(f"Error in progressive calculation: {e}")

            if task.progress_callback:
                progress_info = ProgressInfo(
                    calculation_id=task.calculation_id,
                    status=CalculationStatus.FAILED,
                    progress_percent=0.0,
                    chunks_processed=0,
                    total_chunks=total_chunks,
                    elapsed_time_ms=(time.time() - start_time) * 1000.0,
                    estimated_remaining_ms=0.0
                )
                task.progress_callback(progress_info)

        finally:
            # Clean up; safe even after _handle_cancellation already removed
            # the task, because pop() is given a default.
            with self.calculation_lock:
                self.active_calculations.pop(task.calculation_id, None)
|
|
337
|
+
|
|
338
|
+
def _process_chunk(self, chunk: str, start_pos: int, end_pos: int, total_length: int) -> Dict[str, int]:
|
|
339
|
+
"""
|
|
340
|
+
Process a single chunk of text.
|
|
341
|
+
|
|
342
|
+
Args:
|
|
343
|
+
chunk: Text chunk to process
|
|
344
|
+
start_pos: Start position in original text
|
|
345
|
+
end_pos: End position in original text
|
|
346
|
+
total_length: Total length of original text
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
Dictionary with chunk statistics
|
|
350
|
+
"""
|
|
351
|
+
# Character count (bytes)
|
|
352
|
+
char_count = len(chunk.encode('utf-8'))
|
|
353
|
+
|
|
354
|
+
# Word count
|
|
355
|
+
words = self.word_pattern.findall(chunk)
|
|
356
|
+
word_count = len(words)
|
|
357
|
+
|
|
358
|
+
# Sentence count
|
|
359
|
+
sentences = self.sentence_pattern.findall(chunk)
|
|
360
|
+
sentence_count = len(sentences)
|
|
361
|
+
|
|
362
|
+
# Line count
|
|
363
|
+
line_count = chunk.count('\n')
|
|
364
|
+
|
|
365
|
+
# Adjust counts for chunk boundaries
|
|
366
|
+
# Only count full lines for first and last chunks
|
|
367
|
+
if start_pos == 0 and chunk.strip():
|
|
368
|
+
line_count += 1 # Add first line
|
|
369
|
+
|
|
370
|
+
return {
|
|
371
|
+
'char_count': char_count,
|
|
372
|
+
'word_count': word_count,
|
|
373
|
+
'sentence_count': sentence_count,
|
|
374
|
+
'line_count': line_count
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
def _calculate_stats_fast(self, text: str) -> TextStats:
|
|
378
|
+
"""
|
|
379
|
+
Fast calculation for small text content.
|
|
380
|
+
|
|
381
|
+
Args:
|
|
382
|
+
text: Text to analyze
|
|
383
|
+
|
|
384
|
+
Returns:
|
|
385
|
+
TextStats object
|
|
386
|
+
"""
|
|
387
|
+
start_time = time.time()
|
|
388
|
+
|
|
389
|
+
# Basic statistics
|
|
390
|
+
char_count = len(text.encode('utf-8'))
|
|
391
|
+
line_count = text.count('\n') + (1 if text.strip() else 0)
|
|
392
|
+
|
|
393
|
+
# Word statistics
|
|
394
|
+
words = self.word_pattern.findall(text)
|
|
395
|
+
word_count = len(words)
|
|
396
|
+
|
|
397
|
+
# Sentence statistics
|
|
398
|
+
sentences = self.sentence_pattern.findall(text)
|
|
399
|
+
sentence_count = len(sentences)
|
|
400
|
+
|
|
401
|
+
# Token count (rough estimate: 1 token ≈ 4 characters)
|
|
402
|
+
token_count = max(1, round(len(text) / 4))
|
|
403
|
+
|
|
404
|
+
calculation_time_ms = (time.time() - start_time) * 1000.0
|
|
405
|
+
|
|
406
|
+
return TextStats(
|
|
407
|
+
char_count=char_count,
|
|
408
|
+
word_count=word_count,
|
|
409
|
+
sentence_count=sentence_count,
|
|
410
|
+
line_count=line_count,
|
|
411
|
+
token_count=token_count,
|
|
412
|
+
content_hash=self._generate_content_hash(text),
|
|
413
|
+
calculation_time_ms=calculation_time_ms,
|
|
414
|
+
chunk_processed=False,
|
|
415
|
+
processing_method="fast"
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
def cancel_calculation(self, calculation_id: str) -> bool:
|
|
419
|
+
"""
|
|
420
|
+
Cancel a specific calculation.
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
calculation_id: ID of calculation to cancel
|
|
424
|
+
|
|
425
|
+
Returns:
|
|
426
|
+
True if calculation was cancelled, False if not found
|
|
427
|
+
"""
|
|
428
|
+
with self.calculation_lock:
|
|
429
|
+
task = self.active_calculations.get(calculation_id)
|
|
430
|
+
if task:
|
|
431
|
+
task.cancel()
|
|
432
|
+
self.stats['cancelled_calculations'] += 1
|
|
433
|
+
return True
|
|
434
|
+
return False
|
|
435
|
+
|
|
436
|
+
def cancel_calculation_for_widget(self, widget_id: str) -> int:
|
|
437
|
+
"""
|
|
438
|
+
Cancel all calculations for a specific widget.
|
|
439
|
+
|
|
440
|
+
Args:
|
|
441
|
+
widget_id: Widget identifier
|
|
442
|
+
|
|
443
|
+
Returns:
|
|
444
|
+
Number of calculations cancelled
|
|
445
|
+
"""
|
|
446
|
+
cancelled_count = 0
|
|
447
|
+
|
|
448
|
+
with self.calculation_lock:
|
|
449
|
+
# Find all calculations for this widget
|
|
450
|
+
# Check if calculation_id contains the widget_id
|
|
451
|
+
to_cancel = []
|
|
452
|
+
for calc_id, task in self.active_calculations.items():
|
|
453
|
+
# The calculation_id format is: {content_hash}_{timestamp}_{widget_id}
|
|
454
|
+
# So we check if it ends with the widget_id
|
|
455
|
+
if widget_id and calc_id.endswith(f"_{widget_id}"):
|
|
456
|
+
to_cancel.append(calc_id)
|
|
457
|
+
|
|
458
|
+
# Cancel them
|
|
459
|
+
for calc_id in to_cancel:
|
|
460
|
+
task = self.active_calculations.get(calc_id)
|
|
461
|
+
if task and not task.cancelled:
|
|
462
|
+
task.cancel()
|
|
463
|
+
self.stats['cancelled_calculations'] += 1
|
|
464
|
+
cancelled_count += 1
|
|
465
|
+
|
|
466
|
+
return cancelled_count
|
|
467
|
+
|
|
468
|
+
def cancel_all_calculations(self) -> int:
|
|
469
|
+
"""
|
|
470
|
+
Cancel all active calculations.
|
|
471
|
+
|
|
472
|
+
Returns:
|
|
473
|
+
Number of calculations cancelled
|
|
474
|
+
"""
|
|
475
|
+
with self.calculation_lock:
|
|
476
|
+
calc_ids = list(self.active_calculations.keys())
|
|
477
|
+
|
|
478
|
+
for calc_id in calc_ids:
|
|
479
|
+
self.cancel_calculation(calc_id)
|
|
480
|
+
|
|
481
|
+
return len(calc_ids)
|
|
482
|
+
|
|
483
|
+
def _handle_cancellation(self, task: CalculationTask) -> None:
|
|
484
|
+
"""
|
|
485
|
+
Handle calculation cancellation.
|
|
486
|
+
|
|
487
|
+
Args:
|
|
488
|
+
task: Cancelled task
|
|
489
|
+
"""
|
|
490
|
+
# Send cancellation progress update
|
|
491
|
+
if task.progress_callback:
|
|
492
|
+
progress_info = ProgressInfo(
|
|
493
|
+
calculation_id=task.calculation_id,
|
|
494
|
+
status=CalculationStatus.CANCELLED,
|
|
495
|
+
progress_percent=0.0,
|
|
496
|
+
chunks_processed=0,
|
|
497
|
+
total_chunks=0,
|
|
498
|
+
elapsed_time_ms=(time.time() - task.start_time) * 1000.0,
|
|
499
|
+
estimated_remaining_ms=0.0
|
|
500
|
+
)
|
|
501
|
+
task.progress_callback(progress_info)
|
|
502
|
+
|
|
503
|
+
# Clean up
|
|
504
|
+
with self.calculation_lock:
|
|
505
|
+
self.active_calculations.pop(task.calculation_id, None)
|
|
506
|
+
|
|
507
|
+
def _generate_calculation_id(self, text: str, widget_id: Optional[str] = None) -> str:
|
|
508
|
+
"""
|
|
509
|
+
Generate a unique calculation ID.
|
|
510
|
+
|
|
511
|
+
Args:
|
|
512
|
+
text: Text content
|
|
513
|
+
widget_id: Optional widget identifier
|
|
514
|
+
|
|
515
|
+
Returns:
|
|
516
|
+
Unique calculation ID
|
|
517
|
+
"""
|
|
518
|
+
content_hash = self._generate_content_hash(text)
|
|
519
|
+
timestamp = str(time.time())
|
|
520
|
+
widget_part = f"_{widget_id}" if widget_id else ""
|
|
521
|
+
|
|
522
|
+
id_string = f"{content_hash}_{timestamp}{widget_part}"
|
|
523
|
+
return hashlib.md5(id_string.encode('utf-8')).hexdigest()[:16]
|
|
524
|
+
|
|
525
|
+
def _generate_content_hash(self, text: str) -> str:
|
|
526
|
+
"""
|
|
527
|
+
Generate a hash for content identification.
|
|
528
|
+
|
|
529
|
+
Args:
|
|
530
|
+
text: Text content
|
|
531
|
+
|
|
532
|
+
Returns:
|
|
533
|
+
Content hash
|
|
534
|
+
"""
|
|
535
|
+
content_sample = text[:100] + text[-100:] if len(text) > 200 else text
|
|
536
|
+
hash_input = f"{len(text)}_{content_sample}"
|
|
537
|
+
return hashlib.md5(hash_input.encode('utf-8')).hexdigest()[:16]
|
|
538
|
+
|
|
539
|
+
def get_active_calculations(self) -> List[str]:
|
|
540
|
+
"""
|
|
541
|
+
Get list of active calculation IDs.
|
|
542
|
+
|
|
543
|
+
Returns:
|
|
544
|
+
List of calculation IDs
|
|
545
|
+
"""
|
|
546
|
+
with self.calculation_lock:
|
|
547
|
+
return list(self.active_calculations.keys())
|
|
548
|
+
|
|
549
|
+
def get_calculation_progress(self, calculation_id: str) -> Optional[ProgressInfo]:
|
|
550
|
+
"""
|
|
551
|
+
Get progress information for a calculation.
|
|
552
|
+
|
|
553
|
+
Args:
|
|
554
|
+
calculation_id: Calculation ID
|
|
555
|
+
|
|
556
|
+
Returns:
|
|
557
|
+
ProgressInfo or None if not found
|
|
558
|
+
"""
|
|
559
|
+
with self.calculation_lock:
|
|
560
|
+
task = self.active_calculations.get(calculation_id)
|
|
561
|
+
if not task:
|
|
562
|
+
return None
|
|
563
|
+
|
|
564
|
+
elapsed_time_ms = (time.time() - task.start_time) * 1000.0
|
|
565
|
+
|
|
566
|
+
return ProgressInfo(
|
|
567
|
+
calculation_id=calculation_id,
|
|
568
|
+
status=CalculationStatus.RUNNING if not task.cancelled else CalculationStatus.CANCELLED,
|
|
569
|
+
progress_percent=0.0, # Would need to track this in task
|
|
570
|
+
chunks_processed=0,
|
|
571
|
+
total_chunks=0,
|
|
572
|
+
elapsed_time_ms=elapsed_time_ms,
|
|
573
|
+
estimated_remaining_ms=0.0
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
def get_statistics(self) -> Dict[str, Any]:
|
|
577
|
+
"""
|
|
578
|
+
Get calculator statistics.
|
|
579
|
+
|
|
580
|
+
Returns:
|
|
581
|
+
Dictionary with statistics
|
|
582
|
+
"""
|
|
583
|
+
with self.calculation_lock:
|
|
584
|
+
stats = self.stats.copy()
|
|
585
|
+
stats['active_calculations'] = len(self.active_calculations)
|
|
586
|
+
|
|
587
|
+
# Calculate average processing time
|
|
588
|
+
if stats['completed_calculations'] > 0:
|
|
589
|
+
stats['avg_processing_time_ms'] = (
|
|
590
|
+
stats['total_processing_time_ms'] / stats['completed_calculations']
|
|
591
|
+
)
|
|
592
|
+
else:
|
|
593
|
+
stats['avg_processing_time_ms'] = 0.0
|
|
594
|
+
|
|
595
|
+
return stats
|
|
596
|
+
|
|
597
|
+
def clear_statistics(self) -> None:
|
|
598
|
+
"""Clear all statistics."""
|
|
599
|
+
self.stats = {
|
|
600
|
+
'total_calculations': 0,
|
|
601
|
+
'progressive_calculations': 0,
|
|
602
|
+
'cancelled_calculations': 0,
|
|
603
|
+
'completed_calculations': 0,
|
|
604
|
+
'total_processing_time_ms': 0.0
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
# Global instance for easy access
|
|
609
|
+
_global_progressive_calculator: Optional[ProgressiveStatsCalculator] = None
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def get_progressive_stats_calculator() -> ProgressiveStatsCalculator:
    """Return the process-wide calculator, creating it lazily on first use."""
    global _global_progressive_calculator
    if _global_progressive_calculator is None:
        _global_progressive_calculator = ProgressiveStatsCalculator()
    return _global_progressive_calculator
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def create_progressive_stats_calculator(chunk_size: int = 10000,
                                        progress_indicator_threshold_ms: float = 100.0) -> ProgressiveStatsCalculator:
    """
    Build a fresh, independent ProgressiveStatsCalculator (unlike the shared
    instance returned by get_progressive_stats_calculator).

    Args:
        chunk_size: Size of text chunks for processing
        progress_indicator_threshold_ms: Threshold for showing progress indicator

    Returns:
        New ProgressiveStatsCalculator instance
    """
    return ProgressiveStatsCalculator(chunk_size, progress_indicator_threshold_ms)
|