pomera-ai-commander 1.1.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213)
  1. package/LICENSE +21 -21
  2. package/README.md +105 -680
  3. package/bin/pomera-ai-commander.js +62 -62
  4. package/core/__init__.py +65 -65
  5. package/core/app_context.py +482 -482
  6. package/core/async_text_processor.py +421 -421
  7. package/core/backup_manager.py +655 -655
  8. package/core/backup_recovery_manager.py +1199 -1033
  9. package/core/content_hash_cache.py +508 -508
  10. package/core/context_menu.py +313 -313
  11. package/core/data_directory.py +549 -0
  12. package/core/data_validator.py +1066 -1066
  13. package/core/database_connection_manager.py +744 -744
  14. package/core/database_curl_settings_manager.py +608 -608
  15. package/core/database_promera_ai_settings_manager.py +446 -446
  16. package/core/database_schema.py +411 -411
  17. package/core/database_schema_manager.py +395 -395
  18. package/core/database_settings_manager.py +1507 -1507
  19. package/core/database_settings_manager_interface.py +456 -456
  20. package/core/dialog_manager.py +734 -734
  21. package/core/diff_utils.py +239 -0
  22. package/core/efficient_line_numbers.py +540 -510
  23. package/core/error_handler.py +746 -746
  24. package/core/error_service.py +431 -431
  25. package/core/event_consolidator.py +511 -511
  26. package/core/mcp/__init__.py +43 -43
  27. package/core/mcp/find_replace_diff.py +334 -0
  28. package/core/mcp/protocol.py +288 -288
  29. package/core/mcp/schema.py +251 -251
  30. package/core/mcp/server_stdio.py +299 -299
  31. package/core/mcp/tool_registry.py +2699 -2345
  32. package/core/memento.py +275 -0
  33. package/core/memory_efficient_text_widget.py +711 -711
  34. package/core/migration_manager.py +914 -914
  35. package/core/migration_test_suite.py +1085 -1085
  36. package/core/migration_validator.py +1143 -1143
  37. package/core/optimized_find_replace.py +714 -714
  38. package/core/optimized_pattern_engine.py +424 -424
  39. package/core/optimized_search_highlighter.py +552 -552
  40. package/core/performance_monitor.py +674 -674
  41. package/core/persistence_manager.py +712 -712
  42. package/core/progressive_stats_calculator.py +632 -632
  43. package/core/regex_pattern_cache.py +529 -529
  44. package/core/regex_pattern_library.py +350 -350
  45. package/core/search_operation_manager.py +434 -434
  46. package/core/settings_defaults_registry.py +1087 -1087
  47. package/core/settings_integrity_validator.py +1111 -1111
  48. package/core/settings_serializer.py +557 -557
  49. package/core/settings_validator.py +1823 -1823
  50. package/core/smart_stats_calculator.py +709 -709
  51. package/core/statistics_update_manager.py +619 -619
  52. package/core/stats_config_manager.py +858 -858
  53. package/core/streaming_text_handler.py +723 -723
  54. package/core/task_scheduler.py +596 -596
  55. package/core/update_pattern_library.py +168 -168
  56. package/core/visibility_monitor.py +596 -596
  57. package/core/widget_cache.py +498 -498
  58. package/mcp.json +51 -61
  59. package/migrate_data.py +127 -0
  60. package/package.json +64 -57
  61. package/pomera.py +7883 -7482
  62. package/pomera_mcp_server.py +183 -144
  63. package/requirements.txt +33 -0
  64. package/scripts/Dockerfile.alpine +43 -0
  65. package/scripts/Dockerfile.gui-test +54 -0
  66. package/scripts/Dockerfile.linux +43 -0
  67. package/scripts/Dockerfile.test-linux +80 -0
  68. package/scripts/Dockerfile.ubuntu +39 -0
  69. package/scripts/README.md +53 -0
  70. package/scripts/build-all.bat +113 -0
  71. package/scripts/build-docker.bat +53 -0
  72. package/scripts/build-docker.sh +55 -0
  73. package/scripts/build-optimized.bat +101 -0
  74. package/scripts/build.sh +78 -0
  75. package/scripts/docker-compose.test.yml +27 -0
  76. package/scripts/docker-compose.yml +32 -0
  77. package/scripts/postinstall.js +62 -0
  78. package/scripts/requirements-minimal.txt +33 -0
  79. package/scripts/test-linux-simple.bat +28 -0
  80. package/scripts/validate-release-workflow.py +450 -0
  81. package/tools/__init__.py +4 -4
  82. package/tools/ai_tools.py +2891 -2891
  83. package/tools/ascii_art_generator.py +352 -352
  84. package/tools/base64_tools.py +183 -183
  85. package/tools/base_tool.py +511 -511
  86. package/tools/case_tool.py +308 -308
  87. package/tools/column_tools.py +395 -395
  88. package/tools/cron_tool.py +884 -884
  89. package/tools/curl_history.py +600 -600
  90. package/tools/curl_processor.py +1207 -1207
  91. package/tools/curl_settings.py +502 -502
  92. package/tools/curl_tool.py +5467 -5467
  93. package/tools/diff_viewer.py +1817 -1072
  94. package/tools/email_extraction_tool.py +248 -248
  95. package/tools/email_header_analyzer.py +425 -425
  96. package/tools/extraction_tools.py +250 -250
  97. package/tools/find_replace.py +2289 -1750
  98. package/tools/folder_file_reporter.py +1463 -1463
  99. package/tools/folder_file_reporter_adapter.py +480 -480
  100. package/tools/generator_tools.py +1216 -1216
  101. package/tools/hash_generator.py +255 -255
  102. package/tools/html_tool.py +656 -656
  103. package/tools/jsonxml_tool.py +729 -729
  104. package/tools/line_tools.py +419 -419
  105. package/tools/markdown_tools.py +561 -561
  106. package/tools/mcp_widget.py +1417 -1417
  107. package/tools/notes_widget.py +978 -973
  108. package/tools/number_base_converter.py +372 -372
  109. package/tools/regex_extractor.py +571 -571
  110. package/tools/slug_generator.py +310 -310
  111. package/tools/sorter_tools.py +458 -458
  112. package/tools/string_escape_tool.py +392 -392
  113. package/tools/text_statistics_tool.py +365 -365
  114. package/tools/text_wrapper.py +430 -430
  115. package/tools/timestamp_converter.py +421 -421
  116. package/tools/tool_loader.py +710 -710
  117. package/tools/translator_tools.py +522 -522
  118. package/tools/url_link_extractor.py +261 -261
  119. package/tools/url_parser.py +204 -204
  120. package/tools/whitespace_tools.py +355 -355
  121. package/tools/word_frequency_counter.py +146 -146
  122. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  123. package/core/__pycache__/app_context.cpython-313.pyc +0 -0
  124. package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
  125. package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
  126. package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
  127. package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
  128. package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
  129. package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
  130. package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
  131. package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
  132. package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
  133. package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
  134. package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
  135. package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
  136. package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
  137. package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
  138. package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
  139. package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
  140. package/core/__pycache__/error_service.cpython-313.pyc +0 -0
  141. package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
  142. package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
  143. package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
  144. package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
  145. package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
  146. package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
  147. package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
  148. package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
  149. package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
  150. package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
  151. package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
  152. package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
  153. package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
  154. package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
  155. package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
  156. package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
  157. package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
  158. package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
  159. package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
  160. package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
  161. package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
  162. package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
  163. package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
  164. package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
  165. package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
  166. package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
  167. package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
  168. package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
  169. package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
  170. package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
  171. package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  172. package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
  173. package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
  174. package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
  175. package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
  176. package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
  177. package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
  178. package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
  179. package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
  180. package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
  181. package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
  182. package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
  183. package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
  184. package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
  185. package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
  186. package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
  187. package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
  188. package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
  189. package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
  190. package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
  191. package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
  192. package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
  193. package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
  194. package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
  195. package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
  196. package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
  197. package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
  198. package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
  199. package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
  200. package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
  201. package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
  202. package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
  203. package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
  204. package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
  205. package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
  206. package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
  207. package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
  208. package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
  209. package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
  210. package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
  211. package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
  212. package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
  213. package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
@@ -1,1067 +1,1067 @@
1
- """
2
- Data Validation and Corruption Detection for Settings Database
3
-
4
- This module provides comprehensive data validation and corruption detection
5
- for the settings database system. It ensures data integrity and detects
6
- various types of corruption or invalid data.
7
-
8
- Features:
9
- - Schema validation and integrity checks
10
- - Data type validation and conversion
11
- - Corruption detection algorithms
12
- - Automatic data repair procedures
13
- - Validation reporting and logging
14
- """
15
-
16
- import json
17
- import sqlite3
18
- import logging
19
- import re
20
- from typing import Dict, List, Tuple, Any, Optional, Union, Set
21
- from datetime import datetime
22
- from dataclasses import dataclass
23
- from enum import Enum
24
-
25
-
26
- class ValidationSeverity(Enum):
27
- """Severity levels for validation issues."""
28
- INFO = "info"
29
- WARNING = "warning"
30
- ERROR = "error"
31
- CRITICAL = "critical"
32
-
33
-
34
- class ValidationCategory(Enum):
35
- """Categories of validation issues."""
36
- SCHEMA = "schema"
37
- DATA_TYPE = "data_type"
38
- DATA_INTEGRITY = "data_integrity"
39
- FOREIGN_KEY = "foreign_key"
40
- CONSTRAINT = "constraint"
41
- CORRUPTION = "corruption"
42
- MISSING_DATA = "missing_data"
43
- INVALID_FORMAT = "invalid_format"
44
-
45
-
46
- @dataclass
47
- class ValidationIssue:
48
- """Information about a validation issue."""
49
- category: ValidationCategory
50
- severity: ValidationSeverity
51
- message: str
52
- table: Optional[str] = None
53
- column: Optional[str] = None
54
- row_id: Optional[int] = None
55
- expected_value: Optional[Any] = None
56
- actual_value: Optional[Any] = None
57
- auto_fixable: bool = False
58
- fix_applied: bool = False
59
-
60
-
61
- class DataValidator:
62
- """
63
- Comprehensive data validator for the settings database system.
64
-
65
- Provides validation, corruption detection, and automatic repair
66
- capabilities to ensure data integrity.
67
- """
68
-
69
- def __init__(self, connection_manager, schema_manager):
70
- """
71
- Initialize the data validator.
72
-
73
- Args:
74
- connection_manager: Database connection manager
75
- schema_manager: Database schema manager
76
- """
77
- self.connection_manager = connection_manager
78
- self.schema_manager = schema_manager
79
- self.logger = logging.getLogger(__name__)
80
-
81
- # Validation configuration
82
- self.auto_fix_enabled = True
83
- self.strict_validation = False
84
-
85
- # Validation rules
86
- self._validation_rules = self._initialize_validation_rules()
87
-
88
- # Data type validators
89
- self._type_validators = {
90
- 'str': self._validate_string,
91
- 'int': self._validate_integer,
92
- 'float': self._validate_float,
93
- 'bool': self._validate_boolean,
94
- 'json': self._validate_json,
95
- 'array': self._validate_array
96
- }
97
-
98
- # Expected data patterns
99
- self._data_patterns = {
100
- 'export_path': r'^[a-zA-Z]:\\.*|^/.*|^~.*', # Windows/Unix paths
101
- 'debug_level': r'^(DEBUG|INFO|WARNING|ERROR|CRITICAL)$',
102
- 'selected_tool': r'^.+$', # Non-empty string
103
- 'active_input_tab': r'^\d+$', # Non-negative integer
104
- 'active_output_tab': r'^\d+$', # Non-negative integer
105
- }
106
-
107
- # Critical settings that must exist
108
- self._critical_settings = {
109
- 'export_path', 'debug_level', 'selected_tool',
110
- 'active_input_tab', 'active_output_tab'
111
- }
112
-
113
- # Tool settings validation rules
114
- self._tool_validation_rules = {
115
- 'cURL Tool': {
116
- 'default_timeout': {'type': int, 'min': 1, 'max': 3600},
117
- 'follow_redirects': {'type': bool},
118
- 'verify_ssl': {'type': bool},
119
- 'max_redirects': {'type': int, 'min': 0, 'max': 50},
120
- 'user_agent': {'type': str, 'max_length': 200},
121
- 'save_history': {'type': bool},
122
- 'max_history_items': {'type': int, 'min': 0, 'max': 10000}
123
- },
124
- 'JSON/XML Tool': {
125
- 'json_indent': {'type': int, 'min': 0, 'max': 10},
126
- 'xml_indent': {'type': int, 'min': 0, 'max': 10},
127
- 'preserve_attributes': {'type': bool},
128
- 'sort_keys': {'type': bool}
129
- }
130
- }
131
-
132
- def validate_database(self, fix_issues: bool = None) -> List[ValidationIssue]:
133
- """
134
- Perform comprehensive database validation.
135
-
136
- Args:
137
- fix_issues: Whether to automatically fix issues (None = use default)
138
-
139
- Returns:
140
- List of validation issues found
141
- """
142
- if fix_issues is None:
143
- fix_issues = self.auto_fix_enabled
144
-
145
- issues = []
146
-
147
- try:
148
- self.logger.info("Starting comprehensive database validation")
149
-
150
- # Schema validation
151
- issues.extend(self._validate_schema())
152
-
153
- # Data integrity validation
154
- issues.extend(self._validate_data_integrity())
155
-
156
- # Data type validation
157
- issues.extend(self._validate_data_types())
158
-
159
- # Foreign key validation
160
- issues.extend(self._validate_foreign_keys())
161
-
162
- # Constraint validation
163
- issues.extend(self._validate_constraints())
164
-
165
- # Corruption detection
166
- issues.extend(self._detect_corruption())
167
-
168
- # Critical data validation
169
- issues.extend(self._validate_critical_data())
170
-
171
- # Tool settings validation
172
- issues.extend(self._validate_tool_settings())
173
-
174
- # Apply fixes if enabled
175
- if fix_issues:
176
- self._apply_automatic_fixes(issues)
177
-
178
- # Log summary
179
- self._log_validation_summary(issues)
180
-
181
- return issues
182
-
183
- except Exception as e:
184
- self.logger.error(f"Database validation failed: {e}")
185
- return [ValidationIssue(
186
- category=ValidationCategory.CORRUPTION,
187
- severity=ValidationSeverity.CRITICAL,
188
- message=f"Validation process failed: {e}"
189
- )]
190
-
191
- def validate_settings_data(self, settings_data: Dict[str, Any]) -> List[ValidationIssue]:
192
- """
193
- Validate settings data structure before database insertion.
194
-
195
- Args:
196
- settings_data: Settings data to validate
197
-
198
- Returns:
199
- List of validation issues
200
- """
201
- issues = []
202
-
203
- try:
204
- # Validate required top-level keys
205
- required_keys = ['export_path', 'debug_level', 'selected_tool']
206
- for key in required_keys:
207
- if key not in settings_data:
208
- issues.append(ValidationIssue(
209
- category=ValidationCategory.MISSING_DATA,
210
- severity=ValidationSeverity.ERROR,
211
- message=f"Required setting '{key}' is missing",
212
- auto_fixable=True
213
- ))
214
-
215
- # Validate data types and formats
216
- for key, value in settings_data.items():
217
- validation_issues = self._validate_setting_value(key, value)
218
- issues.extend(validation_issues)
219
-
220
- # Validate tool settings structure
221
- if 'tool_settings' in settings_data:
222
- tool_issues = self._validate_tool_settings_structure(
223
- settings_data['tool_settings']
224
- )
225
- issues.extend(tool_issues)
226
-
227
- # Validate tab arrays
228
- for tab_type in ['input_tabs', 'output_tabs']:
229
- if tab_type in settings_data:
230
- tab_issues = self._validate_tab_array(
231
- tab_type, settings_data[tab_type]
232
- )
233
- issues.extend(tab_issues)
234
-
235
- return issues
236
-
237
- except Exception as e:
238
- self.logger.error(f"Settings data validation failed: {e}")
239
- return [ValidationIssue(
240
- category=ValidationCategory.CORRUPTION,
241
- severity=ValidationSeverity.CRITICAL,
242
- message=f"Settings validation failed: {e}"
243
- )]
244
-
245
- def detect_data_corruption(self) -> List[ValidationIssue]:
246
- """
247
- Detect various types of data corruption in the database.
248
-
249
- Returns:
250
- List of corruption issues found
251
- """
252
- issues = []
253
-
254
- try:
255
- conn = self.connection_manager.get_connection()
256
-
257
- # SQLite integrity check
258
- cursor = conn.execute("PRAGMA integrity_check")
259
- integrity_result = cursor.fetchone()[0]
260
-
261
- if integrity_result != "ok":
262
- issues.append(ValidationIssue(
263
- category=ValidationCategory.CORRUPTION,
264
- severity=ValidationSeverity.CRITICAL,
265
- message=f"SQLite integrity check failed: {integrity_result}"
266
- ))
267
-
268
- # Check for orphaned records
269
- orphaned_issues = self._detect_orphaned_records(conn)
270
- issues.extend(orphaned_issues)
271
-
272
- # Check for duplicate records
273
- duplicate_issues = self._detect_duplicate_records(conn)
274
- issues.extend(duplicate_issues)
275
-
276
- # Check for invalid JSON data
277
- json_issues = self._detect_invalid_json(conn)
278
- issues.extend(json_issues)
279
-
280
- # Check for encoding issues
281
- encoding_issues = self._detect_encoding_issues(conn)
282
- issues.extend(encoding_issues)
283
-
284
- return issues
285
-
286
- except Exception as e:
287
- self.logger.error(f"Corruption detection failed: {e}")
288
- return [ValidationIssue(
289
- category=ValidationCategory.CORRUPTION,
290
- severity=ValidationSeverity.CRITICAL,
291
- message=f"Corruption detection failed: {e}"
292
- )]
293
-
294
- def repair_data_corruption(self, issues: List[ValidationIssue]) -> bool:
295
- """
296
- Attempt to repair data corruption issues.
297
-
298
- Args:
299
- issues: List of validation issues to repair
300
-
301
- Returns:
302
- True if all repairs successful
303
- """
304
- try:
305
- repaired_count = 0
306
-
307
- with self.connection_manager.transaction() as conn:
308
- for issue in issues:
309
- if issue.auto_fixable and not issue.fix_applied:
310
- success = self._repair_issue(conn, issue)
311
- if success:
312
- issue.fix_applied = True
313
- repaired_count += 1
314
-
315
- self.logger.info(f"Repaired {repaired_count} data corruption issues")
316
- return repaired_count == len([i for i in issues if i.auto_fixable])
317
-
318
- except Exception as e:
319
- self.logger.error(f"Data corruption repair failed: {e}")
320
- return False
321
-
322
- def get_validation_report(self, issues: List[ValidationIssue]) -> Dict[str, Any]:
323
- """
324
- Generate a comprehensive validation report.
325
-
326
- Args:
327
- issues: List of validation issues
328
-
329
- Returns:
330
- Validation report dictionary
331
- """
332
- report = {
333
- 'timestamp': datetime.now().isoformat(),
334
- 'total_issues': len(issues),
335
- 'issues_by_severity': {},
336
- 'issues_by_category': {},
337
- 'auto_fixable_issues': 0,
338
- 'fixed_issues': 0,
339
- 'critical_issues': [],
340
- 'recommendations': []
341
- }
342
-
343
- # Count by severity
344
- for severity in ValidationSeverity:
345
- count = len([i for i in issues if i.severity == severity])
346
- report['issues_by_severity'][severity.value] = count
347
-
348
- # Count by category
349
- for category in ValidationCategory:
350
- count = len([i for i in issues if i.category == category])
351
- report['issues_by_category'][category.value] = count
352
-
353
- # Count fixable and fixed issues
354
- report['auto_fixable_issues'] = len([i for i in issues if i.auto_fixable])
355
- report['fixed_issues'] = len([i for i in issues if i.fix_applied])
356
-
357
- # Critical issues details
358
- critical_issues = [i for i in issues if i.severity == ValidationSeverity.CRITICAL]
359
- report['critical_issues'] = [
360
- {
361
- 'category': issue.category.value,
362
- 'message': issue.message,
363
- 'table': issue.table,
364
- 'auto_fixable': issue.auto_fixable,
365
- 'fix_applied': issue.fix_applied
366
- }
367
- for issue in critical_issues
368
- ]
369
-
370
- # Generate recommendations
371
- report['recommendations'] = self._generate_recommendations(issues)
372
-
373
- return report
374
-
375
- # Private validation methods
376
-
377
- def _initialize_validation_rules(self) -> Dict[str, Any]:
378
- """Initialize validation rules for different data types and tables."""
379
- return {
380
- 'core_settings': {
381
- 'required_columns': ['key', 'value', 'data_type'],
382
- 'key_constraints': {
383
- 'max_length': 100,
384
- 'pattern': r'^[a-zA-Z_][a-zA-Z0-9_]*$'
385
- },
386
- 'value_constraints': {
387
- 'max_length': 10000
388
- }
389
- },
390
- 'tool_settings': {
391
- 'required_columns': ['tool_name', 'setting_path', 'setting_value', 'data_type'],
392
- 'tool_name_constraints': {
393
- 'max_length': 100,
394
- 'pattern': r'^.+$' # Non-empty
395
- },
396
- 'setting_path_constraints': {
397
- 'max_length': 200,
398
- 'pattern': r'^[a-zA-Z_][a-zA-Z0-9_\.]*$'
399
- }
400
- },
401
- 'tab_content': {
402
- 'required_columns': ['tab_type', 'tab_index', 'content'],
403
- 'tab_type_values': ['input', 'output'],
404
- 'tab_index_range': (0, 6) # 0-6 for 7 tabs
405
- }
406
- }
407
-
408
- def _validate_schema(self) -> List[ValidationIssue]:
409
- """Validate database schema structure."""
410
- issues = []
411
-
412
- try:
413
- conn = self.connection_manager.get_connection()
414
-
415
- # Check table existence
416
- cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
417
- existing_tables = {row[0] for row in cursor.fetchall()}
418
-
419
- required_tables = {
420
- 'core_settings', 'tool_settings', 'tab_content',
421
- 'performance_settings', 'font_settings', 'dialog_settings',
422
- 'settings_metadata'
423
- }
424
-
425
- missing_tables = required_tables - existing_tables
426
- for table in missing_tables:
427
- issues.append(ValidationIssue(
428
- category=ValidationCategory.SCHEMA,
429
- severity=ValidationSeverity.CRITICAL,
430
- message=f"Required table '{table}' is missing",
431
- table=table,
432
- auto_fixable=True
433
- ))
434
-
435
- # Validate table schemas
436
- for table in existing_tables & required_tables:
437
- table_issues = self._validate_table_schema(conn, table)
438
- issues.extend(table_issues)
439
-
440
- return issues
441
-
442
- except Exception as e:
443
- return [ValidationIssue(
444
- category=ValidationCategory.SCHEMA,
445
- severity=ValidationSeverity.CRITICAL,
446
- message=f"Schema validation failed: {e}"
447
- )]
448
-
449
- def _validate_table_schema(self, conn: sqlite3.Connection, table_name: str) -> List[ValidationIssue]:
450
- """Validate individual table schema."""
451
- issues = []
452
-
453
- try:
454
- # Get table info
455
- cursor = conn.execute(f"PRAGMA table_info({table_name})")
456
- columns = {row[1]: row[2] for row in cursor.fetchall()} # name: type
457
-
458
- # Check required columns
459
- rules = self._validation_rules.get(table_name, {})
460
- required_columns = rules.get('required_columns', [])
461
-
462
- for column in required_columns:
463
- if column not in columns:
464
- issues.append(ValidationIssue(
465
- category=ValidationCategory.SCHEMA,
466
- severity=ValidationSeverity.ERROR,
467
- message=f"Required column '{column}' missing in table '{table_name}'",
468
- table=table_name,
469
- column=column,
470
- auto_fixable=True
471
- ))
472
-
473
- return issues
474
-
475
- except Exception as e:
476
- return [ValidationIssue(
477
- category=ValidationCategory.SCHEMA,
478
- severity=ValidationSeverity.ERROR,
479
- message=f"Table schema validation failed for '{table_name}': {e}",
480
- table=table_name
481
- )]
482
-
483
- def _validate_data_integrity(self) -> List[ValidationIssue]:
484
- """Validate data integrity across tables."""
485
- issues = []
486
-
487
- try:
488
- conn = self.connection_manager.get_connection()
489
-
490
- # Check for NULL values in required fields
491
- null_issues = self._check_null_values(conn)
492
- issues.extend(null_issues)
493
-
494
- # Check data consistency
495
- consistency_issues = self._check_data_consistency(conn)
496
- issues.extend(consistency_issues)
497
-
498
- # Check referential integrity
499
- referential_issues = self._check_referential_integrity(conn)
500
- issues.extend(referential_issues)
501
-
502
- return issues
503
-
504
- except Exception as e:
505
- return [ValidationIssue(
506
- category=ValidationCategory.DATA_INTEGRITY,
507
- severity=ValidationSeverity.ERROR,
508
- message=f"Data integrity validation failed: {e}"
509
- )]
510
-
511
- def _validate_data_types(self) -> List[ValidationIssue]:
512
- """Validate data types in all tables."""
513
- issues = []
514
-
515
- try:
516
- conn = self.connection_manager.get_connection()
517
-
518
- # Validate core settings data types
519
- cursor = conn.execute("SELECT key, value, data_type FROM core_settings")
520
- for key, value, data_type in cursor.fetchall():
521
- type_issues = self._validate_data_type(key, value, data_type, 'core_settings')
522
- issues.extend(type_issues)
523
-
524
- # Validate tool settings data types
525
- cursor = conn.execute("SELECT tool_name, setting_path, setting_value, data_type FROM tool_settings")
526
- for tool_name, setting_path, setting_value, data_type in cursor.fetchall():
527
- type_issues = self._validate_data_type(
528
- f"{tool_name}.{setting_path}", setting_value, data_type, 'tool_settings'
529
- )
530
- issues.extend(type_issues)
531
-
532
- return issues
533
-
534
- except Exception as e:
535
- return [ValidationIssue(
536
- category=ValidationCategory.DATA_TYPE,
537
- severity=ValidationSeverity.ERROR,
538
- message=f"Data type validation failed: {e}"
539
- )]
540
-
541
- def _validate_data_type(self, key: str, value: str, data_type: str, table: str) -> List[ValidationIssue]:
542
- """Validate a specific data type."""
543
- issues = []
544
-
545
- try:
546
- validator = self._type_validators.get(data_type)
547
- if validator:
548
- is_valid, error_msg = validator(value)
549
- if not is_valid:
550
- issues.append(ValidationIssue(
551
- category=ValidationCategory.DATA_TYPE,
552
- severity=ValidationSeverity.WARNING,
553
- message=f"Invalid {data_type} value for '{key}': {error_msg}",
554
- table=table,
555
- actual_value=value,
556
- auto_fixable=True
557
- ))
558
- else:
559
- issues.append(ValidationIssue(
560
- category=ValidationCategory.DATA_TYPE,
561
- severity=ValidationSeverity.WARNING,
562
- message=f"Unknown data type '{data_type}' for '{key}'",
563
- table=table,
564
- actual_value=data_type
565
- ))
566
-
567
- return issues
568
-
569
- except Exception as e:
570
- return [ValidationIssue(
571
- category=ValidationCategory.DATA_TYPE,
572
- severity=ValidationSeverity.ERROR,
573
- message=f"Data type validation failed for '{key}': {e}",
574
- table=table
575
- )]
576
-
577
- def _validate_foreign_keys(self) -> List[ValidationIssue]:
578
- """Validate foreign key constraints."""
579
- issues = []
580
-
581
- try:
582
- conn = self.connection_manager.get_connection()
583
-
584
- # Check foreign key violations
585
- cursor = conn.execute("PRAGMA foreign_key_check")
586
- violations = cursor.fetchall()
587
-
588
- for violation in violations:
589
- issues.append(ValidationIssue(
590
- category=ValidationCategory.FOREIGN_KEY,
591
- severity=ValidationSeverity.ERROR,
592
- message=f"Foreign key violation: {violation}",
593
- auto_fixable=False
594
- ))
595
-
596
- return issues
597
-
598
- except Exception as e:
599
- return [ValidationIssue(
600
- category=ValidationCategory.FOREIGN_KEY,
601
- severity=ValidationSeverity.ERROR,
602
- message=f"Foreign key validation failed: {e}"
603
- )]
604
-
605
- def _validate_constraints(self) -> List[ValidationIssue]:
606
- """Validate database constraints."""
607
- issues = []
608
-
609
- try:
610
- conn = self.connection_manager.get_connection()
611
-
612
- # Check unique constraints
613
- unique_issues = self._check_unique_constraints(conn)
614
- issues.extend(unique_issues)
615
-
616
- # Check check constraints (if any)
617
- check_issues = self._check_check_constraints(conn)
618
- issues.extend(check_issues)
619
-
620
- return issues
621
-
622
- except Exception as e:
623
- return [ValidationIssue(
624
- category=ValidationCategory.CONSTRAINT,
625
- severity=ValidationSeverity.ERROR,
626
- message=f"Constraint validation failed: {e}"
627
- )]
628
-
629
- def _detect_corruption(self) -> List[ValidationIssue]:
630
- """Detect various types of data corruption."""
631
- issues = []
632
-
633
- try:
634
- conn = self.connection_manager.get_connection()
635
-
636
- # SQLite integrity check
637
- cursor = conn.execute("PRAGMA integrity_check")
638
- result = cursor.fetchone()[0]
639
-
640
- if result != "ok":
641
- issues.append(ValidationIssue(
642
- category=ValidationCategory.CORRUPTION,
643
- severity=ValidationSeverity.CRITICAL,
644
- message=f"Database corruption detected: {result}",
645
- auto_fixable=False
646
- ))
647
-
648
- # Check for truncated data
649
- truncation_issues = self._detect_truncated_data(conn)
650
- issues.extend(truncation_issues)
651
-
652
- # Check for encoding corruption
653
- encoding_issues = self._detect_encoding_corruption(conn)
654
- issues.extend(encoding_issues)
655
-
656
- return issues
657
-
658
- except Exception as e:
659
- return [ValidationIssue(
660
- category=ValidationCategory.CORRUPTION,
661
- severity=ValidationSeverity.CRITICAL,
662
- message=f"Corruption detection failed: {e}"
663
- )]
664
-
665
- def _validate_critical_data(self) -> List[ValidationIssue]:
666
- """Validate critical settings that must exist."""
667
- issues = []
668
-
669
- try:
670
- conn = self.connection_manager.get_connection()
671
-
672
- # Check critical core settings
673
- cursor = conn.execute("SELECT key FROM core_settings")
674
- existing_keys = {row[0] for row in cursor.fetchall()}
675
-
676
- missing_critical = self._critical_settings - existing_keys
677
- for key in missing_critical:
678
- issues.append(ValidationIssue(
679
- category=ValidationCategory.MISSING_DATA,
680
- severity=ValidationSeverity.CRITICAL,
681
- message=f"Critical setting '{key}' is missing",
682
- table='core_settings',
683
- auto_fixable=True
684
- ))
685
-
686
- # Validate tab content completeness
687
- cursor = conn.execute("SELECT tab_type, COUNT(*) FROM tab_content GROUP BY tab_type")
688
- tab_counts = dict(cursor.fetchall())
689
-
690
- for tab_type in ['input', 'output']:
691
- count = tab_counts.get(tab_type, 0)
692
- if count != 7: # Should have 7 tabs
693
- issues.append(ValidationIssue(
694
- category=ValidationCategory.MISSING_DATA,
695
- severity=ValidationSeverity.ERROR,
696
- message=f"Incomplete {tab_type} tabs: expected 7, found {count}",
697
- table='tab_content',
698
- auto_fixable=True
699
- ))
700
-
701
- return issues
702
-
703
- except Exception as e:
704
- return [ValidationIssue(
705
- category=ValidationCategory.MISSING_DATA,
706
- severity=ValidationSeverity.ERROR,
707
- message=f"Critical data validation failed: {e}"
708
- )]
709
-
710
- def _validate_tool_settings(self) -> List[ValidationIssue]:
711
- """Validate tool-specific settings."""
712
- issues = []
713
-
714
- try:
715
- conn = self.connection_manager.get_connection()
716
-
717
- # Get all tool settings
718
- cursor = conn.execute("""
719
- SELECT tool_name, setting_path, setting_value, data_type
720
- FROM tool_settings
721
- ORDER BY tool_name, setting_path
722
- """)
723
-
724
- tool_settings = {}
725
- for tool_name, setting_path, setting_value, data_type in cursor.fetchall():
726
- if tool_name not in tool_settings:
727
- tool_settings[tool_name] = {}
728
- tool_settings[tool_name][setting_path] = {
729
- 'value': setting_value,
730
- 'type': data_type
731
- }
732
-
733
- # Validate each tool's settings
734
- for tool_name, settings in tool_settings.items():
735
- tool_issues = self._validate_individual_tool_settings(tool_name, settings)
736
- issues.extend(tool_issues)
737
-
738
- return issues
739
-
740
- except Exception as e:
741
- return [ValidationIssue(
742
- category=ValidationCategory.DATA_INTEGRITY,
743
- severity=ValidationSeverity.ERROR,
744
- message=f"Tool settings validation failed: {e}"
745
- )]
746
-
747
- # Type validators
748
-
749
- def _validate_string(self, value: str) -> Tuple[bool, str]:
750
- """Validate string value."""
751
- try:
752
- if not isinstance(value, str):
753
- return False, f"Expected string, got {type(value).__name__}"
754
- return True, ""
755
- except Exception as e:
756
- return False, str(e)
757
-
758
- def _validate_integer(self, value: str) -> Tuple[bool, str]:
759
- """Validate integer value."""
760
- try:
761
- int(value)
762
- return True, ""
763
- except ValueError:
764
- return False, f"Cannot convert '{value}' to integer"
765
- except Exception as e:
766
- return False, str(e)
767
-
768
- def _validate_float(self, value: str) -> Tuple[bool, str]:
769
- """Validate float value."""
770
- try:
771
- float(value)
772
- return True, ""
773
- except ValueError:
774
- return False, f"Cannot convert '{value}' to float"
775
- except Exception as e:
776
- return False, str(e)
777
-
778
- def _validate_boolean(self, value: str) -> Tuple[bool, str]:
779
- """Validate boolean value."""
780
- try:
781
- if value.lower() in ('true', 'false', '1', '0'):
782
- return True, ""
783
- return False, f"Invalid boolean value: '{value}'"
784
- except Exception as e:
785
- return False, str(e)
786
-
787
- def _validate_json(self, value: str) -> Tuple[bool, str]:
788
- """Validate JSON value."""
789
- try:
790
- json.loads(value)
791
- return True, ""
792
- except json.JSONDecodeError as e:
793
- return False, f"Invalid JSON: {e}"
794
- except Exception as e:
795
- return False, str(e)
796
-
797
- def _validate_array(self, value: str) -> Tuple[bool, str]:
798
- """Validate array value."""
799
- try:
800
- parsed = json.loads(value)
801
- if not isinstance(parsed, list):
802
- return False, f"Expected array, got {type(parsed).__name__}"
803
- return True, ""
804
- except json.JSONDecodeError as e:
805
- return False, f"Invalid array JSON: {e}"
806
- except Exception as e:
807
- return False, str(e)
808
-
809
- # Helper methods for specific validation checks
810
-
811
- def _validate_setting_value(self, key: str, value: Any) -> List[ValidationIssue]:
812
- """Validate a specific setting value."""
813
- issues = []
814
-
815
- # Check against patterns
816
- if key in self._data_patterns:
817
- pattern = self._data_patterns[key]
818
- if isinstance(value, str) and not re.match(pattern, value):
819
- issues.append(ValidationIssue(
820
- category=ValidationCategory.INVALID_FORMAT,
821
- severity=ValidationSeverity.WARNING,
822
- message=f"Setting '{key}' value '{value}' doesn't match expected pattern",
823
- actual_value=value,
824
- auto_fixable=True
825
- ))
826
-
827
- return issues
828
-
829
- def _validate_tool_settings_structure(self, tool_settings: Dict[str, Any]) -> List[ValidationIssue]:
830
- """Validate tool settings structure."""
831
- issues = []
832
-
833
- for tool_name, tool_config in tool_settings.items():
834
- if not isinstance(tool_config, dict):
835
- continue
836
-
837
- # Validate against tool-specific rules
838
- if tool_name in self._tool_validation_rules:
839
- rules = self._tool_validation_rules[tool_name]
840
- for setting_key, rule in rules.items():
841
- if setting_key in tool_config:
842
- value = tool_config[setting_key]
843
- validation_issues = self._validate_tool_setting_value(
844
- tool_name, setting_key, value, rule
845
- )
846
- issues.extend(validation_issues)
847
-
848
- return issues
849
-
850
- def _validate_tool_setting_value(self, tool_name: str, setting_key: str,
851
- value: Any, rule: Dict[str, Any]) -> List[ValidationIssue]:
852
- """Validate individual tool setting value."""
853
- issues = []
854
-
855
- # Type validation
856
- expected_type = rule.get('type')
857
- if expected_type and not isinstance(value, expected_type):
858
- issues.append(ValidationIssue(
859
- category=ValidationCategory.DATA_TYPE,
860
- severity=ValidationSeverity.WARNING,
861
- message=f"Tool '{tool_name}' setting '{setting_key}' has wrong type: expected {expected_type.__name__}, got {type(value).__name__}",
862
- actual_value=value,
863
- expected_value=expected_type.__name__,
864
- auto_fixable=True
865
- ))
866
-
867
- # Range validation for numeric types
868
- if isinstance(value, (int, float)):
869
- min_val = rule.get('min')
870
- max_val = rule.get('max')
871
-
872
- if min_val is not None and value < min_val:
873
- issues.append(ValidationIssue(
874
- category=ValidationCategory.CONSTRAINT,
875
- severity=ValidationSeverity.WARNING,
876
- message=f"Tool '{tool_name}' setting '{setting_key}' value {value} is below minimum {min_val}",
877
- actual_value=value,
878
- expected_value=min_val,
879
- auto_fixable=True
880
- ))
881
-
882
- if max_val is not None and value > max_val:
883
- issues.append(ValidationIssue(
884
- category=ValidationCategory.CONSTRAINT,
885
- severity=ValidationSeverity.WARNING,
886
- message=f"Tool '{tool_name}' setting '{setting_key}' value {value} is above maximum {max_val}",
887
- actual_value=value,
888
- expected_value=max_val,
889
- auto_fixable=True
890
- ))
891
-
892
- # Length validation for strings
893
- if isinstance(value, str):
894
- max_length = rule.get('max_length')
895
- if max_length and len(value) > max_length:
896
- issues.append(ValidationIssue(
897
- category=ValidationCategory.CONSTRAINT,
898
- severity=ValidationSeverity.WARNING,
899
- message=f"Tool '{tool_name}' setting '{setting_key}' value is too long: {len(value)} > {max_length}",
900
- actual_value=len(value),
901
- expected_value=max_length,
902
- auto_fixable=True
903
- ))
904
-
905
- return issues
906
-
907
- def _validate_tab_array(self, tab_type: str, tab_array: List[str]) -> List[ValidationIssue]:
908
- """Validate tab array structure."""
909
- issues = []
910
-
911
- if not isinstance(tab_array, list):
912
- issues.append(ValidationIssue(
913
- category=ValidationCategory.DATA_TYPE,
914
- severity=ValidationSeverity.ERROR,
915
- message=f"{tab_type} is not an array",
916
- actual_value=type(tab_array).__name__,
917
- expected_value="list",
918
- auto_fixable=True
919
- ))
920
- return issues
921
-
922
- if len(tab_array) != 7:
923
- issues.append(ValidationIssue(
924
- category=ValidationCategory.CONSTRAINT,
925
- severity=ValidationSeverity.ERROR,
926
- message=f"{tab_type} should have 7 elements, found {len(tab_array)}",
927
- actual_value=len(tab_array),
928
- expected_value=7,
929
- auto_fixable=True
930
- ))
931
-
932
- # Validate each tab content
933
- for i, content in enumerate(tab_array):
934
- if not isinstance(content, str):
935
- issues.append(ValidationIssue(
936
- category=ValidationCategory.DATA_TYPE,
937
- severity=ValidationSeverity.WARNING,
938
- message=f"{tab_type}[{i}] is not a string",
939
- actual_value=type(content).__name__,
940
- expected_value="string",
941
- auto_fixable=True
942
- ))
943
-
944
- return issues
945
-
946
- # Additional helper methods would continue here...
947
- # (Implementing remaining validation methods for completeness)
948
-
949
- def _check_null_values(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
950
- """Check for NULL values in required fields."""
951
- issues = []
952
- # Implementation would check for NULL values in non-nullable columns
953
- return issues
954
-
955
- def _check_data_consistency(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
956
- """Check data consistency across tables."""
957
- issues = []
958
- # Implementation would check for data consistency
959
- return issues
960
-
961
- def _check_referential_integrity(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
962
- """Check referential integrity."""
963
- issues = []
964
- # Implementation would check referential integrity
965
- return issues
966
-
967
- def _check_unique_constraints(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
968
- """Check unique constraints."""
969
- issues = []
970
- # Implementation would check unique constraints
971
- return issues
972
-
973
- def _check_check_constraints(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
974
- """Check check constraints."""
975
- issues = []
976
- # Implementation would check check constraints
977
- return issues
978
-
979
- def _detect_orphaned_records(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
980
- """Detect orphaned records."""
981
- issues = []
982
- # Implementation would detect orphaned records
983
- return issues
984
-
985
- def _detect_duplicate_records(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
986
- """Detect duplicate records."""
987
- issues = []
988
- # Implementation would detect duplicates
989
- return issues
990
-
991
- def _detect_invalid_json(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
992
- """Detect invalid JSON data."""
993
- issues = []
994
- # Implementation would detect invalid JSON
995
- return issues
996
-
997
- def _detect_encoding_issues(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
998
- """Detect encoding issues."""
999
- issues = []
1000
- # Implementation would detect encoding issues
1001
- return issues
1002
-
1003
- def _detect_truncated_data(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
1004
- """Detect truncated data."""
1005
- issues = []
1006
- # Implementation would detect truncated data
1007
- return issues
1008
-
1009
- def _detect_encoding_corruption(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
1010
- """Detect encoding corruption."""
1011
- issues = []
1012
- # Implementation would detect encoding corruption
1013
- return issues
1014
-
1015
- def _validate_individual_tool_settings(self, tool_name: str, settings: Dict[str, Any]) -> List[ValidationIssue]:
1016
- """Validate individual tool settings."""
1017
- issues = []
1018
- # Implementation would validate individual tool settings
1019
- return issues
1020
-
1021
- def _apply_automatic_fixes(self, issues: List[ValidationIssue]) -> None:
1022
- """Apply automatic fixes to validation issues."""
1023
- # Implementation would apply automatic fixes
1024
- pass
1025
-
1026
- def _repair_issue(self, conn: sqlite3.Connection, issue: ValidationIssue) -> bool:
1027
- """Repair a specific validation issue."""
1028
- # Implementation would repair specific issues
1029
- return False
1030
-
1031
- def _log_validation_summary(self, issues: List[ValidationIssue]) -> None:
1032
- """Log validation summary."""
1033
- if not issues:
1034
- self.logger.info("Database validation completed - no issues found")
1035
- return
1036
-
1037
- severity_counts = {}
1038
- for issue in issues:
1039
- severity_counts[issue.severity] = severity_counts.get(issue.severity, 0) + 1
1040
-
1041
- summary = f"Database validation completed - {len(issues)} issues found: "
1042
- summary += ", ".join([f"{count} {severity.value}" for severity, count in severity_counts.items()])
1043
-
1044
- if any(issue.severity == ValidationSeverity.CRITICAL for issue in issues):
1045
- self.logger.error(summary)
1046
- elif any(issue.severity == ValidationSeverity.ERROR for issue in issues):
1047
- self.logger.warning(summary)
1048
- else:
1049
- self.logger.info(summary)
1050
-
1051
- def _generate_recommendations(self, issues: List[ValidationIssue]) -> List[str]:
1052
- """Generate recommendations based on validation issues."""
1053
- recommendations = []
1054
-
1055
- critical_count = len([i for i in issues if i.severity == ValidationSeverity.CRITICAL])
1056
- if critical_count > 0:
1057
- recommendations.append(f"Address {critical_count} critical issues immediately")
1058
-
1059
- auto_fixable_count = len([i for i in issues if i.auto_fixable and not i.fix_applied])
1060
- if auto_fixable_count > 0:
1061
- recommendations.append(f"Run automatic repair for {auto_fixable_count} fixable issues")
1062
-
1063
- corruption_count = len([i for i in issues if i.category == ValidationCategory.CORRUPTION])
1064
- if corruption_count > 0:
1065
- recommendations.append("Consider restoring from backup due to corruption")
1066
-
1
+ """
2
+ Data Validation and Corruption Detection for Settings Database
3
+
4
+ This module provides comprehensive data validation and corruption detection
5
+ for the settings database system. It ensures data integrity and detects
6
+ various types of corruption or invalid data.
7
+
8
+ Features:
9
+ - Schema validation and integrity checks
10
+ - Data type validation and conversion
11
+ - Corruption detection algorithms
12
+ - Automatic data repair procedures
13
+ - Validation reporting and logging
14
+ """
15
+
16
+ import json
17
+ import sqlite3
18
+ import logging
19
+ import re
20
+ from typing import Dict, List, Tuple, Any, Optional, Union, Set
21
+ from datetime import datetime
22
+ from dataclasses import dataclass
23
+ from enum import Enum
24
+
25
+
26
+ class ValidationSeverity(Enum):
27
+ """Severity levels for validation issues."""
28
+ INFO = "info"
29
+ WARNING = "warning"
30
+ ERROR = "error"
31
+ CRITICAL = "critical"
32
+
33
+
34
+ class ValidationCategory(Enum):
35
+ """Categories of validation issues."""
36
+ SCHEMA = "schema"
37
+ DATA_TYPE = "data_type"
38
+ DATA_INTEGRITY = "data_integrity"
39
+ FOREIGN_KEY = "foreign_key"
40
+ CONSTRAINT = "constraint"
41
+ CORRUPTION = "corruption"
42
+ MISSING_DATA = "missing_data"
43
+ INVALID_FORMAT = "invalid_format"
44
+
45
+
46
+ @dataclass
47
+ class ValidationIssue:
48
+ """Information about a validation issue."""
49
+ category: ValidationCategory
50
+ severity: ValidationSeverity
51
+ message: str
52
+ table: Optional[str] = None
53
+ column: Optional[str] = None
54
+ row_id: Optional[int] = None
55
+ expected_value: Optional[Any] = None
56
+ actual_value: Optional[Any] = None
57
+ auto_fixable: bool = False
58
+ fix_applied: bool = False
59
+
60
+
61
+ class DataValidator:
62
+ """
63
+ Comprehensive data validator for the settings database system.
64
+
65
+ Provides validation, corruption detection, and automatic repair
66
+ capabilities to ensure data integrity.
67
+ """
68
+
69
+ def __init__(self, connection_manager, schema_manager):
70
+ """
71
+ Initialize the data validator.
72
+
73
+ Args:
74
+ connection_manager: Database connection manager
75
+ schema_manager: Database schema manager
76
+ """
77
+ self.connection_manager = connection_manager
78
+ self.schema_manager = schema_manager
79
+ self.logger = logging.getLogger(__name__)
80
+
81
+ # Validation configuration
82
+ self.auto_fix_enabled = True
83
+ self.strict_validation = False
84
+
85
+ # Validation rules
86
+ self._validation_rules = self._initialize_validation_rules()
87
+
88
+ # Data type validators
89
+ self._type_validators = {
90
+ 'str': self._validate_string,
91
+ 'int': self._validate_integer,
92
+ 'float': self._validate_float,
93
+ 'bool': self._validate_boolean,
94
+ 'json': self._validate_json,
95
+ 'array': self._validate_array
96
+ }
97
+
98
+ # Expected data patterns
99
+ self._data_patterns = {
100
+ 'export_path': r'^[a-zA-Z]:\\.*|^/.*|^~.*', # Windows/Unix paths
101
+ 'debug_level': r'^(DEBUG|INFO|WARNING|ERROR|CRITICAL)$',
102
+ 'selected_tool': r'^.+$', # Non-empty string
103
+ 'active_input_tab': r'^\d+$', # Non-negative integer
104
+ 'active_output_tab': r'^\d+$', # Non-negative integer
105
+ }
106
+
107
+ # Critical settings that must exist
108
+ self._critical_settings = {
109
+ 'export_path', 'debug_level', 'selected_tool',
110
+ 'active_input_tab', 'active_output_tab'
111
+ }
112
+
113
+ # Tool settings validation rules
114
+ self._tool_validation_rules = {
115
+ 'cURL Tool': {
116
+ 'default_timeout': {'type': int, 'min': 1, 'max': 3600},
117
+ 'follow_redirects': {'type': bool},
118
+ 'verify_ssl': {'type': bool},
119
+ 'max_redirects': {'type': int, 'min': 0, 'max': 50},
120
+ 'user_agent': {'type': str, 'max_length': 200},
121
+ 'save_history': {'type': bool},
122
+ 'max_history_items': {'type': int, 'min': 0, 'max': 10000}
123
+ },
124
+ 'JSON/XML Tool': {
125
+ 'json_indent': {'type': int, 'min': 0, 'max': 10},
126
+ 'xml_indent': {'type': int, 'min': 0, 'max': 10},
127
+ 'preserve_attributes': {'type': bool},
128
+ 'sort_keys': {'type': bool}
129
+ }
130
+ }
131
+
132
+ def validate_database(self, fix_issues: bool = None) -> List[ValidationIssue]:
133
+ """
134
+ Perform comprehensive database validation.
135
+
136
+ Args:
137
+ fix_issues: Whether to automatically fix issues (None = use default)
138
+
139
+ Returns:
140
+ List of validation issues found
141
+ """
142
+ if fix_issues is None:
143
+ fix_issues = self.auto_fix_enabled
144
+
145
+ issues = []
146
+
147
+ try:
148
+ self.logger.info("Starting comprehensive database validation")
149
+
150
+ # Schema validation
151
+ issues.extend(self._validate_schema())
152
+
153
+ # Data integrity validation
154
+ issues.extend(self._validate_data_integrity())
155
+
156
+ # Data type validation
157
+ issues.extend(self._validate_data_types())
158
+
159
+ # Foreign key validation
160
+ issues.extend(self._validate_foreign_keys())
161
+
162
+ # Constraint validation
163
+ issues.extend(self._validate_constraints())
164
+
165
+ # Corruption detection
166
+ issues.extend(self._detect_corruption())
167
+
168
+ # Critical data validation
169
+ issues.extend(self._validate_critical_data())
170
+
171
+ # Tool settings validation
172
+ issues.extend(self._validate_tool_settings())
173
+
174
+ # Apply fixes if enabled
175
+ if fix_issues:
176
+ self._apply_automatic_fixes(issues)
177
+
178
+ # Log summary
179
+ self._log_validation_summary(issues)
180
+
181
+ return issues
182
+
183
+ except Exception as e:
184
+ self.logger.error(f"Database validation failed: {e}")
185
+ return [ValidationIssue(
186
+ category=ValidationCategory.CORRUPTION,
187
+ severity=ValidationSeverity.CRITICAL,
188
+ message=f"Validation process failed: {e}"
189
+ )]
190
+
191
+ def validate_settings_data(self, settings_data: Dict[str, Any]) -> List[ValidationIssue]:
192
+ """
193
+ Validate settings data structure before database insertion.
194
+
195
+ Args:
196
+ settings_data: Settings data to validate
197
+
198
+ Returns:
199
+ List of validation issues
200
+ """
201
+ issues = []
202
+
203
+ try:
204
+ # Validate required top-level keys
205
+ required_keys = ['export_path', 'debug_level', 'selected_tool']
206
+ for key in required_keys:
207
+ if key not in settings_data:
208
+ issues.append(ValidationIssue(
209
+ category=ValidationCategory.MISSING_DATA,
210
+ severity=ValidationSeverity.ERROR,
211
+ message=f"Required setting '{key}' is missing",
212
+ auto_fixable=True
213
+ ))
214
+
215
+ # Validate data types and formats
216
+ for key, value in settings_data.items():
217
+ validation_issues = self._validate_setting_value(key, value)
218
+ issues.extend(validation_issues)
219
+
220
+ # Validate tool settings structure
221
+ if 'tool_settings' in settings_data:
222
+ tool_issues = self._validate_tool_settings_structure(
223
+ settings_data['tool_settings']
224
+ )
225
+ issues.extend(tool_issues)
226
+
227
+ # Validate tab arrays
228
+ for tab_type in ['input_tabs', 'output_tabs']:
229
+ if tab_type in settings_data:
230
+ tab_issues = self._validate_tab_array(
231
+ tab_type, settings_data[tab_type]
232
+ )
233
+ issues.extend(tab_issues)
234
+
235
+ return issues
236
+
237
+ except Exception as e:
238
+ self.logger.error(f"Settings data validation failed: {e}")
239
+ return [ValidationIssue(
240
+ category=ValidationCategory.CORRUPTION,
241
+ severity=ValidationSeverity.CRITICAL,
242
+ message=f"Settings validation failed: {e}"
243
+ )]
244
+
245
+ def detect_data_corruption(self) -> List[ValidationIssue]:
246
+ """
247
+ Detect various types of data corruption in the database.
248
+
249
+ Returns:
250
+ List of corruption issues found
251
+ """
252
+ issues = []
253
+
254
+ try:
255
+ conn = self.connection_manager.get_connection()
256
+
257
+ # SQLite integrity check
258
+ cursor = conn.execute("PRAGMA integrity_check")
259
+ integrity_result = cursor.fetchone()[0]
260
+
261
+ if integrity_result != "ok":
262
+ issues.append(ValidationIssue(
263
+ category=ValidationCategory.CORRUPTION,
264
+ severity=ValidationSeverity.CRITICAL,
265
+ message=f"SQLite integrity check failed: {integrity_result}"
266
+ ))
267
+
268
+ # Check for orphaned records
269
+ orphaned_issues = self._detect_orphaned_records(conn)
270
+ issues.extend(orphaned_issues)
271
+
272
+ # Check for duplicate records
273
+ duplicate_issues = self._detect_duplicate_records(conn)
274
+ issues.extend(duplicate_issues)
275
+
276
+ # Check for invalid JSON data
277
+ json_issues = self._detect_invalid_json(conn)
278
+ issues.extend(json_issues)
279
+
280
+ # Check for encoding issues
281
+ encoding_issues = self._detect_encoding_issues(conn)
282
+ issues.extend(encoding_issues)
283
+
284
+ return issues
285
+
286
+ except Exception as e:
287
+ self.logger.error(f"Corruption detection failed: {e}")
288
+ return [ValidationIssue(
289
+ category=ValidationCategory.CORRUPTION,
290
+ severity=ValidationSeverity.CRITICAL,
291
+ message=f"Corruption detection failed: {e}"
292
+ )]
293
+
294
+ def repair_data_corruption(self, issues: List[ValidationIssue]) -> bool:
295
+ """
296
+ Attempt to repair data corruption issues.
297
+
298
+ Args:
299
+ issues: List of validation issues to repair
300
+
301
+ Returns:
302
+ True if all repairs successful
303
+ """
304
+ try:
305
+ repaired_count = 0
306
+
307
+ with self.connection_manager.transaction() as conn:
308
+ for issue in issues:
309
+ if issue.auto_fixable and not issue.fix_applied:
310
+ success = self._repair_issue(conn, issue)
311
+ if success:
312
+ issue.fix_applied = True
313
+ repaired_count += 1
314
+
315
+ self.logger.info(f"Repaired {repaired_count} data corruption issues")
316
+ return repaired_count == len([i for i in issues if i.auto_fixable])
317
+
318
+ except Exception as e:
319
+ self.logger.error(f"Data corruption repair failed: {e}")
320
+ return False
321
+
322
+ def get_validation_report(self, issues: List[ValidationIssue]) -> Dict[str, Any]:
323
+ """
324
+ Generate a comprehensive validation report.
325
+
326
+ Args:
327
+ issues: List of validation issues
328
+
329
+ Returns:
330
+ Validation report dictionary
331
+ """
332
+ report = {
333
+ 'timestamp': datetime.now().isoformat(),
334
+ 'total_issues': len(issues),
335
+ 'issues_by_severity': {},
336
+ 'issues_by_category': {},
337
+ 'auto_fixable_issues': 0,
338
+ 'fixed_issues': 0,
339
+ 'critical_issues': [],
340
+ 'recommendations': []
341
+ }
342
+
343
+ # Count by severity
344
+ for severity in ValidationSeverity:
345
+ count = len([i for i in issues if i.severity == severity])
346
+ report['issues_by_severity'][severity.value] = count
347
+
348
+ # Count by category
349
+ for category in ValidationCategory:
350
+ count = len([i for i in issues if i.category == category])
351
+ report['issues_by_category'][category.value] = count
352
+
353
+ # Count fixable and fixed issues
354
+ report['auto_fixable_issues'] = len([i for i in issues if i.auto_fixable])
355
+ report['fixed_issues'] = len([i for i in issues if i.fix_applied])
356
+
357
+ # Critical issues details
358
+ critical_issues = [i for i in issues if i.severity == ValidationSeverity.CRITICAL]
359
+ report['critical_issues'] = [
360
+ {
361
+ 'category': issue.category.value,
362
+ 'message': issue.message,
363
+ 'table': issue.table,
364
+ 'auto_fixable': issue.auto_fixable,
365
+ 'fix_applied': issue.fix_applied
366
+ }
367
+ for issue in critical_issues
368
+ ]
369
+
370
+ # Generate recommendations
371
+ report['recommendations'] = self._generate_recommendations(issues)
372
+
373
+ return report
374
+
375
+ # Private validation methods
376
+
377
+ def _initialize_validation_rules(self) -> Dict[str, Any]:
378
+ """Initialize validation rules for different data types and tables."""
379
+ return {
380
+ 'core_settings': {
381
+ 'required_columns': ['key', 'value', 'data_type'],
382
+ 'key_constraints': {
383
+ 'max_length': 100,
384
+ 'pattern': r'^[a-zA-Z_][a-zA-Z0-9_]*$'
385
+ },
386
+ 'value_constraints': {
387
+ 'max_length': 10000
388
+ }
389
+ },
390
+ 'tool_settings': {
391
+ 'required_columns': ['tool_name', 'setting_path', 'setting_value', 'data_type'],
392
+ 'tool_name_constraints': {
393
+ 'max_length': 100,
394
+ 'pattern': r'^.+$' # Non-empty
395
+ },
396
+ 'setting_path_constraints': {
397
+ 'max_length': 200,
398
+ 'pattern': r'^[a-zA-Z_][a-zA-Z0-9_\.]*$'
399
+ }
400
+ },
401
+ 'tab_content': {
402
+ 'required_columns': ['tab_type', 'tab_index', 'content'],
403
+ 'tab_type_values': ['input', 'output'],
404
+ 'tab_index_range': (0, 6) # 0-6 for 7 tabs
405
+ }
406
+ }
407
+
408
+ def _validate_schema(self) -> List[ValidationIssue]:
409
+ """Validate database schema structure."""
410
+ issues = []
411
+
412
+ try:
413
+ conn = self.connection_manager.get_connection()
414
+
415
+ # Check table existence
416
+ cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
417
+ existing_tables = {row[0] for row in cursor.fetchall()}
418
+
419
+ required_tables = {
420
+ 'core_settings', 'tool_settings', 'tab_content',
421
+ 'performance_settings', 'font_settings', 'dialog_settings',
422
+ 'settings_metadata'
423
+ }
424
+
425
+ missing_tables = required_tables - existing_tables
426
+ for table in missing_tables:
427
+ issues.append(ValidationIssue(
428
+ category=ValidationCategory.SCHEMA,
429
+ severity=ValidationSeverity.CRITICAL,
430
+ message=f"Required table '{table}' is missing",
431
+ table=table,
432
+ auto_fixable=True
433
+ ))
434
+
435
+ # Validate table schemas
436
+ for table in existing_tables & required_tables:
437
+ table_issues = self._validate_table_schema(conn, table)
438
+ issues.extend(table_issues)
439
+
440
+ return issues
441
+
442
+ except Exception as e:
443
+ return [ValidationIssue(
444
+ category=ValidationCategory.SCHEMA,
445
+ severity=ValidationSeverity.CRITICAL,
446
+ message=f"Schema validation failed: {e}"
447
+ )]
448
+
449
+ def _validate_table_schema(self, conn: sqlite3.Connection, table_name: str) -> List[ValidationIssue]:
450
+ """Validate individual table schema."""
451
+ issues = []
452
+
453
+ try:
454
+ # Get table info
455
+ cursor = conn.execute(f"PRAGMA table_info({table_name})")
456
+ columns = {row[1]: row[2] for row in cursor.fetchall()} # name: type
457
+
458
+ # Check required columns
459
+ rules = self._validation_rules.get(table_name, {})
460
+ required_columns = rules.get('required_columns', [])
461
+
462
+ for column in required_columns:
463
+ if column not in columns:
464
+ issues.append(ValidationIssue(
465
+ category=ValidationCategory.SCHEMA,
466
+ severity=ValidationSeverity.ERROR,
467
+ message=f"Required column '{column}' missing in table '{table_name}'",
468
+ table=table_name,
469
+ column=column,
470
+ auto_fixable=True
471
+ ))
472
+
473
+ return issues
474
+
475
+ except Exception as e:
476
+ return [ValidationIssue(
477
+ category=ValidationCategory.SCHEMA,
478
+ severity=ValidationSeverity.ERROR,
479
+ message=f"Table schema validation failed for '{table_name}': {e}",
480
+ table=table_name
481
+ )]
482
+
483
+ def _validate_data_integrity(self) -> List[ValidationIssue]:
484
+ """Validate data integrity across tables."""
485
+ issues = []
486
+
487
+ try:
488
+ conn = self.connection_manager.get_connection()
489
+
490
+ # Check for NULL values in required fields
491
+ null_issues = self._check_null_values(conn)
492
+ issues.extend(null_issues)
493
+
494
+ # Check data consistency
495
+ consistency_issues = self._check_data_consistency(conn)
496
+ issues.extend(consistency_issues)
497
+
498
+ # Check referential integrity
499
+ referential_issues = self._check_referential_integrity(conn)
500
+ issues.extend(referential_issues)
501
+
502
+ return issues
503
+
504
+ except Exception as e:
505
+ return [ValidationIssue(
506
+ category=ValidationCategory.DATA_INTEGRITY,
507
+ severity=ValidationSeverity.ERROR,
508
+ message=f"Data integrity validation failed: {e}"
509
+ )]
510
+
511
+ def _validate_data_types(self) -> List[ValidationIssue]:
512
+ """Validate data types in all tables."""
513
+ issues = []
514
+
515
+ try:
516
+ conn = self.connection_manager.get_connection()
517
+
518
+ # Validate core settings data types
519
+ cursor = conn.execute("SELECT key, value, data_type FROM core_settings")
520
+ for key, value, data_type in cursor.fetchall():
521
+ type_issues = self._validate_data_type(key, value, data_type, 'core_settings')
522
+ issues.extend(type_issues)
523
+
524
+ # Validate tool settings data types
525
+ cursor = conn.execute("SELECT tool_name, setting_path, setting_value, data_type FROM tool_settings")
526
+ for tool_name, setting_path, setting_value, data_type in cursor.fetchall():
527
+ type_issues = self._validate_data_type(
528
+ f"{tool_name}.{setting_path}", setting_value, data_type, 'tool_settings'
529
+ )
530
+ issues.extend(type_issues)
531
+
532
+ return issues
533
+
534
+ except Exception as e:
535
+ return [ValidationIssue(
536
+ category=ValidationCategory.DATA_TYPE,
537
+ severity=ValidationSeverity.ERROR,
538
+ message=f"Data type validation failed: {e}"
539
+ )]
540
+
541
+ def _validate_data_type(self, key: str, value: str, data_type: str, table: str) -> List[ValidationIssue]:
542
+ """Validate a specific data type."""
543
+ issues = []
544
+
545
+ try:
546
+ validator = self._type_validators.get(data_type)
547
+ if validator:
548
+ is_valid, error_msg = validator(value)
549
+ if not is_valid:
550
+ issues.append(ValidationIssue(
551
+ category=ValidationCategory.DATA_TYPE,
552
+ severity=ValidationSeverity.WARNING,
553
+ message=f"Invalid {data_type} value for '{key}': {error_msg}",
554
+ table=table,
555
+ actual_value=value,
556
+ auto_fixable=True
557
+ ))
558
+ else:
559
+ issues.append(ValidationIssue(
560
+ category=ValidationCategory.DATA_TYPE,
561
+ severity=ValidationSeverity.WARNING,
562
+ message=f"Unknown data type '{data_type}' for '{key}'",
563
+ table=table,
564
+ actual_value=data_type
565
+ ))
566
+
567
+ return issues
568
+
569
+ except Exception as e:
570
+ return [ValidationIssue(
571
+ category=ValidationCategory.DATA_TYPE,
572
+ severity=ValidationSeverity.ERROR,
573
+ message=f"Data type validation failed for '{key}': {e}",
574
+ table=table
575
+ )]
576
+
577
+ def _validate_foreign_keys(self) -> List[ValidationIssue]:
578
+ """Validate foreign key constraints."""
579
+ issues = []
580
+
581
+ try:
582
+ conn = self.connection_manager.get_connection()
583
+
584
+ # Check foreign key violations
585
+ cursor = conn.execute("PRAGMA foreign_key_check")
586
+ violations = cursor.fetchall()
587
+
588
+ for violation in violations:
589
+ issues.append(ValidationIssue(
590
+ category=ValidationCategory.FOREIGN_KEY,
591
+ severity=ValidationSeverity.ERROR,
592
+ message=f"Foreign key violation: {violation}",
593
+ auto_fixable=False
594
+ ))
595
+
596
+ return issues
597
+
598
+ except Exception as e:
599
+ return [ValidationIssue(
600
+ category=ValidationCategory.FOREIGN_KEY,
601
+ severity=ValidationSeverity.ERROR,
602
+ message=f"Foreign key validation failed: {e}"
603
+ )]
604
+
605
+ def _validate_constraints(self) -> List[ValidationIssue]:
606
+ """Validate database constraints."""
607
+ issues = []
608
+
609
+ try:
610
+ conn = self.connection_manager.get_connection()
611
+
612
+ # Check unique constraints
613
+ unique_issues = self._check_unique_constraints(conn)
614
+ issues.extend(unique_issues)
615
+
616
+ # Check check constraints (if any)
617
+ check_issues = self._check_check_constraints(conn)
618
+ issues.extend(check_issues)
619
+
620
+ return issues
621
+
622
+ except Exception as e:
623
+ return [ValidationIssue(
624
+ category=ValidationCategory.CONSTRAINT,
625
+ severity=ValidationSeverity.ERROR,
626
+ message=f"Constraint validation failed: {e}"
627
+ )]
628
+
629
+ def _detect_corruption(self) -> List[ValidationIssue]:
630
+ """Detect various types of data corruption."""
631
+ issues = []
632
+
633
+ try:
634
+ conn = self.connection_manager.get_connection()
635
+
636
+ # SQLite integrity check
637
+ cursor = conn.execute("PRAGMA integrity_check")
638
+ result = cursor.fetchone()[0]
639
+
640
+ if result != "ok":
641
+ issues.append(ValidationIssue(
642
+ category=ValidationCategory.CORRUPTION,
643
+ severity=ValidationSeverity.CRITICAL,
644
+ message=f"Database corruption detected: {result}",
645
+ auto_fixable=False
646
+ ))
647
+
648
+ # Check for truncated data
649
+ truncation_issues = self._detect_truncated_data(conn)
650
+ issues.extend(truncation_issues)
651
+
652
+ # Check for encoding corruption
653
+ encoding_issues = self._detect_encoding_corruption(conn)
654
+ issues.extend(encoding_issues)
655
+
656
+ return issues
657
+
658
+ except Exception as e:
659
+ return [ValidationIssue(
660
+ category=ValidationCategory.CORRUPTION,
661
+ severity=ValidationSeverity.CRITICAL,
662
+ message=f"Corruption detection failed: {e}"
663
+ )]
664
+
665
+ def _validate_critical_data(self) -> List[ValidationIssue]:
666
+ """Validate critical settings that must exist."""
667
+ issues = []
668
+
669
+ try:
670
+ conn = self.connection_manager.get_connection()
671
+
672
+ # Check critical core settings
673
+ cursor = conn.execute("SELECT key FROM core_settings")
674
+ existing_keys = {row[0] for row in cursor.fetchall()}
675
+
676
+ missing_critical = self._critical_settings - existing_keys
677
+ for key in missing_critical:
678
+ issues.append(ValidationIssue(
679
+ category=ValidationCategory.MISSING_DATA,
680
+ severity=ValidationSeverity.CRITICAL,
681
+ message=f"Critical setting '{key}' is missing",
682
+ table='core_settings',
683
+ auto_fixable=True
684
+ ))
685
+
686
+ # Validate tab content completeness
687
+ cursor = conn.execute("SELECT tab_type, COUNT(*) FROM tab_content GROUP BY tab_type")
688
+ tab_counts = dict(cursor.fetchall())
689
+
690
+ for tab_type in ['input', 'output']:
691
+ count = tab_counts.get(tab_type, 0)
692
+ if count != 7: # Should have 7 tabs
693
+ issues.append(ValidationIssue(
694
+ category=ValidationCategory.MISSING_DATA,
695
+ severity=ValidationSeverity.ERROR,
696
+ message=f"Incomplete {tab_type} tabs: expected 7, found {count}",
697
+ table='tab_content',
698
+ auto_fixable=True
699
+ ))
700
+
701
+ return issues
702
+
703
+ except Exception as e:
704
+ return [ValidationIssue(
705
+ category=ValidationCategory.MISSING_DATA,
706
+ severity=ValidationSeverity.ERROR,
707
+ message=f"Critical data validation failed: {e}"
708
+ )]
709
+
710
+ def _validate_tool_settings(self) -> List[ValidationIssue]:
711
+ """Validate tool-specific settings."""
712
+ issues = []
713
+
714
+ try:
715
+ conn = self.connection_manager.get_connection()
716
+
717
+ # Get all tool settings
718
+ cursor = conn.execute("""
719
+ SELECT tool_name, setting_path, setting_value, data_type
720
+ FROM tool_settings
721
+ ORDER BY tool_name, setting_path
722
+ """)
723
+
724
+ tool_settings = {}
725
+ for tool_name, setting_path, setting_value, data_type in cursor.fetchall():
726
+ if tool_name not in tool_settings:
727
+ tool_settings[tool_name] = {}
728
+ tool_settings[tool_name][setting_path] = {
729
+ 'value': setting_value,
730
+ 'type': data_type
731
+ }
732
+
733
+ # Validate each tool's settings
734
+ for tool_name, settings in tool_settings.items():
735
+ tool_issues = self._validate_individual_tool_settings(tool_name, settings)
736
+ issues.extend(tool_issues)
737
+
738
+ return issues
739
+
740
+ except Exception as e:
741
+ return [ValidationIssue(
742
+ category=ValidationCategory.DATA_INTEGRITY,
743
+ severity=ValidationSeverity.ERROR,
744
+ message=f"Tool settings validation failed: {e}"
745
+ )]
746
+
747
+ # Type validators
748
+
749
+ def _validate_string(self, value: str) -> Tuple[bool, str]:
750
+ """Validate string value."""
751
+ try:
752
+ if not isinstance(value, str):
753
+ return False, f"Expected string, got {type(value).__name__}"
754
+ return True, ""
755
+ except Exception as e:
756
+ return False, str(e)
757
+
758
+ def _validate_integer(self, value: str) -> Tuple[bool, str]:
759
+ """Validate integer value."""
760
+ try:
761
+ int(value)
762
+ return True, ""
763
+ except ValueError:
764
+ return False, f"Cannot convert '{value}' to integer"
765
+ except Exception as e:
766
+ return False, str(e)
767
+
768
+ def _validate_float(self, value: str) -> Tuple[bool, str]:
769
+ """Validate float value."""
770
+ try:
771
+ float(value)
772
+ return True, ""
773
+ except ValueError:
774
+ return False, f"Cannot convert '{value}' to float"
775
+ except Exception as e:
776
+ return False, str(e)
777
+
778
+ def _validate_boolean(self, value: str) -> Tuple[bool, str]:
779
+ """Validate boolean value."""
780
+ try:
781
+ if value.lower() in ('true', 'false', '1', '0'):
782
+ return True, ""
783
+ return False, f"Invalid boolean value: '{value}'"
784
+ except Exception as e:
785
+ return False, str(e)
786
+
787
+ def _validate_json(self, value: str) -> Tuple[bool, str]:
788
+ """Validate JSON value."""
789
+ try:
790
+ json.loads(value)
791
+ return True, ""
792
+ except json.JSONDecodeError as e:
793
+ return False, f"Invalid JSON: {e}"
794
+ except Exception as e:
795
+ return False, str(e)
796
+
797
+ def _validate_array(self, value: str) -> Tuple[bool, str]:
798
+ """Validate array value."""
799
+ try:
800
+ parsed = json.loads(value)
801
+ if not isinstance(parsed, list):
802
+ return False, f"Expected array, got {type(parsed).__name__}"
803
+ return True, ""
804
+ except json.JSONDecodeError as e:
805
+ return False, f"Invalid array JSON: {e}"
806
+ except Exception as e:
807
+ return False, str(e)
808
+
809
+ # Helper methods for specific validation checks
810
+
811
+ def _validate_setting_value(self, key: str, value: Any) -> List[ValidationIssue]:
812
+ """Validate a specific setting value."""
813
+ issues = []
814
+
815
+ # Check against patterns
816
+ if key in self._data_patterns:
817
+ pattern = self._data_patterns[key]
818
+ if isinstance(value, str) and not re.match(pattern, value):
819
+ issues.append(ValidationIssue(
820
+ category=ValidationCategory.INVALID_FORMAT,
821
+ severity=ValidationSeverity.WARNING,
822
+ message=f"Setting '{key}' value '{value}' doesn't match expected pattern",
823
+ actual_value=value,
824
+ auto_fixable=True
825
+ ))
826
+
827
+ return issues
828
+
829
+ def _validate_tool_settings_structure(self, tool_settings: Dict[str, Any]) -> List[ValidationIssue]:
830
+ """Validate tool settings structure."""
831
+ issues = []
832
+
833
+ for tool_name, tool_config in tool_settings.items():
834
+ if not isinstance(tool_config, dict):
835
+ continue
836
+
837
+ # Validate against tool-specific rules
838
+ if tool_name in self._tool_validation_rules:
839
+ rules = self._tool_validation_rules[tool_name]
840
+ for setting_key, rule in rules.items():
841
+ if setting_key in tool_config:
842
+ value = tool_config[setting_key]
843
+ validation_issues = self._validate_tool_setting_value(
844
+ tool_name, setting_key, value, rule
845
+ )
846
+ issues.extend(validation_issues)
847
+
848
+ return issues
849
+
850
+ def _validate_tool_setting_value(self, tool_name: str, setting_key: str,
851
+ value: Any, rule: Dict[str, Any]) -> List[ValidationIssue]:
852
+ """Validate individual tool setting value."""
853
+ issues = []
854
+
855
+ # Type validation
856
+ expected_type = rule.get('type')
857
+ if expected_type and not isinstance(value, expected_type):
858
+ issues.append(ValidationIssue(
859
+ category=ValidationCategory.DATA_TYPE,
860
+ severity=ValidationSeverity.WARNING,
861
+ message=f"Tool '{tool_name}' setting '{setting_key}' has wrong type: expected {expected_type.__name__}, got {type(value).__name__}",
862
+ actual_value=value,
863
+ expected_value=expected_type.__name__,
864
+ auto_fixable=True
865
+ ))
866
+
867
+ # Range validation for numeric types
868
+ if isinstance(value, (int, float)):
869
+ min_val = rule.get('min')
870
+ max_val = rule.get('max')
871
+
872
+ if min_val is not None and value < min_val:
873
+ issues.append(ValidationIssue(
874
+ category=ValidationCategory.CONSTRAINT,
875
+ severity=ValidationSeverity.WARNING,
876
+ message=f"Tool '{tool_name}' setting '{setting_key}' value {value} is below minimum {min_val}",
877
+ actual_value=value,
878
+ expected_value=min_val,
879
+ auto_fixable=True
880
+ ))
881
+
882
+ if max_val is not None and value > max_val:
883
+ issues.append(ValidationIssue(
884
+ category=ValidationCategory.CONSTRAINT,
885
+ severity=ValidationSeverity.WARNING,
886
+ message=f"Tool '{tool_name}' setting '{setting_key}' value {value} is above maximum {max_val}",
887
+ actual_value=value,
888
+ expected_value=max_val,
889
+ auto_fixable=True
890
+ ))
891
+
892
+ # Length validation for strings
893
+ if isinstance(value, str):
894
+ max_length = rule.get('max_length')
895
+ if max_length and len(value) > max_length:
896
+ issues.append(ValidationIssue(
897
+ category=ValidationCategory.CONSTRAINT,
898
+ severity=ValidationSeverity.WARNING,
899
+ message=f"Tool '{tool_name}' setting '{setting_key}' value is too long: {len(value)} > {max_length}",
900
+ actual_value=len(value),
901
+ expected_value=max_length,
902
+ auto_fixable=True
903
+ ))
904
+
905
+ return issues
906
+
907
+ def _validate_tab_array(self, tab_type: str, tab_array: List[str]) -> List[ValidationIssue]:
908
+ """Validate tab array structure."""
909
+ issues = []
910
+
911
+ if not isinstance(tab_array, list):
912
+ issues.append(ValidationIssue(
913
+ category=ValidationCategory.DATA_TYPE,
914
+ severity=ValidationSeverity.ERROR,
915
+ message=f"{tab_type} is not an array",
916
+ actual_value=type(tab_array).__name__,
917
+ expected_value="list",
918
+ auto_fixable=True
919
+ ))
920
+ return issues
921
+
922
+ if len(tab_array) != 7:
923
+ issues.append(ValidationIssue(
924
+ category=ValidationCategory.CONSTRAINT,
925
+ severity=ValidationSeverity.ERROR,
926
+ message=f"{tab_type} should have 7 elements, found {len(tab_array)}",
927
+ actual_value=len(tab_array),
928
+ expected_value=7,
929
+ auto_fixable=True
930
+ ))
931
+
932
+ # Validate each tab content
933
+ for i, content in enumerate(tab_array):
934
+ if not isinstance(content, str):
935
+ issues.append(ValidationIssue(
936
+ category=ValidationCategory.DATA_TYPE,
937
+ severity=ValidationSeverity.WARNING,
938
+ message=f"{tab_type}[{i}] is not a string",
939
+ actual_value=type(content).__name__,
940
+ expected_value="string",
941
+ auto_fixable=True
942
+ ))
943
+
944
+ return issues
945
+
946
+ # Additional helper methods would continue here...
947
+ # (Implementing remaining validation methods for completeness)
948
+
949
+ def _check_null_values(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
950
+ """Check for NULL values in required fields."""
951
+ issues = []
952
+ # Implementation would check for NULL values in non-nullable columns
953
+ return issues
954
+
955
+ def _check_data_consistency(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
956
+ """Check data consistency across tables."""
957
+ issues = []
958
+ # Implementation would check for data consistency
959
+ return issues
960
+
961
+ def _check_referential_integrity(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
962
+ """Check referential integrity."""
963
+ issues = []
964
+ # Implementation would check referential integrity
965
+ return issues
966
+
967
+ def _check_unique_constraints(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
968
+ """Check unique constraints."""
969
+ issues = []
970
+ # Implementation would check unique constraints
971
+ return issues
972
+
973
+ def _check_check_constraints(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
974
+ """Check check constraints."""
975
+ issues = []
976
+ # Implementation would check check constraints
977
+ return issues
978
+
979
+ def _detect_orphaned_records(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
980
+ """Detect orphaned records."""
981
+ issues = []
982
+ # Implementation would detect orphaned records
983
+ return issues
984
+
985
+ def _detect_duplicate_records(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
986
+ """Detect duplicate records."""
987
+ issues = []
988
+ # Implementation would detect duplicates
989
+ return issues
990
+
991
+ def _detect_invalid_json(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
992
+ """Detect invalid JSON data."""
993
+ issues = []
994
+ # Implementation would detect invalid JSON
995
+ return issues
996
+
997
+ def _detect_encoding_issues(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
998
+ """Detect encoding issues."""
999
+ issues = []
1000
+ # Implementation would detect encoding issues
1001
+ return issues
1002
+
1003
+ def _detect_truncated_data(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
1004
+ """Detect truncated data."""
1005
+ issues = []
1006
+ # Implementation would detect truncated data
1007
+ return issues
1008
+
1009
+ def _detect_encoding_corruption(self, conn: sqlite3.Connection) -> List[ValidationIssue]:
1010
+ """Detect encoding corruption."""
1011
+ issues = []
1012
+ # Implementation would detect encoding corruption
1013
+ return issues
1014
+
1015
+ def _validate_individual_tool_settings(self, tool_name: str, settings: Dict[str, Any]) -> List[ValidationIssue]:
1016
+ """Validate individual tool settings."""
1017
+ issues = []
1018
+ # Implementation would validate individual tool settings
1019
+ return issues
1020
+
1021
    def _apply_automatic_fixes(self, issues: List[ValidationIssue]) -> None:
        """Apply automatic fixes to validation issues.

        Placeholder: currently a no-op — no issue is inspected or repaired.
        """
        # Implementation would apply automatic fixes
        pass
1025
+
1026
    def _repair_issue(self, conn: sqlite3.Connection, issue: ValidationIssue) -> bool:
        """Repair a specific validation issue.

        Placeholder: always returns False (nothing is repaired yet).
        """
        # Implementation would repair specific issues
        return False
1030
+
1031
+ def _log_validation_summary(self, issues: List[ValidationIssue]) -> None:
1032
+ """Log validation summary."""
1033
+ if not issues:
1034
+ self.logger.info("Database validation completed - no issues found")
1035
+ return
1036
+
1037
+ severity_counts = {}
1038
+ for issue in issues:
1039
+ severity_counts[issue.severity] = severity_counts.get(issue.severity, 0) + 1
1040
+
1041
+ summary = f"Database validation completed - {len(issues)} issues found: "
1042
+ summary += ", ".join([f"{count} {severity.value}" for severity, count in severity_counts.items()])
1043
+
1044
+ if any(issue.severity == ValidationSeverity.CRITICAL for issue in issues):
1045
+ self.logger.error(summary)
1046
+ elif any(issue.severity == ValidationSeverity.ERROR for issue in issues):
1047
+ self.logger.warning(summary)
1048
+ else:
1049
+ self.logger.info(summary)
1050
+
1051
+ def _generate_recommendations(self, issues: List[ValidationIssue]) -> List[str]:
1052
+ """Generate recommendations based on validation issues."""
1053
+ recommendations = []
1054
+
1055
+ critical_count = len([i for i in issues if i.severity == ValidationSeverity.CRITICAL])
1056
+ if critical_count > 0:
1057
+ recommendations.append(f"Address {critical_count} critical issues immediately")
1058
+
1059
+ auto_fixable_count = len([i for i in issues if i.auto_fixable and not i.fix_applied])
1060
+ if auto_fixable_count > 0:
1061
+ recommendations.append(f"Run automatic repair for {auto_fixable_count} fixable issues")
1062
+
1063
+ corruption_count = len([i for i in issues if i.category == ValidationCategory.CORRUPTION])
1064
+ if corruption_count > 0:
1065
+ recommendations.append("Consider restoring from backup due to corruption")
1066
+
1067
1067
  return recommendations