@intentsolutionsio/data-validation-engine 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "data-validation-engine",
3
+ "version": "1.0.0",
4
+ "description": "Database plugin for data-validation-engine",
5
+ "author": {
6
+ "name": "Claude Code Plugins",
7
+ "email": "[email protected]"
8
+ },
9
+ "repository": "https://github.com/jeremylongshore/claude-code-plugins",
10
+ "license": "MIT",
11
+ "keywords": [
12
+ "database",
13
+ "backend",
14
+ "validation",
15
+ "agent-skills"
16
+ ]
17
+ }
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-2026 Jeremy Longshore & Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # Data Validation Engine Plugin
2
+
3
+ Database plugin for data-validation-engine
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ /plugin install data-validation-engine@claude-code-plugins-plus
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ /data-validation-engine
15
+ ```
16
+
17
+ ## Features
18
+
19
+ - Database best practices
20
+ - Multi-database support
21
+ - Production-ready implementations
22
+ - Comprehensive documentation
23
+
24
+ ## Requirements
25
+
26
+ - Database access
27
+ - Appropriate permissions
28
+
29
+ ## Files
30
+
31
+ - `commands/data-validation-engine.md` or `agents/data-validation-engine-agent.md` - Main plugin logic
32
+
33
+ ## License
34
+
35
+ MIT
@@ -0,0 +1,19 @@
1
+ ---
2
+ name: validation-agent
3
+ description: Implement data validation rules
4
+ ---
5
+ # Data Validation Engine
6
+
7
+ Implement comprehensive data validation at database and application levels.
8
+
9
+ ## Validation Types
10
+
11
+ 1. **Type Validation**: Correct data types
12
+ 2. **Range Validation**: Min/max values
13
+ 3. **Format Validation**: Regex patterns
14
+ 4. **Referential Integrity**: Foreign key validation
15
+ 5. **Business Rules**: Custom validation logic
16
+
17
+ ## When to Activate
18
+
19
+ Activate when the user needs to implement data validation rules to protect database integrity.
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "@intentsolutionsio/data-validation-engine",
3
+ "version": "1.0.0",
4
+ "description": "Database plugin for data-validation-engine",
5
+ "keywords": [
6
+ "database",
7
+ "backend",
8
+ "validation",
9
+ "agent-skills",
10
+ "claude-code",
11
+ "claude-plugin",
12
+ "tonsofskills"
13
+ ],
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "git+https://github.com/jeremylongshore/claude-code-plugins-plus-skills.git",
17
+ "directory": "plugins/database/data-validation-engine"
18
+ },
19
+ "homepage": "https://tonsofskills.com/plugins/data-validation-engine",
20
+ "bugs": "https://github.com/jeremylongshore/claude-code-plugins-plus-skills/issues",
21
+ "license": "MIT",
22
+ "author": {
23
+ "name": "Claude Code Plugins",
24
+ "email": "[email protected]"
25
+ },
26
+ "publishConfig": {
27
+ "access": "public"
28
+ },
29
+ "files": [
30
+ "README.md",
31
+ ".claude-plugin",
32
+ "skills",
33
+ "agents"
34
+ ],
35
+ "scripts": {
36
+ "postinstall": "node -e \"console.log(\\\"\\\\n→ This npm package is a tracking/proof artifact. Install the plugin via:\\\\n ccpi install data-validation-engine\\\\n or /plugin install data-validation-engine@claude-code-plugins-plus in Claude Code\\\\n\\\")\""
37
+ }
38
+ }
@@ -0,0 +1,100 @@
1
+ ---
2
+ name: validating-database-integrity
3
+ description: |
4
+ Use when you need to ensure database integrity through comprehensive data validation.
5
+ This skill validates data types, ranges, formats, referential integrity, and business rules.
6
+ Trigger with phrases like "validate database data", "implement data validation rules",
7
+ "enforce data integrity constraints", or "validate data formats".
8
+
9
+ allowed-tools: Read, Write, Edit, Grep, Glob, Bash(psql:*), Bash(mysql:*)
10
+ version: 1.0.0
11
+ author: Jeremy Longshore <jeremy@intentsolutions.io>
12
+ license: MIT
13
+ compatible-with: claude-code, codex, openclaw
14
+ tags: [database, validating-database]
15
+ ---
16
+ # Data Validation Engine
17
+
18
+ ## Overview
19
+
20
+ Implement and enforce data integrity rules at the database level using CHECK constraints, triggers, foreign keys, and custom validation functions across PostgreSQL and MySQL.
21
+
22
+ ## Prerequisites
23
+
24
+ - Database credentials with ALTER TABLE and CREATE FUNCTION permissions
25
+ - `psql` or `mysql` CLI for executing validation queries
26
+ - Current schema documentation or access to `information_schema` for column specifications
27
+ - Business rules document describing valid data ranges, formats, and relationships
28
+ - Backup of production data before applying new constraints (constraints may reject existing invalid data)
29
+
30
+ ## Instructions
31
+
32
+ 1. Audit existing data quality by running validation queries before adding constraints. Check for NULL values in columns that should be required: `SELECT COUNT(*) FILTER (WHERE column_name IS NULL) AS null_count, COUNT(*) AS total FROM table_name` (PostgreSQL syntax; on MySQL use `SUM(column_name IS NULL)` in place of the `FILTER` clause).
33
+
34
+ 2. Detect orphaned records (broken referential integrity): `SELECT c.id FROM child_table c LEFT JOIN parent_table p ON c.parent_id = p.id WHERE p.id IS NULL`. Document all orphaned records for cleanup or archival before adding foreign key constraints.
35
+
36
+ 3. Validate data format compliance:
37
+ - Email format: `SELECT email FROM users WHERE email !~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'`
38
+ - Phone format: `SELECT phone FROM contacts WHERE phone !~ '^\+?[1-9]\d{6,14}$'`
39
+ - URL format: `SELECT url FROM links WHERE url !~ '^https?://.+'`
40
+ - Date ranges: `SELECT * FROM events WHERE start_date > end_date`
41
+
42
+ 4. Check numeric range violations: `SELECT * FROM products WHERE price < 0 OR price > 999999.99` and `SELECT * FROM users WHERE age < 0 OR age > 150`. Map each column to its valid range based on business rules.
43
+
44
+ 5. Identify duplicate records that violate intended uniqueness: `SELECT email, COUNT(*) FROM users GROUP BY email HAVING COUNT(*) > 1`. Determine which duplicate to keep (most recent, most complete) and plan deduplication.
45
+
46
+ 6. Generate CHECK constraints for validated rules:
47
+ - `ALTER TABLE products ADD CONSTRAINT chk_price_positive CHECK (price >= 0)`
48
+ - `ALTER TABLE users ADD CONSTRAINT chk_email_format CHECK (email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')`
49
+ - `ALTER TABLE events ADD CONSTRAINT chk_date_order CHECK (start_date <= end_date)`
50
+ - `ALTER TABLE orders ADD CONSTRAINT chk_status_valid CHECK (status IN ('pending', 'processing', 'shipped', 'delivered', 'cancelled'))`
51
+
52
+ 7. Create foreign key constraints with appropriate cascade behavior:
53
+ - `ALTER TABLE orders ADD CONSTRAINT fk_orders_customer FOREIGN KEY (customer_id) REFERENCES customers(id) ON DELETE RESTRICT`
54
+ - Use `ON DELETE CASCADE` for dependent data (order_items when order is deleted)
55
+ - Use `ON DELETE SET NULL` for optional relationships (assigned_to when user is deactivated)
56
+
57
+ 8. Implement complex business rule validation using database triggers when CHECK constraints are insufficient:
58
+ - Trigger that prevents order total from exceeding customer credit limit
59
+ - Trigger that enforces at least one admin user per organization
60
+ - Trigger that validates JSON schema for JSONB columns
61
+
62
+ 9. Apply constraints in a safe two-phase approach:
63
+ - Phase 1: Run validation queries to find all violations. Generate data cleanup scripts. Execute cleanup.
64
+ - Phase 2: Apply constraints with `NOT VALID` option (PostgreSQL): `ALTER TABLE users ADD CONSTRAINT chk_email CHECK (email ~ '...') NOT VALID` then `ALTER TABLE users VALIDATE CONSTRAINT chk_email` (validates existing data without blocking writes).
65
+
66
+ 10. Generate a data quality report summarizing: total records per table, violation counts by constraint type, cleanup actions taken, constraints applied, and remaining data quality issues requiring manual review.
67
+
68
+ ## Output
69
+
70
+ - **Data quality audit report** with violation counts, examples, and severity ratings
71
+ - **Data cleanup scripts** (SQL) to fix violations before constraint application
72
+ - **Constraint DDL scripts** with CHECK, FOREIGN KEY, NOT NULL, and UNIQUE constraints
73
+ - **Validation triggers** for complex business rules beyond simple constraints
74
+ - **Ongoing validation queries** for periodic data quality monitoring
75
+
76
+ ## Error Handling
77
+
78
+ | Error | Cause | Solution |
79
+ |-------|-------|---------|
80
+ | `check constraint violated by existing row` | Existing data fails the new constraint | Run the validation query first to find violations; clean up data; use `NOT VALID` option to add constraint without checking existing data, then validate separately |
81
+ | `cannot add foreign key: referenced row not found` | Orphaned child records reference non-existent parent | Clean up orphaned records first with DELETE or UPDATE to valid parent; or insert missing parent records |
82
+ | `column cannot be made NOT NULL: contains NULL values` | Existing rows have NULL in the target column | Backfill NULLs with `UPDATE table SET column = default_value WHERE column IS NULL` before adding NOT NULL |
83
+ | Trigger function causes performance regression | Complex validation logic executes on every INSERT/UPDATE | Optimize trigger function; use WHEN clause to limit trigger firing; consider CHECK constraints instead of triggers for simple rules |
84
+ | Circular foreign key prevents constraint creation | Tables reference each other, preventing creation order | Use `ALTER TABLE ADD CONSTRAINT` after both tables exist; or use `DEFERRABLE INITIALLY DEFERRED` constraints |
85
+
86
+ ## Examples
87
+
88
+ **Auditing a legacy database with 50,000 invalid email addresses**: Validation query reveals 50,000 of 2M user records have invalid email formats (missing @, double dots, spaces). A cleanup script normalizes common issues (trim whitespace, lowercase) and flags 3,000 unfixable records for manual review. After cleanup, a CHECK constraint with regex validation is applied.
89
+
90
+ **Enforcing referential integrity on a database without foreign keys**: An application relied on application-level FK enforcement, resulting in 12,000 orphaned order_items, 800 orphaned payments, and 200 orphaned reviews. Cleanup scripts archive orphaned records to backup tables, then foreign key constraints with `ON DELETE CASCADE` are added. A nightly validation job monitors for new orphans.
91
+
92
+ **Implementing business rules for a financial application**: Constraints enforce: account balance cannot be negative (`CHECK (balance >= 0)`), transfer amount must be positive (`CHECK (amount > 0)`), transaction date cannot be in the future (`CHECK (transaction_date <= CURRENT_DATE)`), and a trigger prevents transfers between accounts owned by different customers unless explicitly authorized.
93
+
94
+ ## Resources
95
+
96
+ - PostgreSQL CHECK constraints: https://www.postgresql.org/docs/current/ddl-constraints.html
97
+ - PostgreSQL triggers: https://www.postgresql.org/docs/current/triggers.html
98
+ - MySQL CHECK constraints (8.0.16+): https://dev.mysql.com/doc/refman/8.0/en/create-table-check-constraints.html
99
+ - Data validation patterns: https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONSTRAINTS-CHECK-CONSTRAINTS
100
+ - NOT VALID constraint option: https://www.postgresql.org/docs/current/sql-altertable.html
@@ -0,0 +1,5 @@
1
+ # Assets
2
+
3
+ Bundled resources for data-validation-engine skill
4
+
5
+ - [ ] validation_report_template.html: HTML template for generating visually appealing and informative data validation reports.
@@ -0,0 +1,4 @@
1
+ # References
2
+
3
+ Bundled resources for data-validation-engine skill
4
+
@@ -0,0 +1,7 @@
1
+ # Scripts
2
+
3
+ Bundled resources for data-validation-engine skill
4
+
5
+ - [x] validate_data.py: Script to execute data validation checks against a specified database and table, logging any discrepancies.
6
+ - [x] generate_validation_report.py: Script to generate a comprehensive report of data validation results, including statistics and identified issues.
7
+ - [x] configure_validation_rules.py: Script to interactively configure data validation rules for a given database and table, storing the rules in a configuration file.
@@ -0,0 +1,481 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Interactively configure data validation rules for a database table.
4
+
5
+ This script allows users to define and customize validation rules for database tables,
6
+ which are then saved in a configuration file for use by other validation scripts.
7
+ """
8
+
9
+ import argparse
10
+ import json
11
+ import sys
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Dict, List, Any, Optional
15
+
16
+
17
class ValidationRuleConfigurator:
    """Collect validation rules for one table and persist them as JSON.

    Each rule is a plain dictionary holding a ``rule`` type, the ``column`` it
    applies to, a human-readable ``description`` and any rule-specific keys
    (``min``/``max``, ``pattern`` or ``query``).
    """

    def __init__(self):
        """Initialize configurator with no rules and empty table/database names."""
        self.rules: List[Dict[str, Any]] = []
        self.table_name = ""
        self.database = ""

    def add_not_null_rule(self, column: str):
        """
        Add a NOT NULL validation rule.

        Args:
            column: Column name
        """
        self.rules.append({
            "rule": "not_null",
            "column": column,
            "description": f"Column {column} must not contain NULL values"
        })

    def add_unique_rule(self, column: str):
        """
        Add a UNIQUE validation rule.

        Args:
            column: Column name
        """
        self.rules.append({
            "rule": "unique",
            "column": column,
            "description": f"Column {column} must contain unique values"
        })

    def add_range_rule(
        self,
        column: str,
        min_value: float,
        max_value: float
    ):
        """
        Add a RANGE validation rule.

        Args:
            column: Column name
            min_value: Minimum allowed value
            max_value: Maximum allowed value

        Raises:
            ValueError: If the bounds do not satisfy min_value <= max_value
                (this also rejects NaN bounds); no rule is added.
        """
        # A range whose lower bound exceeds its upper bound can never be
        # satisfied, so refuse it instead of storing a rule that fails
        # every row.
        if not min_value <= max_value:
            raise ValueError(
                f"Invalid range for {column}: "
                f"requires min <= max, got {min_value} and {max_value}"
            )

        self.rules.append({
            "rule": "range",
            "column": column,
            "min": min_value,
            "max": max_value,
            "description": f"Column {column} values must be between {min_value} and {max_value}"
        })

    def add_pattern_rule(self, column: str, pattern: str):
        """
        Add a PATTERN (regex) validation rule.

        Args:
            column: Column name
            pattern: Regular expression pattern (stored as given, not compiled)
        """
        self.rules.append({
            "rule": "pattern",
            "column": column,
            "pattern": pattern,
            "description": f"Column {column} values must match pattern: {pattern}"
        })

    def add_custom_rule(self, column: str, query: str):
        """
        Add a custom SQL validation rule.

        Args:
            column: Column name
            query: Custom SQL query (stored as given, not executed here)
        """
        self.rules.append({
            "rule": "custom",
            "column": column,
            "query": query,
            "description": f"Custom validation on {column}"
        })

    def remove_rule(self, index: int) -> bool:
        """
        Remove a rule by index.

        Args:
            index: Zero-based rule index; negative indexes are rejected

        Returns:
            True if successful, False otherwise
        """
        if 0 <= index < len(self.rules):
            del self.rules[index]
            return True
        return False

    def get_config_dict(self) -> Dict[str, Any]:
        """
        Get configuration as dictionary.

        Returns:
            Configuration dictionary with the table, database, a fresh
            ``created_at`` timestamp and the list of rules
        """
        return {
            "table": self.table_name,
            "database": self.database,
            "created_at": datetime.now().isoformat(),
            "validations": self.rules
        }

    def load_config(self, filepath: str) -> bool:
        """
        Load configuration from JSON file.

        The configurator is left untouched when loading fails.

        Args:
            filepath: Path to JSON file

        Returns:
            True if successful, False otherwise (the reason is printed to
            stderr)
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                config = json.load(f)
        except (OSError, ValueError) as e:
            # OSError covers missing/unreadable paths and directories;
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading config: {e}", file=sys.stderr)
            return False

        # Valid JSON is not necessarily a valid configuration: a top-level
        # list or string has no .get() and must not crash the caller.
        if not isinstance(config, dict):
            print(
                "Error loading config: top-level JSON value must be an object",
                file=sys.stderr
            )
            return False

        rules = config.get("validations")
        if rules is None:
            rules = []
        if not isinstance(rules, list):
            print(
                "Error loading config: 'validations' must be a list",
                file=sys.stderr
            )
            return False

        # "or" maps an explicit JSON null to the same empty default that a
        # missing key gets.
        self.table_name = config.get("table") or ""
        self.database = config.get("database") or ""
        self.rules = rules
        return True

    def save_config(self, filepath: str) -> bool:
        """
        Save configuration to JSON file.

        Args:
            filepath: Path to save JSON file

        Returns:
            True if successful, False otherwise (the reason is printed to
            stderr)
        """
        try:
            # Serialize before opening the file so that a rule json cannot
            # encode does not leave a truncated file behind.
            payload = json.dumps(self.get_config_dict(), indent=2)
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(payload)
            return True
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving config: {e}", file=sys.stderr)
            return False
173
+
174
+
175
def _prompt_column() -> str:
    """Ask for a column name; report the problem and return "" if none is given."""
    column = input("Enter column name: ").strip()
    if not column:
        print("Error: Column name is required")
    return column


def interactive_mode(configurator: ValidationRuleConfigurator):
    """
    Run interactive configuration mode.

    Prompts for the table and database, then loops over a menu that adds,
    lists or removes rules until the user chooses "done". Finally prints a
    summary and optionally saves the configuration. Exits the process with
    status 1 when no table name is entered or when saving fails.

    Args:
        configurator: ValidationRuleConfigurator instance
    """
    print("\n" + "="*60)
    print("Data Validation Rule Configurator")
    print("="*60 + "\n")

    # Get table and database info
    configurator.table_name = input("Enter table name: ").strip()
    if not configurator.table_name:
        print("Error: Table name is required")
        sys.exit(1)

    configurator.database = input("Enter database name (optional): ").strip()

    print("\nConfigure validation rules for this table.")
    print("Enter 'help' for rule descriptions, 'done' when finished.\n")

    while True:
        print("\nAvailable rules:")
        print(" 1. not-null - Column cannot contain NULL values")
        print(" 2. unique - Column values must be unique")
        print(" 3. range - Column values must be within min/max")
        print(" 4. pattern - Column values must match regex pattern")
        print(" 5. custom - Custom SQL validation query")
        print(" 6. list - Show current rules")
        print(" 7. remove - Remove a rule")
        print(" 8. done - Finish configuration")

        choice = input("\nEnter rule type (1-8): ").strip().lower()

        if choice in ["done", "8"]:
            break
        elif choice == "help":
            print_help()
        elif choice in ["1", "not-null"]:
            column = _prompt_column()
            if column:
                configurator.add_not_null_rule(column)
                print(f"✓ Added NOT NULL rule for {column}")
        elif choice in ["2", "unique"]:
            column = _prompt_column()
            if column:
                configurator.add_unique_rule(column)
                print(f"✓ Added UNIQUE rule for {column}")
        elif choice in ["3", "range"]:
            # Like every other rule type, a range rule needs a column name.
            column = _prompt_column()
            if not column:
                continue
            try:
                min_val = float(input("Enter minimum value: "))
                max_val = float(input("Enter maximum value: "))
            except ValueError:
                print("Error: Invalid numeric values")
                continue
            # "not <=" also rejects NaN, which compares false to everything.
            if not min_val <= max_val:
                print("Error: Minimum value must not exceed maximum value")
                continue
            configurator.add_range_rule(column, min_val, max_val)
            print(f"✓ Added RANGE rule for {column} [{min_val}, {max_val}]")
        elif choice in ["4", "pattern"]:
            column = _prompt_column()
            if column:
                pattern = input("Enter regex pattern: ").strip()
                if pattern:
                    configurator.add_pattern_rule(column, pattern)
                    print(f"✓ Added PATTERN rule for {column}")
                else:
                    print("Error: Pattern is required")
        elif choice in ["5", "custom"]:
            column = _prompt_column()
            if column:
                query = input("Enter SQL query: ").strip()
                if query:
                    configurator.add_custom_rule(column, query)
                    print(f"✓ Added CUSTOM rule for {column}")
                else:
                    print("Error: SQL query is required")
        elif choice in ["6", "list"]:
            list_rules(configurator)
        elif choice in ["7", "remove"]:
            list_rules(configurator)
            try:
                # The listing is 1-based; remove_rule expects a 0-based index.
                idx = int(input("Enter rule number to remove: ")) - 1
                if configurator.remove_rule(idx):
                    print(f"✓ Removed rule {idx + 1}")
                else:
                    print("Error: Invalid rule number")
            except ValueError:
                print("Error: Invalid input")
        else:
            print("Invalid choice. Please try again.")

    # Summary and save
    print("\n" + "="*60)
    print("Configuration Summary")
    print("="*60)
    print(f"Table: {configurator.table_name}")
    print(f"Database: {configurator.database or '(none specified)'}")
    print(f"Total Rules: {len(configurator.rules)}\n")

    list_rules(configurator)

    # Save configuration
    save_choice = input("\nSave configuration? (y/n): ").strip().lower()
    if save_choice in ["y", "yes"]:
        filepath = input("Enter filename to save (default: validation_rules.json): ").strip()
        if not filepath:
            filepath = "validation_rules.json"

        if configurator.save_config(filepath):
            print(f"✓ Configuration saved to {filepath}")
        else:
            print("Error: Failed to save configuration")
            sys.exit(1)
282
+
283
+
284
def list_rules(configurator: ValidationRuleConfigurator):
    """
    Print the configured rules as a numbered list.

    Each rule shows its type, column and description; range, pattern and
    custom rules get one extra line with their specific setting.

    Args:
        configurator: ValidationRuleConfigurator instance
    """
    entries = configurator.rules
    if not entries:
        print("No rules configured yet.")
        return

    separator = "-" * 60
    # Rule kinds that carry one additional detail line, and how to render it.
    detail_lines = {
        "range": lambda r: f" Range: [{r.get('min')}, {r.get('max')}]",
        "pattern": lambda r: f" Pattern: {r.get('pattern')}",
        "custom": lambda r: f" Query: {r.get('query')}",
    }

    print("\nConfigured Rules:")
    print(separator)
    for number, entry in enumerate(entries, 1):
        label = entry.get("rule", "unknown").upper()
        target = entry.get("column", "N/A")
        summary = entry.get("description", "")

        print(f"{number}. [{label}] {target}")
        print(f" {summary}")

        render = detail_lines.get(entry.get("rule"))
        if render is not None:
            print(render(entry))

    print(separator)
314
+
315
+
316
+ def print_help():
317
+ """Print a description and example for each rule type (NOT NULL, UNIQUE, RANGE, PATTERN, CUSTOM) to stdout."""
318
+ help_text = r"""
319
+ Rule Types:
320
+
321
+ NOT NULL
322
+ Description: Ensures column contains no NULL values
323
+ Example: Validate that 'user_id' is never NULL
324
+
325
+ UNIQUE
326
+ Description: Ensures all values in column are unique
327
+ Example: Email addresses must be unique for user table
328
+
329
+ RANGE
330
+ Description: Ensures numeric values fall within min/max bounds
331
+ Example: Age must be between 0 and 150
332
+ Price must be between 0 and 999999
333
+
334
+ PATTERN
335
+ Description: Ensures values match a regular expression
336
+ Example: Email format: ^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$
337
+ Phone: ^\+?1?\d{9,15}$
338
+
339
+ CUSTOM
340
+ Description: Run custom SQL query for complex validations
341
+ Example: SELECT COUNT(*) FROM users WHERE created_at > updated_at
342
+ (Ensures created_at is always before updated_at)
343
+ """
344
+ print(help_text)
345
+
346
+
347
def create_from_args(args: argparse.Namespace) -> ValidationRuleConfigurator:
    """
    Create configurator from command-line arguments.

    Reads ``table``, ``database``, ``not_null``, ``unique`` and ``range``
    from ``args``. Empty column names (for example from a trailing comma)
    are skipped. Malformed range specifications are skipped with a warning
    on stderr.

    Args:
        args: Parsed arguments

    Returns:
        Configured ValidationRuleConfigurator
    """
    configurator = ValidationRuleConfigurator()
    # Fall back to "" so an omitted option matches the configurator's own
    # defaults instead of being saved as JSON null.
    configurator.table_name = args.table or ""
    configurator.database = args.database or ""

    # Add rules from arguments
    if args.not_null:
        for column in args.not_null.split(","):
            column = column.strip()
            if column:
                configurator.add_not_null_rule(column)

    if args.unique:
        for column in args.unique.split(","):
            column = column.strip()
            if column:
                configurator.add_unique_rule(column)

    if args.range:
        # Format: "column:min:max" or "column:min:max,column2:min2:max2"
        for range_spec in args.range.split(","):
            if not range_spec.strip():
                continue  # tolerate stray commas
            parts = [part.strip() for part in range_spec.split(":")]
            try:
                if len(parts) != 3 or not parts[0]:
                    raise ValueError("expected column:min:max")
                min_val = float(parts[1])
                max_val = float(parts[2])
                # "not <=" also rejects NaN bounds.
                if not min_val <= max_val:
                    raise ValueError("minimum exceeds maximum")
                configurator.add_range_rule(parts[0], min_val, max_val)
            except ValueError:
                # Every malformed spec is reported, not only non-numeric ones.
                print(
                    f"Warning: Invalid range specification: {range_spec}",
                    file=sys.stderr
                )

    return configurator
384
+
385
+
386
def main():
    """Main entry point for rule configuration.

    Three modes, chosen from the arguments:

    * ``--load FILE``: load an existing configuration, apply any
      ``--not-null``/``--unique``/``--range`` flags on top of it, and save.
    * ``--table NAME``: build a configuration from the flags and save.
    * neither: run the interactive prompt, which handles saving itself.

    The configuration is written to ``--output`` (default
    ``validation_rules.json``). Exits with status 0 on success and 1 on
    failure.
    """
    parser = argparse.ArgumentParser(
        description="Configure data validation rules for database tables",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Interactive mode (recommended)
%(prog)s

# Command-line mode with single table
%(prog)s --table users --database mydb \\
--not-null id,email \\
--unique email \\
--output rules.json

# With range validation
%(prog)s --table products --database catalog \\
--range "price:0:10000,quantity:0:1000000" \\
--output rules.json

# Load and modify existing rules
%(prog)s --load rules.json --not-null phone --output rules.json
"""
    )

    parser.add_argument(
        "--table",
        help="Table name for non-interactive mode"
    )
    parser.add_argument(
        "--database",
        help="Database name"
    )
    parser.add_argument(
        "--not-null",
        help="Comma-separated columns that must not be NULL"
    )
    parser.add_argument(
        "--unique",
        help="Comma-separated columns that must be unique"
    )
    parser.add_argument(
        "--range",
        help="Range validations in format: col:min:max,col2:min2:max2"
    )
    parser.add_argument(
        "--load",
        help="Load existing configuration file"
    )
    parser.add_argument(
        "--output",
        help="Output file for configuration (JSON)"
    )

    args = parser.parse_args()

    try:
        # Determine mode
        if args.load:
            # Load existing config
            configurator = ValidationRuleConfigurator()
            if not configurator.load_config(args.load):
                sys.exit(1)
            print(f"Loaded configuration from {args.load}")
            list_rules(configurator)

            # Apply rule flags on top of the loaded rules so the documented
            # "--load rules.json --not-null phone" workflow actually
            # modifies the configuration.
            configurator.rules.extend(create_from_args(args).rules)

            # Command-line names only fill in what the file left empty.
            if args.table and not configurator.table_name:
                configurator.table_name = args.table
            if args.database and not configurator.database:
                configurator.database = args.database
        elif args.table:
            # Command-line mode
            configurator = create_from_args(args)
        else:
            # Interactive mode (prompts for its own save location)
            configurator = ValidationRuleConfigurator()
            interactive_mode(configurator)
            sys.exit(0)

        # Only the load and command-line modes reach this point.
        output_file = args.output or "validation_rules.json"

        if configurator.save_config(output_file):
            print(f"\n✓ Configuration saved to {output_file}")
            print(f"Total rules: {len(configurator.rules)}")
            sys.exit(0)
        else:
            sys.exit(1)

    except Exception as e:
        # Top-level boundary: report and fail. sys.exit() raises SystemExit,
        # which is not an Exception subclass, so the exits above pass through.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,385 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Generate comprehensive report of data validation results.
4
+
5
+ This script creates detailed HTML, JSON, or Markdown reports from validation results,
6
+ including summary statistics, identified issues grouped by severity, and recommendations.
7
+ """
8
+
9
+ import argparse
10
+ import json
11
+ import sys
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Dict, List, Any, Optional
15
+
16
+
17
class ValidationReportGenerator:
    """Build JSON, Markdown, or HTML reports from validation results.

    Attributes are populated by :meth:`load_results` (or may be assigned
    directly by callers):

    * ``results``  - list of per-check dicts; each is read for a boolean
      ``"valid"`` key and a ``"details"`` dict (``rule``, ``column``, and
      rule-specific counters).
    * ``metadata`` - dict with ``"table"``, ``"timestamp"`` and
      ``"statistics"`` entries.
    """

    # Rule name -> severity buckets used by _determine_severity.
    _CRITICAL_RULES = ("not_null", "unique", "foreign_key")
    _HIGH_RULES = ("range", "pattern")
    _MEDIUM_RULES = ("custom",)

    # Severity levels in display order, paired with their Markdown headings.
    _SEVERITY_HEADINGS = (
        ("critical", "Critical Issues"),
        ("high", "High Priority Issues"),
        ("medium", "Medium Priority Issues"),
        ("low", "Low Priority Issues"),
    )

    def __init__(self):
        """Initialize report generator with no results loaded."""
        self.results = []
        self.metadata = {}

    def load_results(self, filepath: str) -> bool:
        """
        Load validation results from JSON file.

        Args:
            filepath: Path to JSON results file

        Returns:
            True if successful, False otherwise (an error message is
            written to stderr in that case)
        """
        try:
            # JSON interchange files are UTF-8; do not depend on the locale.
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # OSError covers missing/unreadable paths; ValueError covers
            # json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading results: {e}", file=sys.stderr)
            return False

        if not isinstance(data, dict):
            print(
                "Error loading results: top-level JSON value must be an object",
                file=sys.stderr,
            )
            return False

        validations = data.get("validations")
        if validations is None:
            validations = []
        if not isinstance(validations, list) or not all(
            isinstance(entry, dict) for entry in validations
        ):
            print(
                "Error loading results: 'validations' must be a list of objects",
                file=sys.stderr,
            )
            return False

        self.results = validations
        self.metadata = {
            "table": data.get("table", "unknown"),
            "timestamp": data.get("timestamp", datetime.now().isoformat()),
            "statistics": data.get("statistics", {})
        }
        return True

    def generate_summary(self) -> Dict[str, Any]:
        """
        Generate summary statistics from results.

        Returns:
            Dictionary with ``total``, ``passed``, ``failed``, ``pass_rate``
            (percentage), ``issues`` (one entry per failed check) and a
            ``<severity>_issues`` counter for each severity level. The same
            keys are present when there are no results, so report builders
            can index them unconditionally.
        """
        total = len(self.results)
        passed = sum(1 for r in self.results if r.get("valid", False))

        issues: List[Dict[str, Any]] = []
        for result in self.results:
            if result.get("valid", False):
                continue
            # Tolerate "details": null in the input.
            details = result.get("details") or {}
            issues.append({
                "rule": details.get("rule", "unknown"),
                "column": details.get("column", "unknown"),
                "severity": self._determine_severity(details.get("rule"))
            })

        summary: Dict[str, Any] = {
            "total": total,
            "passed": passed,
            "failed": total - passed,
            "pass_rate": (passed / total * 100) if total > 0 else 0.0,
            "issues": issues,
        }
        for severity, _heading in self._SEVERITY_HEADINGS:
            summary[f"{severity}_issues"] = sum(
                1 for issue in issues if issue["severity"] == severity
            )
        return summary

    def _determine_severity(self, rule: Optional[str]) -> str:
        """
        Determine issue severity based on rule type.

        Args:
            rule: Validation rule name (may be None when the result
                carries no rule)

        Returns:
            Severity level (critical, high, medium, low); unrecognized
            rules are "low"
        """
        if rule in self._CRITICAL_RULES:
            return "critical"
        if rule in self._HIGH_RULES:
            return "high"
        if rule in self._MEDIUM_RULES:
            return "medium"
        return "low"

    def generate_json_report(self) -> str:
        """Generate JSON report containing metadata, summary and raw results."""
        report = {
            "metadata": self.metadata,
            "summary": self.generate_summary(),
            "timestamp": datetime.now().isoformat(),
            "validations": self.results
        }
        return json.dumps(report, indent=2)

    def generate_markdown_report(self) -> str:
        """Generate Markdown report.

        Sections: executive summary table, optional table statistics,
        issues grouped by severity, per-failure details, recommendations.
        """
        summary = self.generate_summary()

        md = [
            "# Data Validation Report",
            f"\n**Table:** {self.metadata.get('table', 'Unknown')}",
            f"**Generated:** {datetime.now().isoformat()}",
            "",
            "## Executive Summary\n",
            "| Metric | Value |",
            "|--------|-------|",
            f"| Total Checks | {summary['total']} |",
            f"| Passed | {summary['passed']} |",
            f"| Failed | {summary['failed']} |",
            f"| Pass Rate | {summary['pass_rate']:.1f}% |",
            "",
        ]

        # Table statistics (skipped when the collector recorded an error)
        stats = self.metadata.get("statistics", {})
        if stats and "error" not in stats:
            md.append("## Table Statistics\n")
            md.append(f"- **Total Rows:** {stats.get('row_count', 'N/A')}")
            md.append(f"- **Total Columns:** {stats.get('column_count', 'N/A')}")
            md.append("")

        if summary['failed'] > 0:
            # Issues section, one sub-heading per severity that occurred
            md.append("## Issues Identified\n")
            for severity, heading in self._SEVERITY_HEADINGS:
                matching = [
                    issue for issue in summary['issues']
                    if issue['severity'] == severity
                ]
                if not matching:
                    continue
                md.append(f"### {heading}\n")
                md.extend(
                    f"- **{issue['rule']}** on column `{issue['column']}`"
                    for issue in matching
                )
                md.append("")

            # Detailed results
            md.append("## Detailed Validation Results\n")
            for result in self.results:
                if result.get("valid", False):
                    continue
                details = result.get("details") or {}
                rule = details.get("rule", "unknown")
                column = details.get("column", "N/A")

                # str() guards against a non-string rule value in the input
                md.append(f"### {str(rule).replace('_', ' ').title()} - {column}\n")

                if "error" in details:
                    md.append(f"**Error:** {details['error']}\n")
                elif rule == "not_null":
                    md.append(f"**Issue:** Found {details.get('null_count', 0)} NULL values\n")
                elif rule == "unique":
                    md.append(f"**Issue:** Found {details.get('duplicate_count', 0)} "
                              f"duplicate groups\n")
                elif rule == "range":
                    md.append(f"**Issue:** {details.get('out_of_range_count', 0)} values "
                              f"outside range [{details.get('min')}, {details.get('max')}]\n")

        # Recommendations
        md.append("## Recommendations\n")
        if summary['failed'] == 0:
            md.append("✅ **All validations passed!** - Data integrity is maintained")
        else:
            recommendations = []
            if summary['critical_issues'] > 0:
                recommendations.append(
                    "**Immediately address critical issues** - These indicate data integrity problems"
                )
            if summary['high_issues'] > 0:
                recommendations.append(
                    "**Resolve high priority issues within 1 week** - These affect data quality"
                )
            recommendations.append(
                "**Review validation rules** - Ensure they match business requirements"
            )
            recommendations.append(
                "**Implement data quality monitoring** - Set up alerts for recurring issues"
            )
            # Number after filtering so the list never has gaps.
            md.extend(
                f"{number}. {text}"
                for number, text in enumerate(recommendations, start=1)
            )

        md.append("")
        md.append("---")
        md.append("*Report generated by Data Validation Engine*")

        return "\n".join(md)

    def generate_html_report(self) -> str:
        """Generate a standalone HTML report.

        All values taken from the results file (table name, rule, column,
        statistics) are HTML-escaped before being placed in the markup.
        """
        # Local import: the module-level import block is outside this class.
        from html import escape

        def esc(value: Any) -> str:
            return escape(str(value), quote=True)

        summary = self.generate_summary()
        stats = self.metadata.get("statistics", {})

        parts = [
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            "<meta charset='UTF-8'>",
            "<meta name='viewport' content='width=device-width, initial-scale=1.0'>",
            "<title>Data Validation Report</title>",
            "<style>",
            "body { font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }",
            ".container { max-width: 1000px; margin: 0 auto; background-color: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }",
            "h1 { color: #333; border-bottom: 3px solid #007bff; padding-bottom: 10px; }",
            "h2 { color: #555; margin-top: 30px; }",
            "table { width: 100%; border-collapse: collapse; margin: 15px 0; }",
            "th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }",
            "th { background-color: #007bff; color: white; }",
            "tr:hover { background-color: #f9f9f9; }",
            ".summary { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px; margin: 20px 0; }",
            ".metric { background-color: #f0f0f0; padding: 15px; border-radius: 5px; }",
            ".metric .value { font-size: 24px; font-weight: bold; color: #007bff; }",
            ".metric .label { color: #666; margin-top: 5px; }",
            ".critical { color: #dc3545; font-weight: bold; }",
            ".high { color: #ff6b6b; font-weight: bold; }",
            ".medium { color: #ffc107; font-weight: bold; }",
            ".low { color: #6c757d; font-weight: bold; }",
            ".success { color: #28a745; font-weight: bold; }",
            ".issue-list { margin: 15px 0; }",
            ".issue { background-color: #fff3cd; padding: 10px; margin: 5px 0; border-left: 4px solid #ffc107; border-radius: 3px; }",
            ".footer { text-align: center; color: #999; margin-top: 30px; padding-top: 15px; border-top: 1px solid #ddd; }",
            "</style>",
            "</head>",
            "<body>",
            "<div class='container'>",
            "<h1>Data Validation Report</h1>",
            f"<p><strong>Table:</strong> {esc(self.metadata.get('table', 'Unknown'))}</p>",
            f"<p><strong>Generated:</strong> {datetime.now().isoformat()}</p>"
        ]

        # Summary metrics
        parts.append("<h2>Summary</h2>")
        parts.append("<div class='summary'>")
        parts.append(f"<div class='metric'><div class='value'>{summary['total']}</div><div class='label'>Total Checks</div></div>")
        parts.append(f"<div class='metric'><div class='value success'>{summary['passed']}</div><div class='label'>Passed</div></div>")
        parts.append(f"<div class='metric'><div class='value critical'>{summary['failed']}</div><div class='label'>Failed</div></div>")
        parts.append(f"<div class='metric'><div class='value'>{summary['pass_rate']:.1f}%</div><div class='label'>Pass Rate</div></div>")
        parts.append("</div>")

        # Table statistics (skipped when the collector recorded an error)
        if stats and "error" not in stats:
            parts.append("<h2>Table Statistics</h2>")
            parts.append("<table>")
            parts.append("<tr><th>Metric</th><th>Value</th></tr>")
            parts.append(f"<tr><td>Total Rows</td><td>{esc(stats.get('row_count', 'N/A'))}</td></tr>")
            parts.append(f"<tr><td>Total Columns</td><td>{esc(stats.get('column_count', 'N/A'))}</td></tr>")
            parts.append("</table>")

        # Issues
        if summary['failed'] > 0:
            parts.append("<h2>Issues</h2>")
            parts.append("<div class='issue-list'>")

            for issue in summary['issues']:
                # Severity comes from _determine_severity's fixed vocabulary,
                # so it is safe to use as a CSS class name.
                severity = issue['severity']
                parts.append(
                    f"<div class='issue'>"
                    f"<span class='{severity}'>{severity.upper()}</span>: "
                    f"<strong>{esc(issue['rule'])}</strong> on column "
                    f"<code>{esc(issue['column'])}</code>"
                    f"</div>"
                )

            parts.append("</div>")
        else:
            parts.append("<h2>Results</h2>")
            parts.append("<p class='success'>✅ All validations passed!</p>")

        # Footer
        parts.append("<div class='footer'>")
        parts.append("<p>Report generated by Data Validation Engine</p>")
        parts.append("</div>")

        parts.append("</div>")
        parts.append("</body>")
        parts.append("</html>")

        return "\n".join(parts)
309
+
310
+
311
def main():
    """Main entry point for report generation.

    Parses command-line arguments, loads the results file, renders the
    report in the requested format, optionally saves it to ``--output``,
    and prints it to stdout. Exits with status 0 on success and 1 on any
    failure (the error is written to stderr).
    """
    parser = argparse.ArgumentParser(
        description="Generate comprehensive data validation reports",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --results validation.json
  %(prog)s --results validation.json --format markdown
  %(prog)s --results validation.json --format html --output report.html
  %(prog)s --results validation.json --format json --output report.json
        """
    )

    parser.add_argument(
        "--results",
        required=True,
        help="Path to JSON file containing validation results"
    )
    parser.add_argument(
        "--format",
        default="markdown",
        choices=["json", "markdown", "html"],
        help="Report format"
    )
    parser.add_argument(
        "--output",
        help="Output file for report"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Print detailed output"
    )

    args = parser.parse_args()

    try:
        generator = ValidationReportGenerator()

        # Load results
        if args.verbose:
            print(f"Loading results from {args.results}...", file=sys.stderr)

        if not generator.load_results(args.results):
            sys.exit(1)

        # Generate report
        if args.format == "json":
            report = generator.generate_json_report()
        elif args.format == "html":
            report = generator.generate_html_report()
        else:  # markdown
            report = generator.generate_markdown_report()

        # Save to file if requested. This happens before printing so that a
        # console that cannot encode the report does not prevent the file
        # from being written. UTF-8 is explicit because the reports contain
        # non-ASCII characters and the HTML report declares charset UTF-8.
        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(report)

            if args.verbose:
                print(f"\nReport saved to {args.output}", file=sys.stderr)

        # Print report
        print(report)

        sys.exit(0)

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        # SystemExit is not an Exception subclass, so the sys.exit() calls
        # above pass through untouched.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
382
+
383
+
384
+ if __name__ == "__main__":
385
+ main()