attune-ai 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- attune/__init__.py +358 -0
- attune/adaptive/__init__.py +13 -0
- attune/adaptive/task_complexity.py +127 -0
- attune/agent_monitoring.py +414 -0
- attune/cache/__init__.py +117 -0
- attune/cache/base.py +166 -0
- attune/cache/dependency_manager.py +256 -0
- attune/cache/hash_only.py +251 -0
- attune/cache/hybrid.py +457 -0
- attune/cache/storage.py +285 -0
- attune/cache_monitor.py +356 -0
- attune/cache_stats.py +298 -0
- attune/cli/__init__.py +152 -0
- attune/cli/__main__.py +12 -0
- attune/cli/commands/__init__.py +1 -0
- attune/cli/commands/batch.py +264 -0
- attune/cli/commands/cache.py +248 -0
- attune/cli/commands/help.py +331 -0
- attune/cli/commands/info.py +140 -0
- attune/cli/commands/inspect.py +436 -0
- attune/cli/commands/inspection.py +57 -0
- attune/cli/commands/memory.py +48 -0
- attune/cli/commands/metrics.py +92 -0
- attune/cli/commands/orchestrate.py +184 -0
- attune/cli/commands/patterns.py +207 -0
- attune/cli/commands/profiling.py +202 -0
- attune/cli/commands/provider.py +98 -0
- attune/cli/commands/routing.py +285 -0
- attune/cli/commands/setup.py +96 -0
- attune/cli/commands/status.py +235 -0
- attune/cli/commands/sync.py +166 -0
- attune/cli/commands/tier.py +121 -0
- attune/cli/commands/utilities.py +114 -0
- attune/cli/commands/workflow.py +579 -0
- attune/cli/core.py +32 -0
- attune/cli/parsers/__init__.py +68 -0
- attune/cli/parsers/batch.py +118 -0
- attune/cli/parsers/cache.py +65 -0
- attune/cli/parsers/help.py +41 -0
- attune/cli/parsers/info.py +26 -0
- attune/cli/parsers/inspect.py +66 -0
- attune/cli/parsers/metrics.py +42 -0
- attune/cli/parsers/orchestrate.py +61 -0
- attune/cli/parsers/patterns.py +54 -0
- attune/cli/parsers/provider.py +40 -0
- attune/cli/parsers/routing.py +110 -0
- attune/cli/parsers/setup.py +42 -0
- attune/cli/parsers/status.py +47 -0
- attune/cli/parsers/sync.py +31 -0
- attune/cli/parsers/tier.py +33 -0
- attune/cli/parsers/workflow.py +77 -0
- attune/cli/utils/__init__.py +1 -0
- attune/cli/utils/data.py +242 -0
- attune/cli/utils/helpers.py +68 -0
- attune/cli_legacy.py +3957 -0
- attune/cli_minimal.py +1159 -0
- attune/cli_router.py +437 -0
- attune/cli_unified.py +814 -0
- attune/config/__init__.py +66 -0
- attune/config/xml_config.py +286 -0
- attune/config.py +545 -0
- attune/coordination.py +870 -0
- attune/core.py +1511 -0
- attune/core_modules/__init__.py +15 -0
- attune/cost_tracker.py +626 -0
- attune/dashboard/__init__.py +41 -0
- attune/dashboard/app.py +512 -0
- attune/dashboard/simple_server.py +435 -0
- attune/dashboard/standalone_server.py +547 -0
- attune/discovery.py +306 -0
- attune/emergence.py +306 -0
- attune/exceptions.py +123 -0
- attune/feedback_loops.py +373 -0
- attune/hot_reload/README.md +473 -0
- attune/hot_reload/__init__.py +62 -0
- attune/hot_reload/config.py +83 -0
- attune/hot_reload/integration.py +229 -0
- attune/hot_reload/reloader.py +298 -0
- attune/hot_reload/watcher.py +183 -0
- attune/hot_reload/websocket.py +177 -0
- attune/levels.py +577 -0
- attune/leverage_points.py +441 -0
- attune/logging_config.py +261 -0
- attune/mcp/__init__.py +10 -0
- attune/mcp/server.py +506 -0
- attune/memory/__init__.py +237 -0
- attune/memory/claude_memory.py +469 -0
- attune/memory/config.py +224 -0
- attune/memory/control_panel.py +1290 -0
- attune/memory/control_panel_support.py +145 -0
- attune/memory/cross_session.py +845 -0
- attune/memory/edges.py +179 -0
- attune/memory/encryption.py +159 -0
- attune/memory/file_session.py +770 -0
- attune/memory/graph.py +570 -0
- attune/memory/long_term.py +913 -0
- attune/memory/long_term_types.py +99 -0
- attune/memory/mixins/__init__.py +25 -0
- attune/memory/mixins/backend_init_mixin.py +249 -0
- attune/memory/mixins/capabilities_mixin.py +208 -0
- attune/memory/mixins/handoff_mixin.py +208 -0
- attune/memory/mixins/lifecycle_mixin.py +49 -0
- attune/memory/mixins/long_term_mixin.py +352 -0
- attune/memory/mixins/promotion_mixin.py +109 -0
- attune/memory/mixins/short_term_mixin.py +182 -0
- attune/memory/nodes.py +179 -0
- attune/memory/redis_bootstrap.py +540 -0
- attune/memory/security/__init__.py +31 -0
- attune/memory/security/audit_logger.py +932 -0
- attune/memory/security/pii_scrubber.py +640 -0
- attune/memory/security/secrets_detector.py +678 -0
- attune/memory/short_term.py +2192 -0
- attune/memory/simple_storage.py +302 -0
- attune/memory/storage/__init__.py +15 -0
- attune/memory/storage_backend.py +167 -0
- attune/memory/summary_index.py +583 -0
- attune/memory/types.py +446 -0
- attune/memory/unified.py +182 -0
- attune/meta_workflows/__init__.py +74 -0
- attune/meta_workflows/agent_creator.py +248 -0
- attune/meta_workflows/builtin_templates.py +567 -0
- attune/meta_workflows/cli_commands/__init__.py +56 -0
- attune/meta_workflows/cli_commands/agent_commands.py +321 -0
- attune/meta_workflows/cli_commands/analytics_commands.py +442 -0
- attune/meta_workflows/cli_commands/config_commands.py +232 -0
- attune/meta_workflows/cli_commands/memory_commands.py +182 -0
- attune/meta_workflows/cli_commands/template_commands.py +354 -0
- attune/meta_workflows/cli_commands/workflow_commands.py +382 -0
- attune/meta_workflows/cli_meta_workflows.py +59 -0
- attune/meta_workflows/form_engine.py +292 -0
- attune/meta_workflows/intent_detector.py +409 -0
- attune/meta_workflows/models.py +569 -0
- attune/meta_workflows/pattern_learner.py +738 -0
- attune/meta_workflows/plan_generator.py +384 -0
- attune/meta_workflows/session_context.py +397 -0
- attune/meta_workflows/template_registry.py +229 -0
- attune/meta_workflows/workflow.py +984 -0
- attune/metrics/__init__.py +12 -0
- attune/metrics/collector.py +31 -0
- attune/metrics/prompt_metrics.py +194 -0
- attune/models/__init__.py +172 -0
- attune/models/__main__.py +13 -0
- attune/models/adaptive_routing.py +437 -0
- attune/models/auth_cli.py +444 -0
- attune/models/auth_strategy.py +450 -0
- attune/models/cli.py +655 -0
- attune/models/empathy_executor.py +354 -0
- attune/models/executor.py +257 -0
- attune/models/fallback.py +762 -0
- attune/models/provider_config.py +282 -0
- attune/models/registry.py +472 -0
- attune/models/tasks.py +359 -0
- attune/models/telemetry/__init__.py +71 -0
- attune/models/telemetry/analytics.py +594 -0
- attune/models/telemetry/backend.py +196 -0
- attune/models/telemetry/data_models.py +431 -0
- attune/models/telemetry/storage.py +489 -0
- attune/models/token_estimator.py +420 -0
- attune/models/validation.py +280 -0
- attune/monitoring/__init__.py +52 -0
- attune/monitoring/alerts.py +946 -0
- attune/monitoring/alerts_cli.py +448 -0
- attune/monitoring/multi_backend.py +271 -0
- attune/monitoring/otel_backend.py +362 -0
- attune/optimization/__init__.py +19 -0
- attune/optimization/context_optimizer.py +272 -0
- attune/orchestration/__init__.py +67 -0
- attune/orchestration/agent_templates.py +707 -0
- attune/orchestration/config_store.py +499 -0
- attune/orchestration/execution_strategies.py +2111 -0
- attune/orchestration/meta_orchestrator.py +1168 -0
- attune/orchestration/pattern_learner.py +696 -0
- attune/orchestration/real_tools.py +931 -0
- attune/pattern_cache.py +187 -0
- attune/pattern_library.py +542 -0
- attune/patterns/debugging/all_patterns.json +81 -0
- attune/patterns/debugging/workflow_20260107_1770825e.json +77 -0
- attune/patterns/refactoring_memory.json +89 -0
- attune/persistence.py +564 -0
- attune/platform_utils.py +265 -0
- attune/plugins/__init__.py +28 -0
- attune/plugins/base.py +361 -0
- attune/plugins/registry.py +268 -0
- attune/project_index/__init__.py +32 -0
- attune/project_index/cli.py +335 -0
- attune/project_index/index.py +667 -0
- attune/project_index/models.py +504 -0
- attune/project_index/reports.py +474 -0
- attune/project_index/scanner.py +777 -0
- attune/project_index/scanner_parallel.py +291 -0
- attune/prompts/__init__.py +61 -0
- attune/prompts/config.py +77 -0
- attune/prompts/context.py +177 -0
- attune/prompts/parser.py +285 -0
- attune/prompts/registry.py +313 -0
- attune/prompts/templates.py +208 -0
- attune/redis_config.py +302 -0
- attune/redis_memory.py +799 -0
- attune/resilience/__init__.py +56 -0
- attune/resilience/circuit_breaker.py +256 -0
- attune/resilience/fallback.py +179 -0
- attune/resilience/health.py +300 -0
- attune/resilience/retry.py +209 -0
- attune/resilience/timeout.py +135 -0
- attune/routing/__init__.py +43 -0
- attune/routing/chain_executor.py +433 -0
- attune/routing/classifier.py +217 -0
- attune/routing/smart_router.py +234 -0
- attune/routing/workflow_registry.py +343 -0
- attune/scaffolding/README.md +589 -0
- attune/scaffolding/__init__.py +35 -0
- attune/scaffolding/__main__.py +14 -0
- attune/scaffolding/cli.py +240 -0
- attune/scaffolding/templates/base_wizard.py.jinja2 +121 -0
- attune/scaffolding/templates/coach_wizard.py.jinja2 +321 -0
- attune/scaffolding/templates/domain_wizard.py.jinja2 +408 -0
- attune/scaffolding/templates/linear_flow_wizard.py.jinja2 +203 -0
- attune/socratic/__init__.py +256 -0
- attune/socratic/ab_testing.py +958 -0
- attune/socratic/blueprint.py +533 -0
- attune/socratic/cli.py +703 -0
- attune/socratic/collaboration.py +1114 -0
- attune/socratic/domain_templates.py +924 -0
- attune/socratic/embeddings.py +738 -0
- attune/socratic/engine.py +794 -0
- attune/socratic/explainer.py +682 -0
- attune/socratic/feedback.py +772 -0
- attune/socratic/forms.py +629 -0
- attune/socratic/generator.py +732 -0
- attune/socratic/llm_analyzer.py +637 -0
- attune/socratic/mcp_server.py +702 -0
- attune/socratic/session.py +312 -0
- attune/socratic/storage.py +667 -0
- attune/socratic/success.py +730 -0
- attune/socratic/visual_editor.py +860 -0
- attune/socratic/web_ui.py +958 -0
- attune/telemetry/__init__.py +39 -0
- attune/telemetry/agent_coordination.py +475 -0
- attune/telemetry/agent_tracking.py +367 -0
- attune/telemetry/approval_gates.py +545 -0
- attune/telemetry/cli.py +1231 -0
- attune/telemetry/commands/__init__.py +14 -0
- attune/telemetry/commands/dashboard_commands.py +696 -0
- attune/telemetry/event_streaming.py +409 -0
- attune/telemetry/feedback_loop.py +567 -0
- attune/telemetry/usage_tracker.py +591 -0
- attune/templates.py +754 -0
- attune/test_generator/__init__.py +38 -0
- attune/test_generator/__main__.py +14 -0
- attune/test_generator/cli.py +234 -0
- attune/test_generator/generator.py +355 -0
- attune/test_generator/risk_analyzer.py +216 -0
- attune/test_generator/templates/unit_test.py.jinja2 +272 -0
- attune/tier_recommender.py +384 -0
- attune/tools.py +183 -0
- attune/trust/__init__.py +28 -0
- attune/trust/circuit_breaker.py +579 -0
- attune/trust_building.py +527 -0
- attune/validation/__init__.py +19 -0
- attune/validation/xml_validator.py +281 -0
- attune/vscode_bridge.py +173 -0
- attune/workflow_commands.py +780 -0
- attune/workflow_patterns/__init__.py +33 -0
- attune/workflow_patterns/behavior.py +249 -0
- attune/workflow_patterns/core.py +76 -0
- attune/workflow_patterns/output.py +99 -0
- attune/workflow_patterns/registry.py +255 -0
- attune/workflow_patterns/structural.py +288 -0
- attune/workflows/__init__.py +539 -0
- attune/workflows/autonomous_test_gen.py +1268 -0
- attune/workflows/base.py +2667 -0
- attune/workflows/batch_processing.py +342 -0
- attune/workflows/bug_predict.py +1084 -0
- attune/workflows/builder.py +273 -0
- attune/workflows/caching.py +253 -0
- attune/workflows/code_review.py +1048 -0
- attune/workflows/code_review_adapters.py +312 -0
- attune/workflows/code_review_pipeline.py +722 -0
- attune/workflows/config.py +645 -0
- attune/workflows/dependency_check.py +644 -0
- attune/workflows/document_gen/__init__.py +25 -0
- attune/workflows/document_gen/config.py +30 -0
- attune/workflows/document_gen/report_formatter.py +162 -0
- attune/workflows/document_gen/workflow.py +1426 -0
- attune/workflows/document_manager.py +216 -0
- attune/workflows/document_manager_README.md +134 -0
- attune/workflows/documentation_orchestrator.py +1205 -0
- attune/workflows/history.py +510 -0
- attune/workflows/keyboard_shortcuts/__init__.py +39 -0
- attune/workflows/keyboard_shortcuts/generators.py +391 -0
- attune/workflows/keyboard_shortcuts/parsers.py +416 -0
- attune/workflows/keyboard_shortcuts/prompts.py +295 -0
- attune/workflows/keyboard_shortcuts/schema.py +193 -0
- attune/workflows/keyboard_shortcuts/workflow.py +509 -0
- attune/workflows/llm_base.py +363 -0
- attune/workflows/manage_docs.py +87 -0
- attune/workflows/manage_docs_README.md +134 -0
- attune/workflows/manage_documentation.py +821 -0
- attune/workflows/new_sample_workflow1.py +149 -0
- attune/workflows/new_sample_workflow1_README.md +150 -0
- attune/workflows/orchestrated_health_check.py +849 -0
- attune/workflows/orchestrated_release_prep.py +600 -0
- attune/workflows/output.py +413 -0
- attune/workflows/perf_audit.py +863 -0
- attune/workflows/pr_review.py +762 -0
- attune/workflows/progress.py +785 -0
- attune/workflows/progress_server.py +322 -0
- attune/workflows/progressive/README 2.md +454 -0
- attune/workflows/progressive/README.md +454 -0
- attune/workflows/progressive/__init__.py +82 -0
- attune/workflows/progressive/cli.py +219 -0
- attune/workflows/progressive/core.py +488 -0
- attune/workflows/progressive/orchestrator.py +723 -0
- attune/workflows/progressive/reports.py +520 -0
- attune/workflows/progressive/telemetry.py +274 -0
- attune/workflows/progressive/test_gen.py +495 -0
- attune/workflows/progressive/workflow.py +589 -0
- attune/workflows/refactor_plan.py +694 -0
- attune/workflows/release_prep.py +895 -0
- attune/workflows/release_prep_crew.py +969 -0
- attune/workflows/research_synthesis.py +404 -0
- attune/workflows/routing.py +168 -0
- attune/workflows/secure_release.py +593 -0
- attune/workflows/security_adapters.py +297 -0
- attune/workflows/security_audit.py +1329 -0
- attune/workflows/security_audit_phase3.py +355 -0
- attune/workflows/seo_optimization.py +633 -0
- attune/workflows/step_config.py +234 -0
- attune/workflows/telemetry_mixin.py +269 -0
- attune/workflows/test5.py +125 -0
- attune/workflows/test5_README.md +158 -0
- attune/workflows/test_coverage_boost_crew.py +849 -0
- attune/workflows/test_gen/__init__.py +52 -0
- attune/workflows/test_gen/ast_analyzer.py +249 -0
- attune/workflows/test_gen/config.py +88 -0
- attune/workflows/test_gen/data_models.py +38 -0
- attune/workflows/test_gen/report_formatter.py +289 -0
- attune/workflows/test_gen/test_templates.py +381 -0
- attune/workflows/test_gen/workflow.py +655 -0
- attune/workflows/test_gen.py +54 -0
- attune/workflows/test_gen_behavioral.py +477 -0
- attune/workflows/test_gen_parallel.py +341 -0
- attune/workflows/test_lifecycle.py +526 -0
- attune/workflows/test_maintenance.py +627 -0
- attune/workflows/test_maintenance_cli.py +590 -0
- attune/workflows/test_maintenance_crew.py +840 -0
- attune/workflows/test_runner.py +622 -0
- attune/workflows/tier_tracking.py +531 -0
- attune/workflows/xml_enhanced_crew.py +285 -0
- attune_ai-2.0.0.dist-info/METADATA +1026 -0
- attune_ai-2.0.0.dist-info/RECORD +457 -0
- attune_ai-2.0.0.dist-info/WHEEL +5 -0
- attune_ai-2.0.0.dist-info/entry_points.txt +26 -0
- attune_ai-2.0.0.dist-info/licenses/LICENSE +201 -0
- attune_ai-2.0.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
- attune_ai-2.0.0.dist-info/top_level.txt +5 -0
- attune_healthcare/__init__.py +13 -0
- attune_healthcare/monitors/__init__.py +9 -0
- attune_healthcare/monitors/clinical_protocol_monitor.py +315 -0
- attune_healthcare/monitors/monitoring/__init__.py +44 -0
- attune_healthcare/monitors/monitoring/protocol_checker.py +300 -0
- attune_healthcare/monitors/monitoring/protocol_loader.py +214 -0
- attune_healthcare/monitors/monitoring/sensor_parsers.py +306 -0
- attune_healthcare/monitors/monitoring/trajectory_analyzer.py +389 -0
- attune_llm/README.md +553 -0
- attune_llm/__init__.py +28 -0
- attune_llm/agent_factory/__init__.py +53 -0
- attune_llm/agent_factory/adapters/__init__.py +85 -0
- attune_llm/agent_factory/adapters/autogen_adapter.py +312 -0
- attune_llm/agent_factory/adapters/crewai_adapter.py +483 -0
- attune_llm/agent_factory/adapters/haystack_adapter.py +298 -0
- attune_llm/agent_factory/adapters/langchain_adapter.py +362 -0
- attune_llm/agent_factory/adapters/langgraph_adapter.py +333 -0
- attune_llm/agent_factory/adapters/native.py +228 -0
- attune_llm/agent_factory/adapters/wizard_adapter.py +423 -0
- attune_llm/agent_factory/base.py +305 -0
- attune_llm/agent_factory/crews/__init__.py +67 -0
- attune_llm/agent_factory/crews/code_review.py +1113 -0
- attune_llm/agent_factory/crews/health_check.py +1262 -0
- attune_llm/agent_factory/crews/refactoring.py +1128 -0
- attune_llm/agent_factory/crews/security_audit.py +1018 -0
- attune_llm/agent_factory/decorators.py +287 -0
- attune_llm/agent_factory/factory.py +558 -0
- attune_llm/agent_factory/framework.py +193 -0
- attune_llm/agent_factory/memory_integration.py +328 -0
- attune_llm/agent_factory/resilient.py +320 -0
- attune_llm/agents_md/__init__.py +22 -0
- attune_llm/agents_md/loader.py +218 -0
- attune_llm/agents_md/parser.py +271 -0
- attune_llm/agents_md/registry.py +307 -0
- attune_llm/claude_memory.py +466 -0
- attune_llm/cli/__init__.py +8 -0
- attune_llm/cli/sync_claude.py +487 -0
- attune_llm/code_health.py +1313 -0
- attune_llm/commands/__init__.py +51 -0
- attune_llm/commands/context.py +375 -0
- attune_llm/commands/loader.py +301 -0
- attune_llm/commands/models.py +231 -0
- attune_llm/commands/parser.py +371 -0
- attune_llm/commands/registry.py +429 -0
- attune_llm/config/__init__.py +29 -0
- attune_llm/config/unified.py +291 -0
- attune_llm/context/__init__.py +22 -0
- attune_llm/context/compaction.py +455 -0
- attune_llm/context/manager.py +434 -0
- attune_llm/contextual_patterns.py +361 -0
- attune_llm/core.py +907 -0
- attune_llm/git_pattern_extractor.py +435 -0
- attune_llm/hooks/__init__.py +24 -0
- attune_llm/hooks/config.py +306 -0
- attune_llm/hooks/executor.py +289 -0
- attune_llm/hooks/registry.py +302 -0
- attune_llm/hooks/scripts/__init__.py +39 -0
- attune_llm/hooks/scripts/evaluate_session.py +201 -0
- attune_llm/hooks/scripts/first_time_init.py +285 -0
- attune_llm/hooks/scripts/pre_compact.py +207 -0
- attune_llm/hooks/scripts/session_end.py +183 -0
- attune_llm/hooks/scripts/session_start.py +163 -0
- attune_llm/hooks/scripts/suggest_compact.py +225 -0
- attune_llm/learning/__init__.py +30 -0
- attune_llm/learning/evaluator.py +438 -0
- attune_llm/learning/extractor.py +514 -0
- attune_llm/learning/storage.py +560 -0
- attune_llm/levels.py +227 -0
- attune_llm/pattern_confidence.py +414 -0
- attune_llm/pattern_resolver.py +272 -0
- attune_llm/pattern_summary.py +350 -0
- attune_llm/providers.py +967 -0
- attune_llm/routing/__init__.py +32 -0
- attune_llm/routing/model_router.py +362 -0
- attune_llm/security/IMPLEMENTATION_SUMMARY.md +413 -0
- attune_llm/security/PHASE2_COMPLETE.md +384 -0
- attune_llm/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
- attune_llm/security/QUICK_REFERENCE.md +316 -0
- attune_llm/security/README.md +262 -0
- attune_llm/security/__init__.py +62 -0
- attune_llm/security/audit_logger.py +929 -0
- attune_llm/security/audit_logger_example.py +152 -0
- attune_llm/security/pii_scrubber.py +640 -0
- attune_llm/security/secrets_detector.py +678 -0
- attune_llm/security/secrets_detector_example.py +304 -0
- attune_llm/security/secure_memdocs.py +1192 -0
- attune_llm/security/secure_memdocs_example.py +278 -0
- attune_llm/session_status.py +745 -0
- attune_llm/state.py +246 -0
- attune_llm/utils/__init__.py +5 -0
- attune_llm/utils/tokens.py +349 -0
- attune_software/SOFTWARE_PLUGIN_README.md +57 -0
- attune_software/__init__.py +13 -0
- attune_software/cli/__init__.py +120 -0
- attune_software/cli/inspect.py +362 -0
- attune_software/cli.py +574 -0
- attune_software/plugin.py +188 -0
- workflow_scaffolding/__init__.py +11 -0
- workflow_scaffolding/__main__.py +12 -0
- workflow_scaffolding/cli.py +206 -0
- workflow_scaffolding/generator.py +265 -0
|
@@ -0,0 +1,640 @@
|
|
|
1
|
+
"""PII Scrubbing Module for Enterprise Privacy Integration
|
|
2
|
+
|
|
3
|
+
Comprehensive PII detection and scrubbing based on GDPR, HIPAA, and SOC2 requirements.
|
|
4
|
+
Supports custom patterns and provides detailed audit information.
|
|
5
|
+
|
|
6
|
+
Copyright 2025 Smart AI Memory, LLC
|
|
7
|
+
Licensed under Fair Source 0.9
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class PIIDetection:
    """Record of a single PII match found while scrubbing text.

    Attributes:
        pii_type: Category of the match, e.g. "email", "phone", "ssn".
        matched_text: The raw text that matched (this is the sensitive value).
        start_pos: Starting position of the match in the original content.
        end_pos: Ending position of the match in the original content.
        replacement: Placeholder that was substituted for the match.
        confidence: Detection confidence in the range 0.0-1.0.
        metadata: Free-form additional context about the detection.

    """

    pii_type: str
    matched_text: str
    start_pos: int
    end_pos: int
    replacement: str
    confidence: float = 1.0
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dict, raw matched text included.

        The result carries the sensitive value under "matched_text", so be
        careful about where it gets logged. The "metadata" entry is the same
        dict object held by this instance, not a copy.
        """
        exported = (
            "pii_type",
            "matched_text",  # Be careful logging this
            "start_pos",
            "end_pos",
            "replacement",
            "confidence",
            "metadata",
        )
        return {attr: getattr(self, attr) for attr in exported}

    def to_audit_safe_dict(self) -> dict[str, Any]:
        """Return a dict for audit logs that omits the matched text itself.

        The match is described by its "start-end" position string and its
        length only. "metadata" is passed through unchanged.
        """
        span = f"{self.start_pos}-{self.end_pos}"
        return dict(
            pii_type=self.pii_type,
            position=span,
            length=len(self.matched_text),
            replacement=self.replacement,
            confidence=self.confidence,
            metadata=self.metadata,
        )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class PIIPattern:
    """One named rule describing how to find and replace a kind of PII.

    Attributes:
        name: Identifier of the rule, e.g. "email" or "ssn".
        pattern: Pre-compiled regular expression used for matching.
        replacement: Placeholder text for matches, e.g. "[EMAIL]" or "[SSN]".
        confidence: Base confidence level assigned to this rule's matches.
        description: Human-readable explanation of what the rule detects.
        enabled: Whether the rule is active.

    """

    # Required: the rule's identity, matcher, and placeholder.
    name: str
    pattern: re.Pattern
    replacement: str

    # Optional: a new rule is fully confident, undescribed, and active.
    confidence: float = 1.0
    description: str = ""
    enabled: bool = True
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class PIIScrubber:
|
|
85
|
+
"""Comprehensive PII detection and scrubbing system.
|
|
86
|
+
|
|
87
|
+
Detects and removes Personally Identifiable Information from text content
|
|
88
|
+
according to GDPR, HIPAA, and SOC2 requirements.
|
|
89
|
+
|
|
90
|
+
Supported PII types:
|
|
91
|
+
- Email addresses
|
|
92
|
+
- Phone numbers (US and international formats)
|
|
93
|
+
- Social Security Numbers (SSN)
|
|
94
|
+
- Credit card numbers (Visa, MC, Amex, Discover)
|
|
95
|
+
- IP addresses (IPv4 and IPv6)
|
|
96
|
+
- Physical addresses (US format)
|
|
97
|
+
- Names (context-aware)
|
|
98
|
+
- Medical Record Numbers (MRN)
|
|
99
|
+
- Patient IDs
|
|
100
|
+
|
|
101
|
+
Example:
|
|
102
|
+
>>> scrubber = PIIScrubber()
|
|
103
|
+
>>> sanitized, detections = scrubber.scrub(
|
|
104
|
+
... "Contact John Doe at john.doe@email.com or 555-123-4567"
|
|
105
|
+
... )
|
|
106
|
+
>>> print(sanitized)
|
|
107
|
+
"Contact [NAME] at [EMAIL] or [PHONE]"
|
|
108
|
+
>>> print(len(detections))
|
|
109
|
+
3
|
|
110
|
+
|
|
111
|
+
Performance:
|
|
112
|
+
All patterns are pre-compiled for efficient repeated use.
|
|
113
|
+
Typical scrubbing time: ~1-5ms for 1KB of text.
|
|
114
|
+
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
def __init__(self, enable_name_detection: bool = True):
|
|
118
|
+
"""Initialize PII scrubber with default patterns.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
enable_name_detection: Enable context-aware name detection
|
|
122
|
+
(may have false positives, disabled by default in production)
|
|
123
|
+
|
|
124
|
+
"""
|
|
125
|
+
self.patterns: dict[str, PIIPattern] = {}
|
|
126
|
+
self.custom_patterns: dict[str, PIIPattern] = {}
|
|
127
|
+
|
|
128
|
+
# Initialize default patterns
|
|
129
|
+
self._init_default_patterns()
|
|
130
|
+
|
|
131
|
+
# Control name detection (can have false positives)
|
|
132
|
+
if not enable_name_detection:
|
|
133
|
+
self.patterns["name"].enabled = False
|
|
134
|
+
|
|
135
|
+
def _init_default_patterns(self):
|
|
136
|
+
"""Initialize default PII detection patterns based on enterprise security policy"""
|
|
137
|
+
# Email addresses (RFC 5322 simplified)
|
|
138
|
+
self.patterns["email"] = PIIPattern(
|
|
139
|
+
name="email",
|
|
140
|
+
pattern=re.compile(
|
|
141
|
+
r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
|
|
142
|
+
re.IGNORECASE,
|
|
143
|
+
),
|
|
144
|
+
replacement="[EMAIL]",
|
|
145
|
+
confidence=1.0,
|
|
146
|
+
description="Email address (RFC 5322 format)",
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
# Social Security Numbers
|
|
150
|
+
# Format: 123-45-6789 or 123456789
|
|
151
|
+
self.patterns["ssn"] = PIIPattern(
|
|
152
|
+
name="ssn",
|
|
153
|
+
pattern=re.compile(r"\b(?!000|666|9\d{2})\d{3}-?(?!00)\d{2}-?(?!0000)\d{4}\b"),
|
|
154
|
+
replacement="[SSN]",
|
|
155
|
+
confidence=1.0,
|
|
156
|
+
description="Social Security Number (SSN)",
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
# Phone numbers (US and international)
|
|
160
|
+
# Matches: (555) 123-4567, 555-123-4567, 555.123.4567, 5551234567
|
|
161
|
+
# Also: +1-555-123-4567, +44 20 7123 4567
|
|
162
|
+
self.patterns["phone"] = PIIPattern(
|
|
163
|
+
name="phone",
|
|
164
|
+
pattern=re.compile(
|
|
165
|
+
r"""
|
|
166
|
+
(?:
|
|
167
|
+
# International format with country code
|
|
168
|
+
\+\d{1,3}[\s.-]?\(?\d{1,4}\)?[\s.-]?\d{1,4}[\s.-]?\d{1,4}[\s.-]?\d{1,9}
|
|
169
|
+
|
|
|
170
|
+
# US format with optional area code
|
|
171
|
+
\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}
|
|
172
|
+
|
|
|
173
|
+
# Simple 10-digit format
|
|
174
|
+
\b\d{3}[-.]?\d{3}[-.]?\d{4}\b
|
|
175
|
+
)
|
|
176
|
+
""",
|
|
177
|
+
re.VERBOSE,
|
|
178
|
+
),
|
|
179
|
+
replacement="[PHONE]",
|
|
180
|
+
confidence=0.95,
|
|
181
|
+
description="Phone number (US and international formats)",
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
# Credit card numbers
|
|
185
|
+
# Supports Visa, MasterCard, Amex, Discover with optional spaces/dashes
|
|
186
|
+
self.patterns["credit_card"] = PIIPattern(
|
|
187
|
+
name="credit_card",
|
|
188
|
+
pattern=re.compile(
|
|
189
|
+
r"""
|
|
190
|
+
\b(?:
|
|
191
|
+
# Visa
|
|
192
|
+
4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}
|
|
193
|
+
|
|
|
194
|
+
# MasterCard
|
|
195
|
+
5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}
|
|
196
|
+
|
|
|
197
|
+
# American Express
|
|
198
|
+
3[47]\d{2}[\s-]?\d{6}[\s-]?\d{5}
|
|
199
|
+
|
|
|
200
|
+
# Discover
|
|
201
|
+
6(?:011|5\d{2})[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}
|
|
202
|
+
)\b
|
|
203
|
+
""",
|
|
204
|
+
re.VERBOSE,
|
|
205
|
+
),
|
|
206
|
+
replacement="[CC]",
|
|
207
|
+
confidence=1.0,
|
|
208
|
+
description="Credit card number (Visa, MC, Amex, Discover)",
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
# IPv4 addresses
|
|
212
|
+
self.patterns["ipv4"] = PIIPattern(
|
|
213
|
+
name="ipv4",
|
|
214
|
+
pattern=re.compile(
|
|
215
|
+
r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
|
|
216
|
+
r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
|
|
217
|
+
),
|
|
218
|
+
replacement="[IP]",
|
|
219
|
+
confidence=1.0,
|
|
220
|
+
description="IPv4 address",
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
# IPv6 addresses (simplified pattern)
|
|
224
|
+
self.patterns["ipv6"] = PIIPattern(
|
|
225
|
+
name="ipv6",
|
|
226
|
+
pattern=re.compile(
|
|
227
|
+
r"\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b|"
|
|
228
|
+
r"\b(?:[0-9a-fA-F]{1,4}:){1,7}:\b|"
|
|
229
|
+
r"\b:(?::[0-9a-fA-F]{1,4}){1,7}\b",
|
|
230
|
+
),
|
|
231
|
+
replacement="[IP]",
|
|
232
|
+
confidence=0.95,
|
|
233
|
+
description="IPv6 address",
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
# US Street addresses (basic pattern)
|
|
237
|
+
# Matches: 123 Main St, 456 Oak Avenue, 789 First Street Apt 12
|
|
238
|
+
self.patterns["address"] = PIIPattern(
|
|
239
|
+
name="address",
|
|
240
|
+
pattern=re.compile(
|
|
241
|
+
r"\b\d{1,6}\s+(?:[A-Z][a-z]+\s+){1,3}"
|
|
242
|
+
r"(?:Street|St|Avenue|Ave|Road|Rd|Drive|Dr|Lane|Ln|Boulevard|Blvd|Way|Court|Ct)"
|
|
243
|
+
r"(?:\s+(?:Apt|Apartment|Suite|Ste|Unit|#)\s*\w+)?\b",
|
|
244
|
+
re.IGNORECASE,
|
|
245
|
+
),
|
|
246
|
+
replacement="[ADDRESS]",
|
|
247
|
+
confidence=0.85,
|
|
248
|
+
description="US street address",
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
# Names (context-aware pattern - conservative)
|
|
252
|
+
# Only matches capitalized first+last name patterns near PII indicators
|
|
253
|
+
# This is DISABLED by default to avoid false positives
|
|
254
|
+
self.patterns["name"] = PIIPattern(
|
|
255
|
+
name="name",
|
|
256
|
+
pattern=re.compile(
|
|
257
|
+
r"\b(?:Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2})\b|"
|
|
258
|
+
r"\bPatient:?\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2})\b|"
|
|
259
|
+
r"\bContact:?\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2})\b",
|
|
260
|
+
re.MULTILINE,
|
|
261
|
+
),
|
|
262
|
+
replacement="[NAME]",
|
|
263
|
+
confidence=0.75,
|
|
264
|
+
description="Personal name (context-aware)",
|
|
265
|
+
enabled=False, # Disabled by default - high false positive rate
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Medical Record Number (MRN)
|
|
269
|
+
# Format: MRN-1234567, MRN:1234567, MRN #1234567
|
|
270
|
+
self.patterns["mrn"] = PIIPattern(
|
|
271
|
+
name="mrn",
|
|
272
|
+
pattern=re.compile(
|
|
273
|
+
r"\bMRN[:\s#-]*(\d{6,10})\b",
|
|
274
|
+
re.IGNORECASE,
|
|
275
|
+
),
|
|
276
|
+
replacement="[MRN]",
|
|
277
|
+
confidence=1.0,
|
|
278
|
+
description="Medical Record Number",
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
# Patient ID (healthcare context)
|
|
282
|
+
# Format: Patient ID: 123456, PID-123456
|
|
283
|
+
self.patterns["patient_id"] = PIIPattern(
|
|
284
|
+
name="patient_id",
|
|
285
|
+
pattern=re.compile(
|
|
286
|
+
r"\b(?:Patient\s*ID|PID)[:\s#-]*(\d{5,10})\b",
|
|
287
|
+
re.IGNORECASE,
|
|
288
|
+
),
|
|
289
|
+
replacement="[PATIENT_ID]",
|
|
290
|
+
confidence=0.95,
|
|
291
|
+
description="Patient identifier",
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
def scrub(self, content: str) -> tuple[str, list[PIIDetection]]:
    """Scrub PII from content.

    Runs every enabled pattern (built-in first, then custom) over
    ``content``, resolves overlapping matches, and replaces each surviving
    match with its pattern's replacement text.

    Overlap rule: matches are ordered by start position and a match is
    dropped when it starts before the end of the previously kept match.
    When two matches start at the same position, the one found first
    (built-in before custom, then registration order) wins.

    Zero-width matches (possible with custom regexes such as ``\\d*``) are
    ignored, so a placeholder is never inserted where no text was matched.

    Args:
        content: Text content to scrub

    Returns:
        Tuple of (sanitized_content, detections):
        - sanitized_content: Text with PII replaced
        - detections: List of PIIDetection objects, in order of position.
          ``start_pos`` / ``end_pos`` are offsets into the ORIGINAL
          ``content``, not into the sanitized text.

    Example:
        >>> scrubber = PIIScrubber()
        >>> clean_text, detections = scrubber.scrub("Email me at john@example.com")
        >>> clean_text  # illustrative; depends on the enabled patterns
        'Email me at [EMAIL]'
        >>> detections[0].pii_type
        'email'

    """
    if not content:
        return content, []

    # Collect (start, end, pattern_name, pii_pattern) for every match.
    # The pattern name travels with the match so the detection type never
    # has to be guessed from the replacement string (several patterns may
    # legitimately share one replacement).
    candidates = []
    for registry in (self.patterns, self.custom_patterns):
        for pattern_name, pii_pattern in registry.items():
            if not pii_pattern.enabled:
                continue

            for match in pii_pattern.pattern.finditer(content):
                start_pos, end_pos = match.span()
                if start_pos == end_pos:
                    # Zero-width match: nothing to redact.
                    continue
                candidates.append((start_pos, end_pos, pattern_name, pii_pattern))

    # list.sort is stable, so ties on start position keep collection order.
    candidates.sort(key=lambda candidate: candidate[0])

    detections: list[PIIDetection] = []
    pieces: list[str] = []
    cursor = 0  # end (in original coordinates) of the last kept match

    for start_pos, end_pos, pattern_name, pii_pattern in candidates:
        if start_pos < cursor:
            # Overlaps the previously kept match - keep the earlier one.
            continue

        matched_text = content[start_pos:end_pos]
        replacement = pii_pattern.replacement

        detections.append(
            PIIDetection(
                pii_type=pattern_name or "unknown",
                matched_text=matched_text,
                start_pos=start_pos,
                end_pos=end_pos,
                replacement=replacement,
                confidence=pii_pattern.confidence,
                metadata={
                    "original_length": len(matched_text),
                    "replacement_length": len(replacement),
                },
            )
        )

        # Emit the untouched text before the match, then the placeholder.
        pieces.append(content[cursor:start_pos])
        pieces.append(replacement)
        cursor = end_pos

    # Trailing text after the last match (or the whole text if none).
    pieces.append(content[cursor:])

    return "".join(pieces), detections
|
|
402
|
+
|
|
403
|
+
def add_custom_pattern(
    self,
    name: str,
    pattern: str,
    replacement: str,
    confidence: float = 1.0,
    description: str = "",
):
    """Register an additional, user-defined PII pattern.

    The pattern is compiled and stored in ``self.custom_patterns`` in the
    enabled state, so organization- or domain-specific identifiers can be
    scrubbed alongside the built-in ones.

    Args:
        name: Unique identifier for this pattern
        pattern: Regular expression source (string)
        replacement: Text substituted for each match (e.g., "[CUSTOM_ID]")
        confidence: Detection confidence (0.0-1.0)
        description: Human-readable description

    Raises:
        ValueError: If ``name`` is already used by a built-in or custom
            pattern, or if ``pattern`` is not a valid regular expression

    Example:
        >>> scrubber = PIIScrubber()
        >>> scrubber.add_custom_pattern(
        ...     name="employee_id",
        ...     pattern=r"EMP-\\d{6}",
        ...     replacement="[EMPLOYEE_ID]",
        ...     description="Company employee identifier"
        ... )

    """
    # Names must be unique across both registries.
    name_taken = any(
        name in registry for registry in (self.patterns, self.custom_patterns)
    )
    if name_taken:
        raise ValueError(f"Pattern '{name}' already exists")

    # Surface regex syntax problems as ValueError, keeping the original cause.
    try:
        regex = re.compile(pattern)
    except re.error as exc:
        raise ValueError(f"Invalid regex pattern: {exc}") from exc

    new_entry = PIIPattern(
        name=name,
        pattern=regex,
        replacement=replacement,
        confidence=confidence,
        description=description,
        enabled=True,
    )
    self.custom_patterns[name] = new_entry
|
|
452
|
+
|
|
453
|
+
def remove_custom_pattern(self, name: str):
    """Delete a user-defined PII pattern.

    Only entries in ``self.custom_patterns`` can be removed; built-in
    patterns are protected and must be disabled instead.

    Args:
        name: Pattern identifier

    Raises:
        ValueError: If no custom pattern has this name (the message
            differs when the name belongs to a default pattern)

    """
    # Happy path first: the name is a custom pattern, so drop it.
    if name in self.custom_patterns:
        del self.custom_patterns[name]
        return

    # Not custom - tell the caller whether it is a protected default or unknown.
    if name in self.patterns:
        raise ValueError(
            f"Cannot remove default pattern '{name}'. Use disable_pattern() instead.",
        )
    raise ValueError(f"Pattern '{name}' not found")
|
|
471
|
+
|
|
472
|
+
def disable_pattern(self, name: str):
    """Turn a PII pattern off while keeping it registered.

    Built-in patterns are looked up before custom ones; the first registry
    containing ``name`` has that entry's ``enabled`` flag set to False.

    Args:
        name: Pattern identifier

    Raises:
        ValueError: If neither registry contains the pattern

    """
    for registry in (self.patterns, self.custom_patterns):
        if name in registry:
            registry[name].enabled = False
            return

    raise ValueError(f"Pattern '{name}' not found")
|
|
488
|
+
|
|
489
|
+
def enable_pattern(self, name: str):
    """Turn a registered PII pattern back on.

    Built-in patterns are looked up before custom ones; the first registry
    containing ``name`` has that entry's ``enabled`` flag set to True.

    Args:
        name: Pattern identifier

    Raises:
        ValueError: If neither registry contains the pattern

    """
    for registry in (self.patterns, self.custom_patterns):
        if name in registry:
            registry[name].enabled = True
            return

    raise ValueError(f"Pattern '{name}' not found")
|
|
505
|
+
|
|
506
|
+
def get_statistics(self) -> dict[str, Any]:
    """Summarize how many patterns are registered and enabled.

    Returns:
        Dictionary with total / default / custom pattern counts, the
        number of enabled patterns in each group, and the pattern names
        of both registries under ``"pattern_names"``

    """
    builtin = self.patterns
    custom = self.custom_patterns

    # Count the entries whose ``enabled`` flag is truthy in each registry.
    active_builtin = len([entry for entry in builtin.values() if entry.enabled])
    active_custom = len([entry for entry in custom.values() if entry.enabled])

    names = {
        "default": list(builtin.keys()),
        "custom": list(custom.keys()),
    }

    return {
        "total_patterns": len(builtin) + len(custom),
        "default_patterns": len(builtin),
        "custom_patterns": len(custom),
        "enabled_default": active_builtin,
        "enabled_custom": active_custom,
        "total_enabled": active_builtin + active_custom,
        "pattern_names": names,
    }
|
|
528
|
+
|
|
529
|
+
def get_pattern_info(self, name: str) -> dict[str, Any]:
    """Describe a single registered pattern.

    Built-in patterns take precedence over custom patterns of the same
    name.

    Args:
        name: Pattern identifier

    Returns:
        Dictionary with the pattern's name, replacement, confidence,
        description, enabled flag, whether it is custom, and the regex
        source string

    Raises:
        ValueError: If neither registry contains the pattern

    """
    # Anything that is not a built-in can only be found among the customs.
    is_custom = name not in self.patterns
    registry = self.custom_patterns if is_custom else self.patterns

    if name not in registry:
        raise ValueError(f"Pattern '{name}' not found")

    entry = registry[name]
    info: dict[str, Any] = {
        "name": entry.name,
        "replacement": entry.replacement,
        "confidence": entry.confidence,
        "description": entry.description,
        "enabled": entry.enabled,
        "is_custom": is_custom,
        # ``entry.pattern`` is the compiled regex; ``.pattern`` is its source.
        "regex_pattern": entry.pattern.pattern,
    }
    return info
|
|
562
|
+
|
|
563
|
+
def validate_patterns(self) -> list[dict[str, Any]]:
    """Run the built-in sample inputs against the default patterns.

    Each sample is a ``(text, should_match)`` pair; a sample passes when
    ``pattern.search(text)`` finding a match agrees with ``should_match``.
    Patterns that are not registered in ``self.patterns`` or that are
    disabled are skipped and produce no result entry.

    Returns:
        List of dictionaries, one per checked pattern, with the pattern
        name, number of samples, passed / failed counts and success rate

    """
    test_cases = {
        "email": [
            ("user@example.com", True),
            ("test.user+tag@domain.co.uk", True),
            ("not-an-email", False),
        ],
        "ssn": [
            ("123-45-6789", True),
            ("123456789", True),
            ("000-12-3456", False),  # Invalid area number
            ("12-345-6789", False),  # Wrong format
        ],
        "phone": [
            ("555-123-4567", True),
            ("(555) 123-4567", True),
            ("+1-555-123-4567", True),
            ("12345", False),
        ],
        "credit_card": [
            ("4532-1234-5678-9010", True),
            ("5123 4567 8901 2345", True),
            ("3782 822463 10005", True),  # Amex
            ("1234-5678-9012-3456", False),  # Invalid prefix
        ],
        "ipv4": [
            ("192.168.1.1", True),
            ("10.0.0.1", True),
            ("256.1.1.1", False),  # Invalid octet
        ],
        "mrn": [
            ("MRN-1234567", True),
            ("MRN:1234567", True),
            ("MRN 1234567", True),
            ("MRN-123", False),  # Too short
        ],
    }

    report = []

    for pattern_name, cases in test_cases.items():
        if pattern_name not in self.patterns:
            continue

        candidate = self.patterns[pattern_name]
        if not candidate.enabled:
            continue

        # One boolean per sample: did the regex agree with the expectation?
        outcomes = [
            bool(candidate.pattern.search(sample)) == expected
            for sample, expected in cases
        ]
        passed = sum(outcomes)  # True counts as 1
        failed = len(outcomes) - passed

        summary = {
            "pattern": pattern_name,
            "total_tests": len(cases),
            "passed": passed,
            "failed": failed,
            "success_rate": passed / len(cases) if cases else 0,
        }
        report.append(summary)

    return report
|