attune-ai 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- attune/__init__.py +358 -0
- attune/adaptive/__init__.py +13 -0
- attune/adaptive/task_complexity.py +127 -0
- attune/agent_monitoring.py +414 -0
- attune/cache/__init__.py +117 -0
- attune/cache/base.py +166 -0
- attune/cache/dependency_manager.py +256 -0
- attune/cache/hash_only.py +251 -0
- attune/cache/hybrid.py +457 -0
- attune/cache/storage.py +285 -0
- attune/cache_monitor.py +356 -0
- attune/cache_stats.py +298 -0
- attune/cli/__init__.py +152 -0
- attune/cli/__main__.py +12 -0
- attune/cli/commands/__init__.py +1 -0
- attune/cli/commands/batch.py +264 -0
- attune/cli/commands/cache.py +248 -0
- attune/cli/commands/help.py +331 -0
- attune/cli/commands/info.py +140 -0
- attune/cli/commands/inspect.py +436 -0
- attune/cli/commands/inspection.py +57 -0
- attune/cli/commands/memory.py +48 -0
- attune/cli/commands/metrics.py +92 -0
- attune/cli/commands/orchestrate.py +184 -0
- attune/cli/commands/patterns.py +207 -0
- attune/cli/commands/profiling.py +202 -0
- attune/cli/commands/provider.py +98 -0
- attune/cli/commands/routing.py +285 -0
- attune/cli/commands/setup.py +96 -0
- attune/cli/commands/status.py +235 -0
- attune/cli/commands/sync.py +166 -0
- attune/cli/commands/tier.py +121 -0
- attune/cli/commands/utilities.py +114 -0
- attune/cli/commands/workflow.py +579 -0
- attune/cli/core.py +32 -0
- attune/cli/parsers/__init__.py +68 -0
- attune/cli/parsers/batch.py +118 -0
- attune/cli/parsers/cache.py +65 -0
- attune/cli/parsers/help.py +41 -0
- attune/cli/parsers/info.py +26 -0
- attune/cli/parsers/inspect.py +66 -0
- attune/cli/parsers/metrics.py +42 -0
- attune/cli/parsers/orchestrate.py +61 -0
- attune/cli/parsers/patterns.py +54 -0
- attune/cli/parsers/provider.py +40 -0
- attune/cli/parsers/routing.py +110 -0
- attune/cli/parsers/setup.py +42 -0
- attune/cli/parsers/status.py +47 -0
- attune/cli/parsers/sync.py +31 -0
- attune/cli/parsers/tier.py +33 -0
- attune/cli/parsers/workflow.py +77 -0
- attune/cli/utils/__init__.py +1 -0
- attune/cli/utils/data.py +242 -0
- attune/cli/utils/helpers.py +68 -0
- attune/cli_legacy.py +3957 -0
- attune/cli_minimal.py +1159 -0
- attune/cli_router.py +437 -0
- attune/cli_unified.py +814 -0
- attune/config/__init__.py +66 -0
- attune/config/xml_config.py +286 -0
- attune/config.py +545 -0
- attune/coordination.py +870 -0
- attune/core.py +1511 -0
- attune/core_modules/__init__.py +15 -0
- attune/cost_tracker.py +626 -0
- attune/dashboard/__init__.py +41 -0
- attune/dashboard/app.py +512 -0
- attune/dashboard/simple_server.py +435 -0
- attune/dashboard/standalone_server.py +547 -0
- attune/discovery.py +306 -0
- attune/emergence.py +306 -0
- attune/exceptions.py +123 -0
- attune/feedback_loops.py +373 -0
- attune/hot_reload/README.md +473 -0
- attune/hot_reload/__init__.py +62 -0
- attune/hot_reload/config.py +83 -0
- attune/hot_reload/integration.py +229 -0
- attune/hot_reload/reloader.py +298 -0
- attune/hot_reload/watcher.py +183 -0
- attune/hot_reload/websocket.py +177 -0
- attune/levels.py +577 -0
- attune/leverage_points.py +441 -0
- attune/logging_config.py +261 -0
- attune/mcp/__init__.py +10 -0
- attune/mcp/server.py +506 -0
- attune/memory/__init__.py +237 -0
- attune/memory/claude_memory.py +469 -0
- attune/memory/config.py +224 -0
- attune/memory/control_panel.py +1290 -0
- attune/memory/control_panel_support.py +145 -0
- attune/memory/cross_session.py +845 -0
- attune/memory/edges.py +179 -0
- attune/memory/encryption.py +159 -0
- attune/memory/file_session.py +770 -0
- attune/memory/graph.py +570 -0
- attune/memory/long_term.py +913 -0
- attune/memory/long_term_types.py +99 -0
- attune/memory/mixins/__init__.py +25 -0
- attune/memory/mixins/backend_init_mixin.py +249 -0
- attune/memory/mixins/capabilities_mixin.py +208 -0
- attune/memory/mixins/handoff_mixin.py +208 -0
- attune/memory/mixins/lifecycle_mixin.py +49 -0
- attune/memory/mixins/long_term_mixin.py +352 -0
- attune/memory/mixins/promotion_mixin.py +109 -0
- attune/memory/mixins/short_term_mixin.py +182 -0
- attune/memory/nodes.py +179 -0
- attune/memory/redis_bootstrap.py +540 -0
- attune/memory/security/__init__.py +31 -0
- attune/memory/security/audit_logger.py +932 -0
- attune/memory/security/pii_scrubber.py +640 -0
- attune/memory/security/secrets_detector.py +678 -0
- attune/memory/short_term.py +2192 -0
- attune/memory/simple_storage.py +302 -0
- attune/memory/storage/__init__.py +15 -0
- attune/memory/storage_backend.py +167 -0
- attune/memory/summary_index.py +583 -0
- attune/memory/types.py +446 -0
- attune/memory/unified.py +182 -0
- attune/meta_workflows/__init__.py +74 -0
- attune/meta_workflows/agent_creator.py +248 -0
- attune/meta_workflows/builtin_templates.py +567 -0
- attune/meta_workflows/cli_commands/__init__.py +56 -0
- attune/meta_workflows/cli_commands/agent_commands.py +321 -0
- attune/meta_workflows/cli_commands/analytics_commands.py +442 -0
- attune/meta_workflows/cli_commands/config_commands.py +232 -0
- attune/meta_workflows/cli_commands/memory_commands.py +182 -0
- attune/meta_workflows/cli_commands/template_commands.py +354 -0
- attune/meta_workflows/cli_commands/workflow_commands.py +382 -0
- attune/meta_workflows/cli_meta_workflows.py +59 -0
- attune/meta_workflows/form_engine.py +292 -0
- attune/meta_workflows/intent_detector.py +409 -0
- attune/meta_workflows/models.py +569 -0
- attune/meta_workflows/pattern_learner.py +738 -0
- attune/meta_workflows/plan_generator.py +384 -0
- attune/meta_workflows/session_context.py +397 -0
- attune/meta_workflows/template_registry.py +229 -0
- attune/meta_workflows/workflow.py +984 -0
- attune/metrics/__init__.py +12 -0
- attune/metrics/collector.py +31 -0
- attune/metrics/prompt_metrics.py +194 -0
- attune/models/__init__.py +172 -0
- attune/models/__main__.py +13 -0
- attune/models/adaptive_routing.py +437 -0
- attune/models/auth_cli.py +444 -0
- attune/models/auth_strategy.py +450 -0
- attune/models/cli.py +655 -0
- attune/models/empathy_executor.py +354 -0
- attune/models/executor.py +257 -0
- attune/models/fallback.py +762 -0
- attune/models/provider_config.py +282 -0
- attune/models/registry.py +472 -0
- attune/models/tasks.py +359 -0
- attune/models/telemetry/__init__.py +71 -0
- attune/models/telemetry/analytics.py +594 -0
- attune/models/telemetry/backend.py +196 -0
- attune/models/telemetry/data_models.py +431 -0
- attune/models/telemetry/storage.py +489 -0
- attune/models/token_estimator.py +420 -0
- attune/models/validation.py +280 -0
- attune/monitoring/__init__.py +52 -0
- attune/monitoring/alerts.py +946 -0
- attune/monitoring/alerts_cli.py +448 -0
- attune/monitoring/multi_backend.py +271 -0
- attune/monitoring/otel_backend.py +362 -0
- attune/optimization/__init__.py +19 -0
- attune/optimization/context_optimizer.py +272 -0
- attune/orchestration/__init__.py +67 -0
- attune/orchestration/agent_templates.py +707 -0
- attune/orchestration/config_store.py +499 -0
- attune/orchestration/execution_strategies.py +2111 -0
- attune/orchestration/meta_orchestrator.py +1168 -0
- attune/orchestration/pattern_learner.py +696 -0
- attune/orchestration/real_tools.py +931 -0
- attune/pattern_cache.py +187 -0
- attune/pattern_library.py +542 -0
- attune/patterns/debugging/all_patterns.json +81 -0
- attune/patterns/debugging/workflow_20260107_1770825e.json +77 -0
- attune/patterns/refactoring_memory.json +89 -0
- attune/persistence.py +564 -0
- attune/platform_utils.py +265 -0
- attune/plugins/__init__.py +28 -0
- attune/plugins/base.py +361 -0
- attune/plugins/registry.py +268 -0
- attune/project_index/__init__.py +32 -0
- attune/project_index/cli.py +335 -0
- attune/project_index/index.py +667 -0
- attune/project_index/models.py +504 -0
- attune/project_index/reports.py +474 -0
- attune/project_index/scanner.py +777 -0
- attune/project_index/scanner_parallel.py +291 -0
- attune/prompts/__init__.py +61 -0
- attune/prompts/config.py +77 -0
- attune/prompts/context.py +177 -0
- attune/prompts/parser.py +285 -0
- attune/prompts/registry.py +313 -0
- attune/prompts/templates.py +208 -0
- attune/redis_config.py +302 -0
- attune/redis_memory.py +799 -0
- attune/resilience/__init__.py +56 -0
- attune/resilience/circuit_breaker.py +256 -0
- attune/resilience/fallback.py +179 -0
- attune/resilience/health.py +300 -0
- attune/resilience/retry.py +209 -0
- attune/resilience/timeout.py +135 -0
- attune/routing/__init__.py +43 -0
- attune/routing/chain_executor.py +433 -0
- attune/routing/classifier.py +217 -0
- attune/routing/smart_router.py +234 -0
- attune/routing/workflow_registry.py +343 -0
- attune/scaffolding/README.md +589 -0
- attune/scaffolding/__init__.py +35 -0
- attune/scaffolding/__main__.py +14 -0
- attune/scaffolding/cli.py +240 -0
- attune/scaffolding/templates/base_wizard.py.jinja2 +121 -0
- attune/scaffolding/templates/coach_wizard.py.jinja2 +321 -0
- attune/scaffolding/templates/domain_wizard.py.jinja2 +408 -0
- attune/scaffolding/templates/linear_flow_wizard.py.jinja2 +203 -0
- attune/socratic/__init__.py +256 -0
- attune/socratic/ab_testing.py +958 -0
- attune/socratic/blueprint.py +533 -0
- attune/socratic/cli.py +703 -0
- attune/socratic/collaboration.py +1114 -0
- attune/socratic/domain_templates.py +924 -0
- attune/socratic/embeddings.py +738 -0
- attune/socratic/engine.py +794 -0
- attune/socratic/explainer.py +682 -0
- attune/socratic/feedback.py +772 -0
- attune/socratic/forms.py +629 -0
- attune/socratic/generator.py +732 -0
- attune/socratic/llm_analyzer.py +637 -0
- attune/socratic/mcp_server.py +702 -0
- attune/socratic/session.py +312 -0
- attune/socratic/storage.py +667 -0
- attune/socratic/success.py +730 -0
- attune/socratic/visual_editor.py +860 -0
- attune/socratic/web_ui.py +958 -0
- attune/telemetry/__init__.py +39 -0
- attune/telemetry/agent_coordination.py +475 -0
- attune/telemetry/agent_tracking.py +367 -0
- attune/telemetry/approval_gates.py +545 -0
- attune/telemetry/cli.py +1231 -0
- attune/telemetry/commands/__init__.py +14 -0
- attune/telemetry/commands/dashboard_commands.py +696 -0
- attune/telemetry/event_streaming.py +409 -0
- attune/telemetry/feedback_loop.py +567 -0
- attune/telemetry/usage_tracker.py +591 -0
- attune/templates.py +754 -0
- attune/test_generator/__init__.py +38 -0
- attune/test_generator/__main__.py +14 -0
- attune/test_generator/cli.py +234 -0
- attune/test_generator/generator.py +355 -0
- attune/test_generator/risk_analyzer.py +216 -0
- attune/test_generator/templates/unit_test.py.jinja2 +272 -0
- attune/tier_recommender.py +384 -0
- attune/tools.py +183 -0
- attune/trust/__init__.py +28 -0
- attune/trust/circuit_breaker.py +579 -0
- attune/trust_building.py +527 -0
- attune/validation/__init__.py +19 -0
- attune/validation/xml_validator.py +281 -0
- attune/vscode_bridge.py +173 -0
- attune/workflow_commands.py +780 -0
- attune/workflow_patterns/__init__.py +33 -0
- attune/workflow_patterns/behavior.py +249 -0
- attune/workflow_patterns/core.py +76 -0
- attune/workflow_patterns/output.py +99 -0
- attune/workflow_patterns/registry.py +255 -0
- attune/workflow_patterns/structural.py +288 -0
- attune/workflows/__init__.py +539 -0
- attune/workflows/autonomous_test_gen.py +1268 -0
- attune/workflows/base.py +2667 -0
- attune/workflows/batch_processing.py +342 -0
- attune/workflows/bug_predict.py +1084 -0
- attune/workflows/builder.py +273 -0
- attune/workflows/caching.py +253 -0
- attune/workflows/code_review.py +1048 -0
- attune/workflows/code_review_adapters.py +312 -0
- attune/workflows/code_review_pipeline.py +722 -0
- attune/workflows/config.py +645 -0
- attune/workflows/dependency_check.py +644 -0
- attune/workflows/document_gen/__init__.py +25 -0
- attune/workflows/document_gen/config.py +30 -0
- attune/workflows/document_gen/report_formatter.py +162 -0
- attune/workflows/document_gen/workflow.py +1426 -0
- attune/workflows/document_manager.py +216 -0
- attune/workflows/document_manager_README.md +134 -0
- attune/workflows/documentation_orchestrator.py +1205 -0
- attune/workflows/history.py +510 -0
- attune/workflows/keyboard_shortcuts/__init__.py +39 -0
- attune/workflows/keyboard_shortcuts/generators.py +391 -0
- attune/workflows/keyboard_shortcuts/parsers.py +416 -0
- attune/workflows/keyboard_shortcuts/prompts.py +295 -0
- attune/workflows/keyboard_shortcuts/schema.py +193 -0
- attune/workflows/keyboard_shortcuts/workflow.py +509 -0
- attune/workflows/llm_base.py +363 -0
- attune/workflows/manage_docs.py +87 -0
- attune/workflows/manage_docs_README.md +134 -0
- attune/workflows/manage_documentation.py +821 -0
- attune/workflows/new_sample_workflow1.py +149 -0
- attune/workflows/new_sample_workflow1_README.md +150 -0
- attune/workflows/orchestrated_health_check.py +849 -0
- attune/workflows/orchestrated_release_prep.py +600 -0
- attune/workflows/output.py +413 -0
- attune/workflows/perf_audit.py +863 -0
- attune/workflows/pr_review.py +762 -0
- attune/workflows/progress.py +785 -0
- attune/workflows/progress_server.py +322 -0
- attune/workflows/progressive/README 2.md +454 -0
- attune/workflows/progressive/README.md +454 -0
- attune/workflows/progressive/__init__.py +82 -0
- attune/workflows/progressive/cli.py +219 -0
- attune/workflows/progressive/core.py +488 -0
- attune/workflows/progressive/orchestrator.py +723 -0
- attune/workflows/progressive/reports.py +520 -0
- attune/workflows/progressive/telemetry.py +274 -0
- attune/workflows/progressive/test_gen.py +495 -0
- attune/workflows/progressive/workflow.py +589 -0
- attune/workflows/refactor_plan.py +694 -0
- attune/workflows/release_prep.py +895 -0
- attune/workflows/release_prep_crew.py +969 -0
- attune/workflows/research_synthesis.py +404 -0
- attune/workflows/routing.py +168 -0
- attune/workflows/secure_release.py +593 -0
- attune/workflows/security_adapters.py +297 -0
- attune/workflows/security_audit.py +1329 -0
- attune/workflows/security_audit_phase3.py +355 -0
- attune/workflows/seo_optimization.py +633 -0
- attune/workflows/step_config.py +234 -0
- attune/workflows/telemetry_mixin.py +269 -0
- attune/workflows/test5.py +125 -0
- attune/workflows/test5_README.md +158 -0
- attune/workflows/test_coverage_boost_crew.py +849 -0
- attune/workflows/test_gen/__init__.py +52 -0
- attune/workflows/test_gen/ast_analyzer.py +249 -0
- attune/workflows/test_gen/config.py +88 -0
- attune/workflows/test_gen/data_models.py +38 -0
- attune/workflows/test_gen/report_formatter.py +289 -0
- attune/workflows/test_gen/test_templates.py +381 -0
- attune/workflows/test_gen/workflow.py +655 -0
- attune/workflows/test_gen.py +54 -0
- attune/workflows/test_gen_behavioral.py +477 -0
- attune/workflows/test_gen_parallel.py +341 -0
- attune/workflows/test_lifecycle.py +526 -0
- attune/workflows/test_maintenance.py +627 -0
- attune/workflows/test_maintenance_cli.py +590 -0
- attune/workflows/test_maintenance_crew.py +840 -0
- attune/workflows/test_runner.py +622 -0
- attune/workflows/tier_tracking.py +531 -0
- attune/workflows/xml_enhanced_crew.py +285 -0
- attune_ai-2.0.0.dist-info/METADATA +1026 -0
- attune_ai-2.0.0.dist-info/RECORD +457 -0
- attune_ai-2.0.0.dist-info/WHEEL +5 -0
- attune_ai-2.0.0.dist-info/entry_points.txt +26 -0
- attune_ai-2.0.0.dist-info/licenses/LICENSE +201 -0
- attune_ai-2.0.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
- attune_ai-2.0.0.dist-info/top_level.txt +5 -0
- attune_healthcare/__init__.py +13 -0
- attune_healthcare/monitors/__init__.py +9 -0
- attune_healthcare/monitors/clinical_protocol_monitor.py +315 -0
- attune_healthcare/monitors/monitoring/__init__.py +44 -0
- attune_healthcare/monitors/monitoring/protocol_checker.py +300 -0
- attune_healthcare/monitors/monitoring/protocol_loader.py +214 -0
- attune_healthcare/monitors/monitoring/sensor_parsers.py +306 -0
- attune_healthcare/monitors/monitoring/trajectory_analyzer.py +389 -0
- attune_llm/README.md +553 -0
- attune_llm/__init__.py +28 -0
- attune_llm/agent_factory/__init__.py +53 -0
- attune_llm/agent_factory/adapters/__init__.py +85 -0
- attune_llm/agent_factory/adapters/autogen_adapter.py +312 -0
- attune_llm/agent_factory/adapters/crewai_adapter.py +483 -0
- attune_llm/agent_factory/adapters/haystack_adapter.py +298 -0
- attune_llm/agent_factory/adapters/langchain_adapter.py +362 -0
- attune_llm/agent_factory/adapters/langgraph_adapter.py +333 -0
- attune_llm/agent_factory/adapters/native.py +228 -0
- attune_llm/agent_factory/adapters/wizard_adapter.py +423 -0
- attune_llm/agent_factory/base.py +305 -0
- attune_llm/agent_factory/crews/__init__.py +67 -0
- attune_llm/agent_factory/crews/code_review.py +1113 -0
- attune_llm/agent_factory/crews/health_check.py +1262 -0
- attune_llm/agent_factory/crews/refactoring.py +1128 -0
- attune_llm/agent_factory/crews/security_audit.py +1018 -0
- attune_llm/agent_factory/decorators.py +287 -0
- attune_llm/agent_factory/factory.py +558 -0
- attune_llm/agent_factory/framework.py +193 -0
- attune_llm/agent_factory/memory_integration.py +328 -0
- attune_llm/agent_factory/resilient.py +320 -0
- attune_llm/agents_md/__init__.py +22 -0
- attune_llm/agents_md/loader.py +218 -0
- attune_llm/agents_md/parser.py +271 -0
- attune_llm/agents_md/registry.py +307 -0
- attune_llm/claude_memory.py +466 -0
- attune_llm/cli/__init__.py +8 -0
- attune_llm/cli/sync_claude.py +487 -0
- attune_llm/code_health.py +1313 -0
- attune_llm/commands/__init__.py +51 -0
- attune_llm/commands/context.py +375 -0
- attune_llm/commands/loader.py +301 -0
- attune_llm/commands/models.py +231 -0
- attune_llm/commands/parser.py +371 -0
- attune_llm/commands/registry.py +429 -0
- attune_llm/config/__init__.py +29 -0
- attune_llm/config/unified.py +291 -0
- attune_llm/context/__init__.py +22 -0
- attune_llm/context/compaction.py +455 -0
- attune_llm/context/manager.py +434 -0
- attune_llm/contextual_patterns.py +361 -0
- attune_llm/core.py +907 -0
- attune_llm/git_pattern_extractor.py +435 -0
- attune_llm/hooks/__init__.py +24 -0
- attune_llm/hooks/config.py +306 -0
- attune_llm/hooks/executor.py +289 -0
- attune_llm/hooks/registry.py +302 -0
- attune_llm/hooks/scripts/__init__.py +39 -0
- attune_llm/hooks/scripts/evaluate_session.py +201 -0
- attune_llm/hooks/scripts/first_time_init.py +285 -0
- attune_llm/hooks/scripts/pre_compact.py +207 -0
- attune_llm/hooks/scripts/session_end.py +183 -0
- attune_llm/hooks/scripts/session_start.py +163 -0
- attune_llm/hooks/scripts/suggest_compact.py +225 -0
- attune_llm/learning/__init__.py +30 -0
- attune_llm/learning/evaluator.py +438 -0
- attune_llm/learning/extractor.py +514 -0
- attune_llm/learning/storage.py +560 -0
- attune_llm/levels.py +227 -0
- attune_llm/pattern_confidence.py +414 -0
- attune_llm/pattern_resolver.py +272 -0
- attune_llm/pattern_summary.py +350 -0
- attune_llm/providers.py +967 -0
- attune_llm/routing/__init__.py +32 -0
- attune_llm/routing/model_router.py +362 -0
- attune_llm/security/IMPLEMENTATION_SUMMARY.md +413 -0
- attune_llm/security/PHASE2_COMPLETE.md +384 -0
- attune_llm/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
- attune_llm/security/QUICK_REFERENCE.md +316 -0
- attune_llm/security/README.md +262 -0
- attune_llm/security/__init__.py +62 -0
- attune_llm/security/audit_logger.py +929 -0
- attune_llm/security/audit_logger_example.py +152 -0
- attune_llm/security/pii_scrubber.py +640 -0
- attune_llm/security/secrets_detector.py +678 -0
- attune_llm/security/secrets_detector_example.py +304 -0
- attune_llm/security/secure_memdocs.py +1192 -0
- attune_llm/security/secure_memdocs_example.py +278 -0
- attune_llm/session_status.py +745 -0
- attune_llm/state.py +246 -0
- attune_llm/utils/__init__.py +5 -0
- attune_llm/utils/tokens.py +349 -0
- attune_software/SOFTWARE_PLUGIN_README.md +57 -0
- attune_software/__init__.py +13 -0
- attune_software/cli/__init__.py +120 -0
- attune_software/cli/inspect.py +362 -0
- attune_software/cli.py +574 -0
- attune_software/plugin.py +188 -0
- workflow_scaffolding/__init__.py +11 -0
- workflow_scaffolding/__main__.py +12 -0
- workflow_scaffolding/cli.py +206 -0
- workflow_scaffolding/generator.py +265 -0
|
@@ -0,0 +1,738 @@
|
|
|
1
|
+
"""Vector Embeddings for Semantic Goal Matching
|
|
2
|
+
|
|
3
|
+
Provides semantic similarity search for finding similar past goals and their
|
|
4
|
+
successful workflow configurations.
|
|
5
|
+
|
|
6
|
+
Supports multiple embedding backends:
|
|
7
|
+
1. Local: Simple TF-IDF based embeddings (no external dependencies)
|
|
8
|
+
2. OpenAI: OpenAI's text-embedding-3-small
|
|
9
|
+
3. Anthropic: Uses Claude for semantic analysis (via message API)
|
|
10
|
+
4. Sentence Transformers: Local neural embeddings (requires torch)
|
|
11
|
+
|
|
12
|
+
Copyright 2026 Smart-AI-Memory
|
|
13
|
+
Licensed under Fair Source License 0.9
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import hashlib
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import math
|
|
22
|
+
import os
|
|
23
|
+
import re
|
|
24
|
+
from abc import ABC, abstractmethod
|
|
25
|
+
from collections.abc import Iterator
|
|
26
|
+
from dataclasses import dataclass, field
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# =============================================================================
|
|
34
|
+
# DATA STRUCTURES
|
|
35
|
+
# =============================================================================
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class EmbeddedGoal:
    """A goal paired with its embedding vector and bookkeeping metadata."""

    goal_id: str  # unique identifier for this goal
    goal_text: str  # original natural-language goal text
    embedding: list[float]  # embedding vector computed from goal_text
    metadata: dict[str, Any] = field(default_factory=dict)
    domains: list[str] = field(default_factory=list)
    workflow_id: str | None = None  # associated workflow, if any
    success_score: float = 0.0

    def to_dict(self) -> dict[str, Any]:
        """Serialize this goal to a plain dictionary (inverse of from_dict)."""
        keys = (
            "goal_id",
            "goal_text",
            "embedding",
            "metadata",
            "domains",
            "workflow_id",
            "success_score",
        )
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EmbeddedGoal:
        """Rebuild an EmbeddedGoal from a dictionary produced by to_dict.

        Missing optional keys fall back to the dataclass defaults.
        """
        required = {name: data[name] for name in ("goal_id", "goal_text", "embedding")}
        return cls(
            metadata=data.get("metadata", {}),
            domains=data.get("domains", []),
            workflow_id=data.get("workflow_id"),
            success_score=data.get("success_score", 0.0),
            **required,
        )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class SimilarityResult:
    """Result of a similarity search.

    Pairs a stored goal with its similarity score and its position in the
    ranked result list returned by the search.
    """

    goal: EmbeddedGoal  # the matched stored goal
    similarity: float  # similarity score; presumably cosine (embeddings are L2-normalized by TF-IDF provider) — confirm against search impl
    rank: int  # position within the returned results (ordering defined by the caller)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# =============================================================================
|
|
86
|
+
# EMBEDDING PROVIDERS
|
|
87
|
+
# =============================================================================
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class EmbeddingProvider(ABC):
    """Interface that every embedding backend implements."""

    @abstractmethod
    def embed(self, text: str) -> list[float]:
        """Embed a single piece of text.

        Args:
            text: Text to embed

        Returns:
            Embedding vector as list of floats
        """
        ...

    @abstractmethod
    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed several texts at once.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors
        """
        ...

    @property
    @abstractmethod
    def dimension(self) -> int:
        """Length of the vectors produced by this provider."""
        ...
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class TFIDFEmbeddingProvider(EmbeddingProvider):
    """Simple TF-IDF based embeddings (no external dependencies).

    Uses term frequency-inverse document frequency with feature hashing to
    produce dense, L2-normalized vectors of a fixed dimension. Before
    ``fit()`` is called, every term gets a default IDF weight of 1.0, so the
    provider degrades gracefully to hashed term-frequency embeddings.
    """

    def __init__(self, dimension: int = 256, vocabulary_size: int = 10000):
        """Initialize TF-IDF provider.

        Args:
            dimension: Output embedding dimension
            vocabulary_size: Maximum vocabulary size (reserved; the hashing
                projection does not currently consult it)
        """
        self._dimension = dimension
        self._vocabulary_size = vocabulary_size
        # NOTE(review): _vocabulary is never populated anywhere in this class;
        # kept for compatibility / future explicit-vocabulary support.
        self._vocabulary: dict[str, int] = {}
        self._idf: dict[str, float] = {}  # term -> IDF weight, set by fit()
        self._document_count = 0

    @property
    def dimension(self) -> int:
        """Length of the vectors produced by this provider."""
        return self._dimension

    def _tokenize(self, text: str) -> list[str]:
        """Lowercase text and split it into identifier-like word tokens."""
        # Simple tokenization: lowercase, keep words starting with a letter
        text = text.lower()
        return re.findall(r"\b[a-z][a-z0-9_]*\b", text)

    def _compute_tf(self, tokens: list[str]) -> dict[str, float]:
        """Compute length-normalized term frequencies."""
        tf: dict[str, int] = {}
        for token in tokens:
            tf[token] = tf.get(token, 0) + 1

        # Normalize by document length; `or 1` avoids division by zero on empty input
        total = len(tokens) or 1
        return {term: count / total for term, count in tf.items()}

    def _hash_to_bucket(self, term: str) -> int:
        """Hash a term to a fixed bucket for dimensionality reduction."""
        # MD5 is fine here: used only as a hash function, not for security
        h = int(hashlib.md5(term.encode(), usedforsecurity=False).hexdigest(), 16)
        return h % self._dimension

    def embed(self, text: str) -> list[float]:
        """Generate a TF-IDF based embedding.

        Uses feature hashing to project the sparse TF-IDF vector to a fixed
        dimension; returns an all-zero vector for text with no tokens.
        """
        tokens = self._tokenize(text)
        tf = self._compute_tf(tokens)

        vector = [0.0] * self._dimension

        # Project TF-IDF scores to fixed dimension using feature hashing
        for term, freq in tf.items():
            bucket = self._hash_to_bucket(term)
            # Sign trick (hashing-trick convention) reduces collision bias;
            # sha256 here is independent of the md5 bucket hash
            sign = 1 if int(hashlib.sha256(term.encode()).hexdigest(), 16) % 2 == 0 else -1
            idf = self._idf.get(term, 1.0)  # unseen terms default to 1.0
            vector[bucket] += sign * freq * idf

        # L2 normalize; `or 1.0` leaves an all-zero vector unchanged
        norm = math.sqrt(sum(x * x for x in vector)) or 1.0
        return [x / norm for x in vector]

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed multiple texts."""
        return [self.embed(text) for text in texts]

    def fit(self, documents: list[str]) -> None:
        """Fit IDF weights on a document corpus.

        Replaces any weights learned by a previous ``fit()`` call, so stale
        terms from an old corpus do not linger in ``_idf``.

        Args:
            documents: List of documents to compute IDF from
        """
        self._document_count = len(documents)
        doc_freq: dict[str, int] = {}

        for doc in documents:
            # Count each term at most once per document
            for token in set(self._tokenize(doc)):
                doc_freq[token] = doc_freq.get(token, 0) + 1

        # Smoothed IDF; the trailing +1 keeps all weights strictly positive.
        # Rebuilt from scratch (bug fix: previously accumulated across fits).
        self._idf = {
            term: math.log((self._document_count + 1) / (df + 1)) + 1
            for term, df in doc_freq.items()
        }
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class AnthropicEmbeddingProvider(EmbeddingProvider):
    """Use Claude for semantic embeddings via similarity scoring.

    Note: Anthropic doesn't have a dedicated embedding API, so we use
    Claude to generate semantic feature vectors based on predefined
    aspects relevant to workflow generation. When no API key / client is
    available, or a request fails, the provider falls back to local TF-IDF
    embeddings of the same dimension.
    """

    # Semantic aspects each goal is scored against; one vector component per aspect
    ASPECTS = [
        "code review and quality",
        "security and vulnerability",
        "testing and coverage",
        "documentation and comments",
        "performance and optimization",
        "refactoring and cleanup",
        "deployment and CI/CD",
        "debugging and troubleshooting",
        "architecture and design",
        "data processing and ETL",
    ]

    def __init__(self, api_key: str | None = None, dimension: int = 64):
        """Initialize Anthropic provider.

        Args:
            api_key: Anthropic API key (defaults to the ANTHROPIC_API_KEY
                environment variable)
            dimension: Number of semantic aspects to score (capped at
                ``len(ASPECTS)``)
        """
        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        self._dimension = min(dimension, len(self.ASPECTS))
        self._client = None
        # Lazily-built local fallback, cached so repeated embed() calls on the
        # fallback path don't construct a new provider each time
        self._fallback: TFIDFEmbeddingProvider | None = None

    @property
    def dimension(self) -> int:
        """Length of the vectors produced by this provider."""
        return self._dimension

    def _get_client(self):
        """Lazy-load the Anthropic client; returns None when unavailable."""
        if self._client is None and self.api_key:
            try:
                import anthropic

                self._client = anthropic.Anthropic(api_key=self.api_key)
            except ImportError:
                logger.warning("anthropic package not installed")
        return self._client

    def _fallback_embed(self, text: str) -> list[float]:
        """Embed via the cached local TF-IDF provider."""
        if self._fallback is None:
            self._fallback = TFIDFEmbeddingProvider(dimension=self._dimension)
        return self._fallback.embed(text)

    def embed(self, text: str) -> list[float]:
        """Generate semantic embedding by scoring relevance to aspects."""
        client = self._get_client()
        if not client:
            # No client available: fall back to TF-IDF
            return self._fallback_embed(text)

        aspects = self.ASPECTS[: self._dimension]
        prompt = f"""Rate how relevant this goal is to each aspect on a scale of 0.0 to 1.0.

Goal: "{text}"

Aspects to rate:
{chr(10).join(f"{i + 1}. {aspect}" for i, aspect in enumerate(aspects))}

Respond with ONLY a JSON array of numbers, one per aspect, in order.
Example: [0.8, 0.2, 0.5, ...]"""

        try:
            response = client.messages.create(
                model="claude-3-5-haiku-20241022",
                max_tokens=200,
                messages=[{"role": "user", "content": prompt}],
            )
            content = response.content[0].text if response.content else "[]"

            # Parse JSON array of per-aspect scores
            scores = json.loads(content.strip())
            if isinstance(scores, list) and len(scores) >= self._dimension:
                return [float(s) for s in scores[: self._dimension]]

        except Exception as e:
            # Broad catch is deliberate: any API or parse failure degrades to
            # the local fallback instead of crashing the caller
            logger.warning("Anthropic embedding failed: %s", e)

        # Fallback
        return self._fallback_embed(text)

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed multiple texts (sequentially; one API call per text)."""
        return [self.embed(text) for text in texts]
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
class SentenceTransformerProvider(EmbeddingProvider):
    """Local neural embeddings backed by the sentence-transformers library.

    Requires: pip install sentence-transformers
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Initialize sentence transformer.

        Args:
            model_name: HuggingFace model name
        """
        self.model_name = model_name
        self._model = None
        self._dimension: int | None = None

    @property
    def dimension(self) -> int:
        # Loading the model is what discovers the real dimension.
        if self._dimension is None:
            self._load_model()
        return self._dimension or 384

    def _load_model(self):
        """Import and instantiate the model on first use (lazy)."""
        if self._model is not None:
            return
        try:
            from sentence_transformers import SentenceTransformer

            self._model = SentenceTransformer(self.model_name)
            self._dimension = self._model.get_sentence_embedding_dimension()
        except ImportError:
            # Library missing: remember a default dimension so callers
            # still get consistently sized fallback vectors.
            logger.warning("sentence-transformers not installed")
            self._dimension = 384

    def embed(self, text: str) -> list[float]:
        """Return the neural embedding for a single text."""
        self._load_model()
        model = self._model
        if model is None:
            # Fallback when sentence-transformers is unavailable.
            return TFIDFEmbeddingProvider(dimension=384).embed(text)
        return model.encode(text, convert_to_numpy=True).tolist()

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Return neural embeddings for many texts in one encode call."""
        self._load_model()
        model = self._model
        if model is None:
            tfidf = TFIDFEmbeddingProvider(dimension=384)
            return [tfidf.embed(t) for t in texts]
        return [vec.tolist() for vec in model.encode(texts, convert_to_numpy=True)]
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
# =============================================================================
|
|
365
|
+
# VECTOR STORE
|
|
366
|
+
# =============================================================================
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
class VectorStore:
    """In-memory vector store with similarity search.

    Goals are embedded on insertion via the configured provider and ranked
    by cosine similarity at query time. Supports persistence to JSON files;
    when a storage path is configured, every mutation is saved immediately.
    """

    def __init__(
        self,
        provider: EmbeddingProvider | None = None,
        storage_path: Path | str | None = None,
    ):
        """Initialize vector store.

        Args:
            provider: Embedding provider to use (defaults to TF-IDF)
            storage_path: Path to persist vectors; None disables persistence
        """
        self.provider = provider or TFIDFEmbeddingProvider()
        self.storage_path = Path(storage_path) if storage_path else None
        self._goals: dict[str, EmbeddedGoal] = {}

        # Load previously persisted goals, if any.
        if self.storage_path and self.storage_path.exists():
            self._load()

    def add(
        self,
        goal_text: str,
        goal_id: str | None = None,
        metadata: dict[str, Any] | None = None,
        domains: list[str] | None = None,
        workflow_id: str | None = None,
        success_score: float = 0.0,
    ) -> EmbeddedGoal:
        """Add a goal to the store.

        Args:
            goal_text: The goal text
            goal_id: Optional ID (derived from a hash of the text if omitted)
            metadata: Optional metadata
            domains: Optional domain tags
            workflow_id: Optional linked workflow ID
            success_score: Success score (0.0-1.0)

        Returns:
            The embedded goal
        """
        if goal_id is None:
            # Deterministic ID: identical text maps to the same entry.
            goal_id = hashlib.sha256(goal_text.encode()).hexdigest()[:12]

        embedding = self.provider.embed(goal_text)

        goal = EmbeddedGoal(
            goal_id=goal_id,
            goal_text=goal_text,
            embedding=embedding,
            metadata=metadata or {},
            domains=domains or [],
            workflow_id=workflow_id,
            success_score=success_score,
        )

        self._goals[goal_id] = goal

        # Auto-save if storage configured
        if self.storage_path:
            self._save()

        return goal

    def search(
        self,
        query: str,
        top_k: int = 5,
        min_similarity: float = 0.0,
        domain_filter: str | None = None,
    ) -> list[SimilarityResult]:
        """Search for similar goals.

        Args:
            query: Query text
            top_k: Number of results to return
            min_similarity: Minimum similarity threshold
            domain_filter: Optional domain to filter by

        Returns:
            List of similarity results sorted by relevance
        """
        if not self._goals:
            return []

        query_embedding = self.provider.embed(query)

        # Apply the optional domain filter before scoring.
        candidates = [
            goal
            for goal in self._goals.values()
            if not domain_filter or domain_filter in goal.domains
        ]
        return self._rank(query_embedding, candidates, top_k, min_similarity)

    def search_by_embedding(
        self,
        embedding: list[float],
        top_k: int = 5,
        min_similarity: float = 0.0,
    ) -> list[SimilarityResult]:
        """Search using a pre-computed embedding.

        Args:
            embedding: Pre-computed embedding vector
            top_k: Number of results
            min_similarity: Minimum threshold

        Returns:
            List of similarity results
        """
        return self._rank(embedding, list(self._goals.values()), top_k, min_similarity)

    def _rank(
        self,
        embedding: list[float],
        goals: list[EmbeddedGoal],
        top_k: int,
        min_similarity: float,
    ) -> list[SimilarityResult]:
        """Score *goals* against *embedding*, returning the top_k ranked results.

        Shared backend for search() and search_by_embedding() so the
        score/sort/rank logic exists in exactly one place.
        """
        scored: list[tuple[float, EmbeddedGoal]] = []
        for goal in goals:
            similarity = self._cosine_similarity(embedding, goal.embedding)
            if similarity >= min_similarity:
                scored.append((similarity, goal))

        # Sort by similarity descending.
        scored.sort(key=lambda x: x[0], reverse=True)

        return [
            SimilarityResult(goal=goal, similarity=sim, rank=i + 1)
            for i, (sim, goal) in enumerate(scored[:top_k])
        ]

    def get(self, goal_id: str) -> EmbeddedGoal | None:
        """Get a goal by ID, or None if not present."""
        return self._goals.get(goal_id)

    def remove(self, goal_id: str) -> bool:
        """Remove a goal by ID.

        Returns:
            True if the goal existed and was removed, False otherwise.
        """
        if goal_id in self._goals:
            del self._goals[goal_id]
            if self.storage_path:
                self._save()
            return True
        return False

    def update_success_score(self, goal_id: str, score: float):
        """Update the success score for a goal (no-op for unknown IDs)."""
        if goal_id in self._goals:
            self._goals[goal_id].success_score = score
            if self.storage_path:
                self._save()

    def __len__(self) -> int:
        return len(self._goals)

    def __iter__(self) -> Iterator[EmbeddedGoal]:
        return iter(self._goals.values())

    def _cosine_similarity(self, a: list[float], b: list[float]) -> float:
        """Compute cosine similarity between two vectors.

        Returns 0.0 for mismatched lengths or zero-norm vectors rather
        than raising, so a bad embedding never breaks a search.
        """
        if len(a) != len(b):
            return 0.0

        dot = sum(x * y for x, y in zip(a, b, strict=False))
        norm_a = math.sqrt(sum(x * x for x in a))
        norm_b = math.sqrt(sum(x * x for x in b))

        if norm_a == 0 or norm_b == 0:
            return 0.0

        return dot / (norm_a * norm_b)

    def _save(self):
        """Persist all goals to storage atomically."""
        if not self.storage_path:
            return

        self.storage_path.parent.mkdir(parents=True, exist_ok=True)

        data = {
            "version": 1,
            "goals": [g.to_dict() for g in self._goals.values()],
        }

        # Write to a sibling temp file and atomically replace the target so
        # a crash mid-write cannot corrupt the existing store.
        tmp_path = self.storage_path.with_name(self.storage_path.name + ".tmp")
        with tmp_path.open("w") as f:
            json.dump(data, f, indent=2)
        tmp_path.replace(self.storage_path)

    def _load(self):
        """Load goals from storage; corrupt files are logged, not fatal."""
        if not self.storage_path or not self.storage_path.exists():
            return

        try:
            with self.storage_path.open("r") as f:
                data = json.load(f)

            for goal_data in data.get("goals", []):
                goal = EmbeddedGoal.from_dict(goal_data)
                self._goals[goal.goal_id] = goal

        except Exception as e:
            logger.warning(f"Failed to load vector store: {e}")
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
# =============================================================================
|
|
584
|
+
# SEMANTIC GOAL MATCHER
|
|
585
|
+
# =============================================================================
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
class SemanticGoalMatcher:
|
|
589
|
+
"""High-level API for semantic goal matching.
|
|
590
|
+
|
|
591
|
+
Integrates with the Socratic workflow builder to find similar
|
|
592
|
+
past goals and their successful workflow configurations.
|
|
593
|
+
"""
|
|
594
|
+
|
|
595
|
+
def __init__(
|
|
596
|
+
self,
|
|
597
|
+
provider: str = "tfidf",
|
|
598
|
+
storage_path: Path | str | None = None,
|
|
599
|
+
api_key: str | None = None,
|
|
600
|
+
):
|
|
601
|
+
"""Initialize the matcher.
|
|
602
|
+
|
|
603
|
+
Args:
|
|
604
|
+
provider: Embedding provider ("tfidf", "anthropic", "sentence-transformer")
|
|
605
|
+
storage_path: Path to persist vectors
|
|
606
|
+
api_key: API key for cloud providers
|
|
607
|
+
"""
|
|
608
|
+
# Default storage path
|
|
609
|
+
if storage_path is None:
|
|
610
|
+
storage_path = Path.home() / ".empathy" / "socratic" / "embeddings.json"
|
|
611
|
+
|
|
612
|
+
# Create embedding provider
|
|
613
|
+
if provider == "anthropic":
|
|
614
|
+
embedding_provider = AnthropicEmbeddingProvider(api_key=api_key)
|
|
615
|
+
elif provider == "sentence-transformer":
|
|
616
|
+
embedding_provider = SentenceTransformerProvider()
|
|
617
|
+
else:
|
|
618
|
+
embedding_provider = TFIDFEmbeddingProvider()
|
|
619
|
+
|
|
620
|
+
self.store = VectorStore(
|
|
621
|
+
provider=embedding_provider,
|
|
622
|
+
storage_path=storage_path,
|
|
623
|
+
)
|
|
624
|
+
|
|
625
|
+
def index_goal(
|
|
626
|
+
self,
|
|
627
|
+
goal_text: str,
|
|
628
|
+
workflow_id: str | None = None,
|
|
629
|
+
domains: list[str] | None = None,
|
|
630
|
+
success_score: float = 0.0,
|
|
631
|
+
metadata: dict[str, Any] | None = None,
|
|
632
|
+
) -> str:
|
|
633
|
+
"""Index a goal for future similarity search.
|
|
634
|
+
|
|
635
|
+
Args:
|
|
636
|
+
goal_text: The goal text
|
|
637
|
+
workflow_id: ID of the generated workflow
|
|
638
|
+
domains: Detected domains
|
|
639
|
+
success_score: Success score from execution
|
|
640
|
+
metadata: Additional metadata
|
|
641
|
+
|
|
642
|
+
Returns:
|
|
643
|
+
Goal ID
|
|
644
|
+
"""
|
|
645
|
+
goal = self.store.add(
|
|
646
|
+
goal_text=goal_text,
|
|
647
|
+
domains=domains,
|
|
648
|
+
workflow_id=workflow_id,
|
|
649
|
+
success_score=success_score,
|
|
650
|
+
metadata=metadata or {},
|
|
651
|
+
)
|
|
652
|
+
return goal.goal_id
|
|
653
|
+
|
|
654
|
+
def find_similar(
|
|
655
|
+
self,
|
|
656
|
+
goal_text: str,
|
|
657
|
+
top_k: int = 5,
|
|
658
|
+
min_similarity: float = 0.3,
|
|
659
|
+
min_success_score: float = 0.0,
|
|
660
|
+
) -> list[dict[str, Any]]:
|
|
661
|
+
"""Find similar past goals.
|
|
662
|
+
|
|
663
|
+
Args:
|
|
664
|
+
goal_text: The goal to search for
|
|
665
|
+
top_k: Number of results
|
|
666
|
+
min_similarity: Minimum similarity threshold
|
|
667
|
+
min_success_score: Minimum success score filter
|
|
668
|
+
|
|
669
|
+
Returns:
|
|
670
|
+
List of similar goals with their workflows
|
|
671
|
+
"""
|
|
672
|
+
results = self.store.search(
|
|
673
|
+
query=goal_text,
|
|
674
|
+
top_k=top_k * 2, # Get more to filter
|
|
675
|
+
min_similarity=min_similarity,
|
|
676
|
+
)
|
|
677
|
+
|
|
678
|
+
# Filter by success score and format results
|
|
679
|
+
formatted = []
|
|
680
|
+
for result in results:
|
|
681
|
+
if result.goal.success_score >= min_success_score:
|
|
682
|
+
formatted.append(
|
|
683
|
+
{
|
|
684
|
+
"goal_id": result.goal.goal_id,
|
|
685
|
+
"goal_text": result.goal.goal_text,
|
|
686
|
+
"similarity": round(result.similarity, 3),
|
|
687
|
+
"workflow_id": result.goal.workflow_id,
|
|
688
|
+
"domains": result.goal.domains,
|
|
689
|
+
"success_score": result.goal.success_score,
|
|
690
|
+
"metadata": result.goal.metadata,
|
|
691
|
+
}
|
|
692
|
+
)
|
|
693
|
+
|
|
694
|
+
if len(formatted) >= top_k:
|
|
695
|
+
break
|
|
696
|
+
|
|
697
|
+
return formatted
|
|
698
|
+
|
|
699
|
+
def suggest_workflow(
|
|
700
|
+
self,
|
|
701
|
+
goal_text: str,
|
|
702
|
+
min_similarity: float = 0.5,
|
|
703
|
+
min_success_score: float = 0.7,
|
|
704
|
+
) -> dict[str, Any] | None:
|
|
705
|
+
"""Suggest a workflow based on similar successful goals.
|
|
706
|
+
|
|
707
|
+
Args:
|
|
708
|
+
goal_text: The goal to find workflow for
|
|
709
|
+
min_similarity: Minimum similarity required
|
|
710
|
+
min_success_score: Minimum success score required
|
|
711
|
+
|
|
712
|
+
Returns:
|
|
713
|
+
Best matching workflow suggestion or None
|
|
714
|
+
"""
|
|
715
|
+
similar = self.find_similar(
|
|
716
|
+
goal_text=goal_text,
|
|
717
|
+
top_k=1,
|
|
718
|
+
min_similarity=min_similarity,
|
|
719
|
+
min_success_score=min_success_score,
|
|
720
|
+
)
|
|
721
|
+
|
|
722
|
+
if similar:
|
|
723
|
+
return similar[0]
|
|
724
|
+
return None
|
|
725
|
+
|
|
726
|
+
def update_success(self, goal_id: str, success_score: float):
|
|
727
|
+
"""Update success score after workflow execution.
|
|
728
|
+
|
|
729
|
+
Args:
|
|
730
|
+
goal_id: Goal ID to update
|
|
731
|
+
success_score: New success score (0.0-1.0)
|
|
732
|
+
"""
|
|
733
|
+
self.store.update_success_score(goal_id, success_score)
|
|
734
|
+
|
|
735
|
+
@property
|
|
736
|
+
def indexed_count(self) -> int:
|
|
737
|
+
"""Number of indexed goals."""
|
|
738
|
+
return len(self.store)
|