agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
caas/storage/store.py
ADDED
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Storage module for managing documents and context.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import copy
|
|
8
|
+
import json
|
|
9
|
+
from typing import Dict, Optional, List, Tuple, Any
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
|
|
13
|
+
from caas.models import Document, DocumentType, ContentTier, SourceCitation
|
|
14
|
+
from caas.decay import calculate_decay_factor
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DocumentStore:
    """In-memory document store with optional JSON persistence.

    Documents live in ``self.documents`` keyed by document ID. When a
    ``storage_path`` is configured, the complete collection is rewritten to
    that file after every ``add`` and every successful ``delete``.
    """

    def __init__(self, storage_path: Optional[str] = None):
        """
        Initialize document store.

        Args:
            storage_path: Optional path of the JSON file used for persistence.
                If the path already exists its contents are loaded immediately.
        """
        self.documents: Dict[str, Document] = {}
        self.storage_path = Path(storage_path) if storage_path else None

        if self.storage_path and self.storage_path.exists():
            self._load_from_disk()

    def add(self, document: Document) -> str:
        """
        Add a document to the store, replacing any document with the same ID.

        Args:
            document: The document to add

        Returns:
            The document ID
        """
        self.documents[document.id] = document

        if self.storage_path:
            self._save_to_disk()

        return document.id

    def get(self, document_id: str) -> Optional[Document]:
        """
        Retrieve a document by ID.

        Args:
            document_id: The document ID

        Returns:
            The document if found, None otherwise
        """
        return self.documents.get(document_id)

    def list_all(self) -> List[Document]:
        """
        List all documents in the store.

        Returns:
            A new list containing every stored document
        """
        return list(self.documents.values())

    def list_by_type(self, doc_type: DocumentType) -> List[Document]:
        """
        List documents of a specific type.

        Args:
            doc_type: The document type to filter by

        Returns:
            Documents whose ``detected_type`` equals ``doc_type``
        """
        return [
            doc for doc in self.documents.values()
            if doc.detected_type == doc_type
        ]

    def delete(self, document_id: str) -> bool:
        """
        Delete a document from the store.

        Args:
            document_id: The document ID

        Returns:
            True if deleted, False if not found
        """
        if document_id not in self.documents:
            return False

        del self.documents[document_id]

        if self.storage_path:
            self._save_to_disk()

        return True

    def search(
        self,
        query: str,
        enable_time_decay: bool = True,
        decay_rate: float = 1.0
    ) -> List[Document]:
        """
        Search documents by title, content and sections, with optional time decay.

        Scoring (all comparisons are case-insensitive substring checks):
        - full query in the document title: +1.0
        - full query in the document content: +0.1 per occurrence, capped at 0.5
        - each query word longer than two characters:
            +0.3 if in the title; +0.05 per occurrence in content, capped at 0.2
        - per section: +0.4 if the full query is in the section title, and for
          each query word longer than two characters +0.2 if in the section
          title and +0.02 per occurrence in the section content, capped at 0.1

        When time decay is enabled the match score is multiplied by the value
        returned by ``calculate_decay_factor`` for the document's
        ``ingestion_timestamp``.

        Side effect: each returned document gets ``metadata['_search_score']``
        and ``metadata['_decay_factor']`` set to the values from this search.
        Documents that do not match are left untouched, so they may still
        carry values written by an earlier search.

        Args:
            query: The search query
            enable_time_decay: Whether to apply time-based decay to ranking (default: True)
            decay_rate: Rate of decay passed to ``calculate_decay_factor`` (default: 1.0)

        Returns:
            List of matching documents, sorted by final score (highest first)
        """
        query_lower = query.lower()
        # Per-word scoring ignores very short words; filter them once up front
        # rather than re-checking inside every document/section loop.
        scoring_words = [word for word in query_lower.split() if len(word) > 2]
        scored = []

        for doc in self.documents.values():
            # Lower-case once per document instead of once per check.
            title_lower = doc.title.lower()
            content_lower = doc.content.lower()

            match_score = 0.0

            # Full phrase matches (title match is most relevant)
            if query_lower in title_lower:
                match_score += 1.0

            if query_lower in content_lower:
                occurrences = content_lower.count(query_lower)
                match_score += min(occurrences * 0.1, 0.5)  # Cap at 0.5

            # Individual word matches
            for word in scoring_words:
                if word in title_lower:
                    match_score += 0.3
                if word in content_lower:
                    occurrences = content_lower.count(word)
                    match_score += min(occurrences * 0.05, 0.2)

            # Section titles and content
            for section in doc.sections:
                section_title_lower = section.title.lower()
                section_content_lower = section.content.lower()

                if query_lower in section_title_lower:
                    match_score += 0.4

                for word in scoring_words:
                    if word in section_title_lower:
                        match_score += 0.2
                    if word in section_content_lower:
                        occurrences = section_content_lower.count(word)
                        match_score += min(occurrences * 0.02, 0.1)

            # Only include documents with matches
            if match_score <= 0:
                continue

            decay_factor = 1.0  # Default to no decay
            if enable_time_decay:
                decay_factor = calculate_decay_factor(
                    doc.ingestion_timestamp,
                    reference_time=None,
                    decay_rate=decay_rate
                )
            final_score = match_score * decay_factor

            # Kept for callers that read the score from the document metadata.
            doc.metadata['_search_score'] = final_score
            doc.metadata['_decay_factor'] = decay_factor
            scored.append((final_score, doc))

        # Sort on the locally held score (stable, highest first) so ranking
        # does not depend on reading the value back out of doc.metadata.
        scored.sort(key=lambda pair: pair[0], reverse=True)

        return [doc for _, doc in scored]

    def _save_to_disk(self):
        """Write every document to ``storage_path`` as one JSON object keyed by ID.

        Does nothing when no storage path is configured. Parent directories
        are created if missing.
        """
        if not self.storage_path:
            return

        self.storage_path.parent.mkdir(parents=True, exist_ok=True)

        # Convert documents to dict for JSON serialization
        data = {
            doc_id: doc.model_dump()
            for doc_id, doc in self.documents.items()
        }

        # Serialize completely before touching the file: json.dump streams to
        # the handle, so a serialization error part-way through would leave a
        # truncated file and destroy the previously persisted store.
        payload = json.dumps(data, indent=2)

        # The payload is ASCII-only (json escapes non-ASCII by default), so
        # pinning UTF-8 yields the same bytes on every platform.
        with open(self.storage_path, 'w', encoding='utf-8') as f:
            f.write(payload)

    def _load_from_disk(self):
        """Populate ``self.documents`` from the JSON file at ``storage_path``.

        Does nothing when no storage path is configured or the file does not
        exist. Each stored mapping is passed to ``Document(**doc_data)``.
        Errors from reading or parsing the file propagate to the caller.
        """
        if not self.storage_path or not self.storage_path.exists():
            return

        with open(self.storage_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Convert dict back to Document objects
        for doc_id, doc_data in data.items():
            self.documents[doc_id] = Document(**doc_data)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class ContextExtractor:
    """Builds a size-limited context string from one stored document.

    Sections are ranked by their weight, optionally scaled by a time-decay
    factor and boosted when they contain the query text.
    """

    def __init__(
        self,
        store: DocumentStore,
        enrich_metadata: bool = True,
        enable_time_decay: bool = True,
        decay_rate: float = 1.0,
        enable_citations: bool = True,
        detect_conflicts: bool = True
    ):
        """
        Initialize context extractor.

        Args:
            store: The document store to read documents from
            enrich_metadata: Stored and echoed back in the result metadata as
                ``metadata_enriched``; no enricher is attached
            enable_time_decay: Whether section weights are scaled by the
                document's decay factor (default: True)
            decay_rate: Rate handed to ``calculate_decay_factor`` (default: 1.0)
            enable_citations: Accepted for signature compatibility; the
                attribute is always set to False
            detect_conflicts: Accepted for signature compatibility; the
                attribute is always set to False
        """
        self.store = store

        # Time-decay configuration
        self.enable_time_decay = enable_time_decay
        self.decay_rate = decay_rate

        # Enrichment flag is recorded, but no enricher object exists
        self.enrich_metadata = enrich_metadata
        self.enricher = None  # enrichment module removed in Public Preview

        # Both features are switched off regardless of the arguments
        self.enable_citations = False  # citations disabled
        self.detect_conflicts = False

    def _format_section(self, section: 'Section', document: Document) -> str:
        """
        Render one section as a level-two heading followed by its content.

        Args:
            section: Section to render
            document: Parent document (not used by the current formatting)

        Returns:
            The rendered section text
        """
        return f"\n## {section.title}\n{section.content}\n"

    def extract_context(
        self,
        document_id: str,
        query: str = "",
        max_tokens: int = 2000
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Extract a weight-ranked, length-limited context string from a document.

        Steps:
        1. Look the document up in the store; return an error if it is missing.
        2. If time decay is enabled, obtain a decay factor for the document's
           ``ingestion_timestamp`` from ``calculate_decay_factor``.
        3. Shallow-copy every section and multiply the copy's weight by the
           decay factor, leaving the stored sections untouched.
        4. If a query is given, multiply the weight of each copied section
           whose content contains it (case-insensitive) by 1.5.
        5. Emit sections from highest to lowest weight until the character
           budget (``max_tokens * 4``) is reached. The first section that does
           not fit is cut to the remaining budget and suffixed with "...",
           but only if more than 100 characters remain.

        The ``citations`` and ``conflicts`` entries of the returned metadata
        are always empty lists.

        Args:
            document_id: The document ID
            query: Optional query used to boost matching sections
            max_tokens: Approximate token budget (4 characters per token)

        Returns:
            Tuple of (context_string, metadata). For an unknown document the
            tuple is ``("", {"error": "Document not found"})``.
        """
        document = self.store.get(document_id)
        if not document:
            return "", {"error": "Document not found"}

        # Decay factor for the whole document (1.0 means "no decay")
        if self.enable_time_decay:
            decay_factor = calculate_decay_factor(
                document.ingestion_timestamp,
                reference_time=None,  # Use current time
                decay_rate=self.decay_rate
            )
        else:
            decay_factor = 1.0

        def by_weight(sec):
            return sec.weight

        # Work on copies so the stored sections keep their original weights
        ranked = []
        for original in document.sections:
            clone = copy.copy(original)
            clone.weight = original.weight * decay_factor
            ranked.append(clone)
        ranked.sort(key=by_weight, reverse=True)

        # Boost sections that mention the query, then rank again
        if query:
            needle = query.lower()
            for sec in ranked:
                if needle in sec.content.lower():
                    # Query boost: 50% increase
                    sec.weight *= 1.5
            ranked.sort(key=by_weight, reverse=True)

        # Assemble output within the character budget
        budget = max_tokens * 4  # Approximate: 4 chars per token
        pieces = []
        titles_used = []
        used = 0

        for sec in ranked:
            rendered = self._format_section(sec, document)

            if used + len(rendered) <= budget:
                pieces.append(rendered)
                titles_used.append(sec.title)
                used += len(rendered)
                continue

            # Section does not fit: add a truncated piece if worthwhile, then stop
            leftover = budget - used
            if leftover > 100:
                pieces.append(rendered[:leftover] + "...")
                titles_used.append(sec.title)
            break

        context = "".join(pieces)

        weights_applied = {sec.title: sec.weight for sec in ranked}
        tiers_applied = {
            sec.title: sec.tier.value if sec.tier else "unknown"
            for sec in ranked
        }

        metadata = {
            "document_id": document_id,
            "document_type": document.detected_type,
            "sections_used": titles_used,
            "weights_applied": weights_applied,
            "tiers_applied": tiers_applied,
            "total_sections": len(document.sections),
            "sections_included": len(titles_used),
            "metadata_enriched": self.enrich_metadata,
            "time_decay_enabled": self.enable_time_decay,
            "decay_factor": decay_factor,
            "ingestion_timestamp": document.ingestion_timestamp,
            "citations": [],
            "conflicts": [],
        }

        return context, metadata
|
|
388
|
+
|
|
389
|
+
|
caas/triad.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Public Preview — basic context/memory management
|
|
4
|
+
"""
|
|
5
|
+
Tiered Context Manager — single-tier context storage.
|
|
6
|
+
|
|
7
|
+
All items are stored in one flat list regardless of the layer label
|
|
8
|
+
passed through the public API. Hot/Warm/Cold labels are preserved on
|
|
9
|
+
each item for compatibility but do not affect retrieval behaviour.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from typing import Dict, List, Optional, Any
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
import uuid
|
|
15
|
+
|
|
16
|
+
from caas.models import ContextLayer, ContextTriadItem, ContextTriadState
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ContextTriadManager:
|
|
20
|
+
"""
|
|
21
|
+
Manages context items in a single flat list.
|
|
22
|
+
|
|
23
|
+
The original Hot/Warm/Cold method signatures are preserved so that
|
|
24
|
+
callers continue to work without changes.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def __init__(self):
    """Create the manager with a fresh, empty ``ContextTriadState``."""
    self.state = ContextTriadState()
|
|
30
|
+
|
|
31
|
+
# -- internal helper ---------------------------------------------------
|
|
32
|
+
|
|
33
|
+
def _add_item(
    self,
    layer: ContextLayer,
    content: str,
    metadata: Optional[Dict[str, Any]] = None,
    priority: float = 1.0,
) -> str:
    """Create a context item and append it to the single backing list.

    Args:
        layer: Layer label recorded on the item
        content: Text of the item
        metadata: Optional metadata; an empty dict is used when falsy
        priority: Priority recorded on the item (default: 1.0)

    Returns:
        The generated item ID (a random UUID4 string)
    """
    new_id = str(uuid.uuid4())
    # Timestamp is the current UTC time in ISO-8601 form
    created_at = datetime.now(timezone.utc).isoformat()
    entry = ContextTriadItem(
        id=new_id,
        layer=layer,
        content=content,
        metadata=metadata if metadata else {},
        timestamp=created_at,
        priority=priority,
    )
    # Every layer shares one list: hot_context holds all items
    self.state.hot_context.append(entry)
    return new_id
|
|
52
|
+
|
|
53
|
+
# -- public add methods (signatures unchanged) -------------------------
|
|
54
|
+
|
|
55
|
+
def add_hot_context(
    self,
    content: str,
    metadata: Optional[Dict[str, Any]] = None,
    priority: float = 1.0,
) -> str:
    """Store *content* with the HOT layer label and return the new item ID."""
    layer = ContextLayer.HOT
    return self._add_item(layer, content, metadata, priority)
|
|
63
|
+
|
|
64
|
+
def add_warm_context(
    self,
    content: str,
    metadata: Optional[Dict[str, Any]] = None,
    priority: float = 1.0,
) -> str:
    """Store *content* with the WARM layer label and return the new item ID."""
    layer = ContextLayer.WARM
    return self._add_item(layer, content, metadata, priority)
|
|
72
|
+
|
|
73
|
+
def add_cold_context(
    self,
    content: str,
    metadata: Optional[Dict[str, Any]] = None,
    priority: float = 1.0,
) -> str:
    """Store *content* with the COLD layer label and return the new item ID."""
    layer = ContextLayer.COLD
    return self._add_item(layer, content, metadata, priority)
|
|
81
|
+
|
|
82
|
+
# -- retrieval helpers -------------------------------------------------
|
|
83
|
+
|
|
84
|
+
def _format_items(
    self,
    items: List[ContextTriadItem],
    header: str,
    max_tokens: int = 1000,
    include_metadata: bool = False,
) -> str:
    """Render items under a heading, stopping at the character budget.

    Items are ordered by priority (highest first), then by timestamp
    (ascending string order; a missing timestamp sorts as ""). The budget is
    ``max_tokens * 4`` characters and includes the heading line. Rendering
    stops at the first item that would exceed the budget.

    Args:
        items: Items to render
        header: Text placed after "# " on the first line
        max_tokens: Approximate token budget (4 characters per token)
        include_metadata: When True, each item gets a "## " sub-heading taken
            from its metadata ``source``, else ``category``, else "Unknown"

    Returns:
        The rendered text, or "" when *items* is empty
    """
    if not items:
        return ""

    def rank(entry):
        return (-entry.priority, entry.timestamp or "")

    heading = f"# {header}\n"
    chunks: List[str] = [heading]
    used = len(heading)
    budget = max_tokens * 4

    for entry in sorted(items, key=rank):
        if include_metadata:
            label = entry.metadata.get('source', entry.metadata.get('category', 'Unknown'))
            rendered = f"\n## {label}\n{entry.content}\n"
        else:
            rendered = f"\n{entry.content}\n"

        if used + len(rendered) > budget:
            break

        chunks.append(rendered)
        used += len(rendered)

    return "".join(chunks)
|
|
107
|
+
|
|
108
|
+
def get_hot_context(self, max_tokens: int = 1000, include_metadata: bool = False) -> str:
    """Format the items whose layer is *hot*.

    Selects entries of ``self.state.hot_context`` whose ``layer`` equals
    ``ContextLayer.HOT`` and renders them with ``_format_items`` under the
    heading ``"Hot Context (Current Situation)"``.

    Args:
        max_tokens: Token budget handed to ``_format_items``.
        include_metadata: Forwarded to ``_format_items``.

    Returns:
        The formatted text, or ``""`` when no hot items exist.
    """
    wanted = ContextLayer.HOT
    selected = [entry for entry in self.state.hot_context if entry.layer == wanted]
    heading = "Hot Context (Current Situation)"
    return self._format_items(selected, heading, max_tokens, include_metadata)
|
|
112
|
+
|
|
113
|
+
def get_warm_context(self, max_tokens: int = 500, include_metadata: bool = False) -> str:
    """Format the items whose layer is *warm*.

    Selects entries of ``self.state.hot_context`` whose ``layer`` equals
    ``ContextLayer.WARM`` and renders them with ``_format_items`` under the
    heading ``"Warm Context (User Persona)"``.

    Args:
        max_tokens: Token budget handed to ``_format_items``.
        include_metadata: Forwarded to ``_format_items``.

    Returns:
        The formatted text, or ``""`` when no warm items exist.
    """
    wanted = ContextLayer.WARM
    selected = [entry for entry in self.state.hot_context if entry.layer == wanted]
    heading = "Warm Context (User Persona)"
    return self._format_items(selected, heading, max_tokens, include_metadata)
|
|
117
|
+
|
|
118
|
+
def get_cold_context(
    self,
    query: Optional[str] = None,
    max_tokens: int = 1000,
    include_metadata: bool = False,
) -> str:
    """Format the items whose layer is *cold*, optionally narrowed by *query*.

    Selects entries of ``self.state.hot_context`` whose ``layer`` equals
    ``ContextLayer.COLD``. When *query* is truthy, only items whose content
    or stringified metadata contains it (case-insensitive substring match)
    are kept.

    Args:
        query: Optional search text; ``None`` or ``""`` disables filtering.
        max_tokens: Token budget handed to ``_format_items``.
        include_metadata: Forwarded to ``_format_items``.

    Returns:
        The formatted text under the heading
        ``"Cold Context (Historical Archive)"``, or ``""`` when nothing
        matches.
    """
    selected = [entry for entry in self.state.hot_context if entry.layer == ContextLayer.COLD]

    if query:
        needle = query.lower()

        def _mentions(entry) -> bool:
            # Content is checked first; metadata is only stringified on a miss.
            return needle in entry.content.lower() or needle in str(entry.metadata).lower()

        selected = [entry for entry in selected if _mentions(entry)]

    heading = "Cold Context (Historical Archive)"
    return self._format_items(selected, heading, max_tokens, include_metadata)
|
|
130
|
+
|
|
131
|
+
def get_full_context(
    self,
    include_hot: bool = True,
    include_warm: bool = True,
    include_cold: bool = False,
    cold_query: Optional[str] = None,
    max_tokens_per_layer: Optional[Dict[str, int]] = None,
    include_metadata: bool = False,
) -> Dict[str, Any]:
    """Assemble formatted context for the requested layers.

    Args:
        include_hot: Include the hot layer.
        include_warm: Include the warm layer.
        include_cold: Include the cold layer; only honoured when
            *cold_query* is also truthy.
        cold_query: Search text handed to ``get_cold_context``.
        max_tokens_per_layer: Per-layer token budgets keyed by ``"hot"``,
            ``"warm"`` and ``"cold"``. ``None`` selects 1000/500/1000;
            keys missing from a supplied mapping fall back to the same
            defaults.
        include_metadata: Forwarded to each layer getter.

    Returns:
        A dict with the keys ``hot_context``, ``warm_context``,
        ``cold_context`` (formatted text or ``""``), ``layers_included``
        (names of layers that produced text, in hot/warm/cold order),
        ``total_tokens`` (sum of ``len(text) // 4`` over included layers)
        and ``metadata`` (per-layer item counts over all stored items,
        regardless of which layers were requested).
    """
    budgets = max_tokens_per_layer
    if budgets is None:
        budgets = {"hot": 1000, "warm": 500, "cold": 1000}

    stored = self.state.hot_context

    def _count(layer) -> int:
        return sum(1 for entry in stored if entry.layer == layer)

    result: Dict[str, Any] = {
        "hot_context": "",
        "warm_context": "",
        "cold_context": "",
        "layers_included": [],
        "total_tokens": 0,
        "metadata": {
            "hot_items_count": _count(ContextLayer.HOT),
            "warm_items_count": _count(ContextLayer.WARM),
            "cold_items_count": _count(ContextLayer.COLD),
        },
    }

    def _record(layer_name: str, text: str) -> None:
        # Layers that render to an empty string are left out entirely.
        if not text:
            return
        result[f"{layer_name}_context"] = text
        result["layers_included"].append(layer_name)
        result["total_tokens"] += len(text) // 4

    if include_hot:
        _record("hot", self.get_hot_context(budgets.get("hot", 1000), include_metadata))

    if include_warm:
        _record("warm", self.get_warm_context(budgets.get("warm", 500), include_metadata))

    # Cold context is only fetched when a query accompanies the request.
    if include_cold and cold_query:
        _record(
            "cold",
            self.get_cold_context(cold_query, budgets.get("cold", 1000), include_metadata),
        )

    return result
|
|
180
|
+
|
|
181
|
+
# -- mutation helpers --------------------------------------------------
|
|
182
|
+
|
|
183
|
+
def clear_hot_context(self):
    """Drop every stored item whose layer is *hot*.

    Rebinds ``self.state.hot_context`` to a new list holding only the items
    whose ``layer`` differs from ``ContextLayer.HOT``.
    """
    survivors = [entry for entry in self.state.hot_context if entry.layer != ContextLayer.HOT]
    self.state.hot_context = survivors
|
|
186
|
+
|
|
187
|
+
def clear_warm_context(self):
    """Drop every stored item whose layer is *warm*.

    Rebinds ``self.state.hot_context`` to a new list holding only the items
    whose ``layer`` differs from ``ContextLayer.WARM``.
    """
    survivors = [entry for entry in self.state.hot_context if entry.layer != ContextLayer.WARM]
    self.state.hot_context = survivors
|
|
190
|
+
|
|
191
|
+
def clear_cold_context(self):
    """Drop every stored item whose layer is *cold*.

    Rebinds ``self.state.hot_context`` to a new list holding only the items
    whose ``layer`` differs from ``ContextLayer.COLD``.
    """
    survivors = [entry for entry in self.state.hot_context if entry.layer != ContextLayer.COLD]
    self.state.hot_context = survivors
|
|
194
|
+
|
|
195
|
+
def clear_all(self):
    """Discard all stored context by swapping in a new ``ContextTriadState``."""
    fresh_state = ContextTriadState()
    self.state = fresh_state
|
|
198
|
+
|
|
199
|
+
def remove_item(self, item_id: str, layer: Optional[ContextLayer] = None) -> bool:
    """Delete the first stored item whose id equals *item_id*.

    Args:
        item_id: Identifier to look for.
        layer: When given, an item only qualifies if its ``layer`` also
            equals this value; ``None`` accepts any layer.

    Returns:
        ``True`` if an item was removed, ``False`` if none qualified.
    """
    for position, entry in enumerate(self.state.hot_context):
        id_matches = entry.id == item_id
        if not id_matches:
            continue
        if layer is None or entry.layer == layer:
            # Returning straight after the delete keeps the iteration safe.
            del self.state.hot_context[position]
            return True
    return False
|
|
206
|
+
|
|
207
|
+
def get_state(self) -> ContextTriadState:
    """Return the state object currently held in ``self.state`` (not a copy)."""
    current = self.state
    return current
|
|
210
|
+
|
|
211
|
+
def set_state(self, state: ContextTriadState):
    """Make *state* the current state by rebinding ``self.state`` to it.

    Args:
        state: The state object to adopt; it is stored as given, not copied.
    """
    replacement = state
    self.state = replacement
|