agent_os_kernel-3.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Auto-detection module for identifying document types and structures.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import Dict, List, Tuple, Any
|
|
9
|
+
|
|
10
|
+
from caas.models import Document, DocumentType, Section
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DocumentTypeDetector:
    """Detects document type and basic structure from a document's content.

    Type detection is a keyword heuristic: every regular expression in
    ``PATTERNS`` is counted against the document text and the type with the
    highest total number of matches wins.
    """

    # Keywords and patterns for different document types
    PATTERNS = {
        DocumentType.LEGAL_CONTRACT: [
            r'\bwhereas\b',
            r'\bparty\b.*\bparty\b',
            r'\bagreement\b',
            r'\bcontract\b',
            r'\btherefore\b',
            r'\bhereby\b',
            r'\bindemnify\b',
            r'\bliability\b',
            r'\btermination\b',
            r'\bgoverning law\b',
        ],
        DocumentType.TECHNICAL_DOCUMENTATION: [
            r'\bAPI\b',
            r'\binstallation\b',
            r'\bconfiguration\b',
            r'\bparameter[s]?\b',
            r'\bmethod[s]?\b',
            r'\breturn[s]?\b',
            r'\bexample[s]?\b',
            r'\busage\b',
            r'\bsyntax\b',
        ],
        DocumentType.SOURCE_CODE: [
            r'\bfunction\b',
            r'\bclass\b',
            r'\bdef\b',
            r'\bimport\b',
            r'\breturn\b',
            r'\bif\b.*\belse\b',
            r'\bfor\b.*\bin\b',
            r'\bwhile\b',
        ],
        DocumentType.RESEARCH_PAPER: [
            r'\babstract\b',
            r'\bintroduction\b',
            r'\bmethodology\b',
            r'\bresults\b',
            r'\bconclusion\b',
            r'\breferences\b',
            r'\bcitation[s]?\b',
            r'\bhypothesis\b',
        ],
        DocumentType.TUTORIAL: [
            r'\bstep[s]?\b',
            r'\btutorial\b',
            r'\bhow to\b',
            r'\bguide\b',
            r'\bbeginners?\b',
            r'\blesson\b',
            r'\bexercise[s]?\b',
        ],
        DocumentType.API_DOCUMENTATION: [
            r'\bendpoint[s]?\b',
            r'\bGET\b.*\bPOST\b',
            r'\bHTTP\b',
            r'\brequest\b.*\bresponse\b',
            r'\bauthentication\b',
            r'\bheader[s]?\b',
            r'\bstatus code[s]?\b',
        ],
    }

    # Section-title keywords that mark a section as "key".  The lookarounds
    # require that the keyword is not embedded in a longer run of letters, so
    # "main" no longer matches "Domain", "Maintenance" or "Remaining", and
    # "abstract" no longer matches "Abstraction".  Titles are lower-cased
    # before matching, hence the plain [a-z] classes.  Digits, underscores
    # and punctuation next to a keyword are still accepted ("main_loop").
    _KEY_SECTION_RE = re.compile(
        r'(?<![a-z])(?:'
        r'definitions?|summary|conclusions?|abstracts?|introductions?|'
        r'overviews?|getting started|quick start|main'
        r')(?![a-z])'
    )

    def detect(self, document: Document) -> DocumentType:
        """
        Detect the document type based on content analysis.

        Args:
            document: The document to analyze

        Returns:
            ``DocumentType.SOURCE_CODE`` when ``document.format == "code"``;
            otherwise the type whose patterns match most often, or
            ``DocumentType.UNKNOWN`` when no pattern matches at all.  On a
            tie the type listed first in ``PATTERNS`` wins.
        """
        # Documents already flagged as code skip the keyword heuristic.
        if document.format == "code":
            return DocumentType.SOURCE_CODE

        content = document.content.lower()

        # Score each document type by the total number of pattern matches.
        # self.PATTERNS is read at call time so subclass/instance overrides
        # keep working.
        scores: Dict[DocumentType, int] = {
            doc_type: sum(
                len(re.findall(pattern, content, re.IGNORECASE))
                for pattern in patterns
            )
            for doc_type, patterns in self.PATTERNS.items()
        }
        if not scores:
            return DocumentType.UNKNOWN

        best_type = max(scores, key=scores.get)
        if scores[best_type] > 0:
            return best_type
        return DocumentType.UNKNOWN

    def detect_structure(self, document: Document) -> Dict[str, Any]:
        """
        Detect the structural characteristics of a document.

        Returns a dict with:
        - ``has_clear_sections``: True when the document has more than one section
        - ``section_count``: number of sections
        - ``section_titles``: titles of all sections, in document order
        - ``key_sections``: titles flagged by ``_identify_key_sections``
        """
        sections = document.sections
        return {
            "has_clear_sections": len(sections) > 1,
            "section_count": len(sections),
            "section_titles": [section.title for section in sections],
            "key_sections": self._identify_key_sections(document),
        }

    def _identify_key_sections(self, document: Document) -> List[str]:
        """Identify which sections are likely most important.

        A section is "key" when its lower-cased title contains one of the
        keywords in ``_KEY_SECTION_RE`` as a standalone word or phrase.

        Returns:
            The original (not lower-cased) titles of the key sections, in
            document order; each section appears at most once.
        """
        return [
            section.title
            for section in document.sections
            if self._KEY_SECTION_RE.search(section.title.lower())
        ]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class StructureAnalyzer:
    """Analyzes document structure for optimization.

    All metrics are cheap text heuristics computed from whitespace-separated
    tokens and sentence-ending punctuation; no language model is involved.
    """

    # Words that signal emphasis; built once instead of on every call.
    _IMPORTANT_WORDS = frozenset({
        'important', 'critical', 'must', 'required', 'essential',
        'key', 'primary', 'main', 'core', 'fundamental',
    })

    # Punctuation removed from both ends of a token before the keyword
    # lookup, so "important," and "(critical)" are recognised.
    _TOKEN_EDGE_PUNCTUATION = '.,;:!?"\'()[]{}<>*_`-'

    def analyze(self, document: Document) -> Dict[str, Any]:
        """
        Analyze document structure and return insights.

        Returns:
            A dict with ``document_type`` (copied from
            ``document.detected_type``), ``section_analysis`` (one dict per
            section holding ``title``, ``length`` in characters,
            ``complexity`` and ``keyword_density``), ``content_density`` and
            ``structure_quality``.
        """
        return {
            "document_type": document.detected_type,
            "section_analysis": [
                {
                    "title": section.title,
                    "length": len(section.content),
                    "complexity": self._estimate_complexity(section.content),
                    "keyword_density": self._calculate_keyword_density(section.content),
                }
                for section in document.sections
            ],
            "content_density": self._calculate_density(document),
            "structure_quality": self._assess_structure_quality(document),
        }

    def _calculate_density(self, document: Document) -> float:
        """Return whitespace-separated words per character of the document content.

        Returns 0.0 for empty content.
        """
        content = document.content
        if not content:
            return 0.0
        return len(content.split()) / len(content)

    def _assess_structure_quality(self, document: Document) -> str:
        """Grade the structure by section count.

        Returns:
            "poor" for 0 sections, "basic" for 1-2, "good" for 3-7 and
            "excellent" for 8 or more.
        """
        section_count = len(document.sections)
        if section_count == 0:
            return "poor"
        if section_count < 3:
            return "basic"
        if section_count < 8:
            return "good"
        return "excellent"

    def _estimate_complexity(self, text: str) -> float:
        """Estimate text complexity from word length and sentence length.

        The score is the mean of (average word length / 10) and
        (words per sentence / 30), capped at 1.0.

        Returns:
            A value in [0, 1]; 0.0 for empty or whitespace-only text.
        """
        if not text:
            return 0.0

        words = text.split()
        if not words:
            return 0.0

        avg_word_len = sum(map(len, words)) / len(words)

        # re.split() yields an empty trailing piece whenever the text ends
        # with '.', '!' or '?'.  Only pieces with real content are counted,
        # otherwise every normally punctuated text has one sentence too many.
        sentence_count = sum(
            1 for piece in re.split(r'[.!?]+', text) if piece.strip()
        )
        words_per_sentence = len(words) / sentence_count if sentence_count > 0 else 0

        # Complexity score (normalized 0-1)
        return min(1.0, (avg_word_len / 10 + words_per_sentence / 30) / 2)

    def _calculate_keyword_density(self, text: str) -> float:
        """Return the fraction of tokens that are emphasis keywords.

        Tokens are whitespace-separated and lower-cased; surrounding
        punctuation is stripped before the lookup so that keywords followed
        by a comma or full stop still count.  The denominator is the total
        number of whitespace-separated tokens.

        Returns:
            A value in [0, 1]; 0.0 when the text has no tokens.
        """
        tokens = text.lower().split()
        if not tokens:
            return 0.0

        keyword_count = sum(
            1
            for token in tokens
            if token.strip(self._TOKEN_EDGE_PUNCTUATION) in self._IMPORTANT_WORDS
        )
        return keyword_count / len(tokens)
|
caas/enrichment.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Metadata enrichment module for contextual injection.
|
|
6
|
+
|
|
7
|
+
Solves the "Flat Chunk Fallacy" by enriching chunks with parent metadata.
|
|
8
|
+
Instead of storing isolated chunks like "It increased by 5%", we store:
|
|
9
|
+
"[Document: Q3 Earnings] [Chapter: Revenue] [Section: North America] It increased by 5%."
|
|
10
|
+
|
|
11
|
+
This ensures the vector carries the weight of its context.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import List, Optional
|
|
15
|
+
from caas.models import Section, Document, DocumentType
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MetadataEnricher:
    """
    Prepends hierarchical metadata to section text.

    Each chunk is prefixed with bracketed tags naming its document, the
    document type (when known), its chapter or parent section, and its own
    title, so the chunk carries its context with it.
    """

    def _metadata_prefix(self, section, title, doc_type) -> str:
        """Build the space-separated "[Document: ...] ... [Section: ...]" prefix.

        ``doc_type`` may be None (or otherwise falsy); the type tag is also
        omitted when its ``value`` is "unknown".
        """
        tags = [f"[Document: {title}]"]

        # Document type, only when it is meaningful
        if doc_type and doc_type.value != "unknown":
            readable_type = doc_type.value.replace("_", " ").title()
            tags.append(f"[Type: {readable_type}]")

        # Chapter takes precedence over the parent section
        if section.chapter:
            tags.append(f"[Chapter: {section.chapter}]")
        elif section.parent_section:
            tags.append(f"[Parent: {section.parent_section}]")

        tags.append(f"[Section: {section.title}]")
        return " ".join(tags)

    def enrich_sections(self, document: Document) -> List[Section]:
        """
        Return enriched copies of every section in the document.

        Args:
            document: Document whose sections should be enriched

        Returns:
            New list of enriched sections, in document order
        """
        return [
            self._enrich_section(section, document)
            for section in document.sections
        ]

    def _enrich_section(self, section: Section, document: Document) -> Section:
        """
        Return a copy of one section whose content carries the metadata prefix.

        Args:
            section: Section to enrich
            document: Parent document supplying title and detected type

        Returns:
            Copy of the section with the prefixed content
        """
        prefix = self._metadata_prefix(
            section, document.title, document.detected_type
        )
        prefixed_content = f"{prefix} {section.content}"

        # Work on a copy (Pydantic v2 model_copy) and assign the new content
        # to that copy, not to the section that was passed in.
        result = section.model_copy()
        result.content = prefixed_content
        return result

    def get_enriched_chunk(
        self,
        section: Section,
        document_title: str,
        document_type: Optional[DocumentType] = None,
        include_type: bool = True
    ) -> str:
        """
        Build the enriched text for a section without copying or modifying it.

        Args:
            section: Section to enrich
            document_title: Title of parent document
            document_type: Type of document (optional)
            include_type: Whether to include document type in prefix

        Returns:
            Metadata prefix followed by a space and the section content
        """
        effective_type = document_type if include_type else None
        prefix = self._metadata_prefix(section, document_title, effective_type)
        return f"{prefix} {section.content}"
|
caas/gateway/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Trust Gateway: Enterprise-Grade Private Cloud Router
|
|
5
|
+
|
|
6
|
+
The enterprise-ready gateway that can be deployed on-premises or in private cloud
|
|
7
|
+
to address CISO concerns about data security and privacy.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from caas.gateway.trust_gateway import (
|
|
11
|
+
TrustGateway,
|
|
12
|
+
DeploymentMode,
|
|
13
|
+
SecurityPolicy,
|
|
14
|
+
SecurityLevel,
|
|
15
|
+
AuditLog,
|
|
16
|
+
DataRetentionPolicy
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Names exported by ``from caas.gateway import *``; each one is imported
# from caas.gateway.trust_gateway above.
__all__ = [
    "TrustGateway",
    "DeploymentMode",
    "SecurityPolicy",
    "SecurityLevel",
    "AuditLog",
    "DataRetentionPolicy",
]
|
|
27
|
+
|