agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
emk/hf_utils.py
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Hugging Face Hub utilities for EMK.
|
|
5
|
+
|
|
6
|
+
This module provides functions to push and pull episode data and experiment
|
|
7
|
+
results to/from the Hugging Face Hub for sharing and reproducibility.
|
|
8
|
+
|
|
9
|
+
Requirements:
|
|
10
|
+
pip install agent-os-kernel[full] # includes emk with huggingface support
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
>>> from emk.hf_utils import upload_episodes_to_hub
|
|
14
|
+
>>> upload_episodes_to_hub(
|
|
15
|
+
... episodes=my_episodes,
|
|
16
|
+
... repo_id="microsoft/emk-experiments",
|
|
17
|
+
... filename="episodes.jsonl"
|
|
18
|
+
... )
|
|
19
|
+
|
|
20
|
+
Note:
|
|
21
|
+
You must be logged in to Hugging Face Hub to push data:
|
|
22
|
+
>>> huggingface_hub.login()
|
|
23
|
+
or set the HF_TOKEN environment variable.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import tempfile
|
|
30
|
+
from datetime import datetime, timezone
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from emk.schema import Episode
|
|
36
|
+
|
|
37
|
+
# huggingface_hub is imported lazily so that importing this module never
# requires it.  The flag caches the probe: None = not probed yet,
# True/False = result of the first import attempt.
_HF_HUB_AVAILABLE = None


def _check_hf_hub() -> None:
    """Ensure ``huggingface_hub`` can be imported.

    The import is attempted only on the first call; the outcome is stored in
    the module-level ``_HF_HUB_AVAILABLE`` flag and reused afterwards.

    Raises:
        ImportError: If ``huggingface_hub`` could not be imported.
    """
    global _HF_HUB_AVAILABLE

    if _HF_HUB_AVAILABLE is None:
        try:
            import huggingface_hub  # noqa: F401
        except ImportError:
            _HF_HUB_AVAILABLE = False
        else:
            _HF_HUB_AVAILABLE = True

    if _HF_HUB_AVAILABLE:
        return

    raise ImportError(
        "huggingface_hub is required for this functionality. "
        "Install it with: pip install agent-os-kernel[full]"
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def upload_episodes_to_hub(
    episodes: List["Episode"],
    repo_id: str,
    filename: str = "episodes.jsonl",
    *,
    commit_message: Optional[str] = None,
    private: bool = False,
    token: Optional[str] = None,
    branch: Optional[str] = None,
) -> str:
    """
    Upload episodes to a Hugging Face Hub dataset repository.

    Each episode is serialized with ``episode.to_json()`` and written as one
    line of a temporary UTF-8 JSONL file, which is then uploaded to the
    repository. The repository is created first (``exist_ok=True``), so an
    existing repository is reused.

    Args:
        episodes: List of Episode objects to upload.
        repo_id: The Hugging Face Hub repository ID (e.g., "username/repo-name").
        filename: Name of the file in the repository (default: "episodes.jsonl").
        commit_message: Custom commit message (auto-generated if not provided).
        private: Whether the repository should be private (default: False).
        token: Hugging Face API token (uses cached token if not provided).
        branch: Branch to upload to; passed to ``upload_file`` as ``revision``.

    Returns:
        str: The value returned by ``HfApi.upload_file`` (URL of the
        uploaded file).

    Raises:
        ImportError: If huggingface_hub is not installed.
        ValueError: If episodes list is empty.

    Example:
        >>> from emk import Episode
        >>> from emk.hf_utils import upload_episodes_to_hub
        >>> episodes = [Episode(goal="Test", action="Run", result="Pass", reflection="Good")]
        >>> url = upload_episodes_to_hub(
        ...     episodes=episodes,
        ...     repo_id="microsoft/emk-test-data"
        ... )
        >>> print(f"Uploaded to: {url}")
    """
    _check_hf_hub()
    from huggingface_hub import HfApi

    if not episodes:
        raise ValueError("Episodes list cannot be empty")

    if commit_message is None:
        commit_message = f"Upload {len(episodes)} episodes via emk"

    api = HfApi(token=token)

    # Create repository if it doesn't exist
    api.create_repo(
        repo_id=repo_id,
        repo_type="dataset",
        private=private,
        exist_ok=True,
    )

    temp_path: Optional[Path] = None
    try:
        # Explicit UTF-8: the platform default encoding may not be able to
        # represent the episode text and would not yield portable JSONL.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
        ) as f:
            # Record the path before writing so the finally-clause can
            # remove the file even when serialization fails part-way.
            temp_path = Path(f.name)
            f.writelines(episode.to_json() + "\n" for episode in episodes)

        # The file is closed at this point, so it can be re-opened for upload.
        return api.upload_file(
            path_or_fileobj=str(temp_path),
            path_in_repo=filename,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=commit_message,
            revision=branch,
        )
    finally:
        # Cleanup temp file (delete=False means we own its removal)
        if temp_path is not None:
            temp_path.unlink(missing_ok=True)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def download_episodes_from_hub(
    repo_id: str,
    filename: str = "episodes.jsonl",
    *,
    token: Optional[str] = None,
    revision: Optional[str] = None,
) -> List["Episode"]:
    """
    Download episodes from a Hugging Face Hub dataset repository.

    The file is fetched with ``hf_hub_download`` and read as UTF-8 JSONL:
    every non-blank line is parsed with ``Episode.from_json``.

    Args:
        repo_id: The Hugging Face Hub repository ID (e.g., "username/repo-name").
        filename: Name of the file in the repository (default: "episodes.jsonl").
        token: Hugging Face API token (uses cached token if not provided).
        revision: Git revision (branch, tag, or commit) to download from.

    Returns:
        List[Episode]: List of Episode objects loaded from the repository,
        in file order.

    Raises:
        ImportError: If huggingface_hub is not installed.

        Errors raised by ``hf_hub_download`` (for example when the file is
        missing from the repository) propagate unchanged.
        NOTE(review): earlier documentation named ``FileNotFoundError`` here;
        confirm the actual exception type against huggingface_hub.

    Example:
        >>> from emk.hf_utils import download_episodes_from_hub
        >>> episodes = download_episodes_from_hub(
        ...     repo_id="microsoft/emk-test-data"
        ... )
        >>> print(f"Downloaded {len(episodes)} episodes")
    """
    _check_hf_hub()
    from huggingface_hub import hf_hub_download

    # Import Episode here to avoid circular imports
    from emk.schema import Episode

    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
        token=token,
        revision=revision,
    )

    # Decode explicitly as UTF-8 instead of the locale encoding so that
    # non-ASCII episode text is read correctly on every platform.
    with open(local_path, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        # Blank lines (e.g. a trailing newline) are skipped.
        return [Episode.from_json(line) for line in stripped_lines if line]
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def push_experiment_results(
    results: Dict[str, Any],
    repo_id: str,
    filename: str = "results.json",
    *,
    commit_message: Optional[str] = None,
    private: bool = False,
    token: Optional[str] = None,
    append_timestamp: bool = True,
) -> str:
    """
    Push experiment results to Hugging Face Hub.

    The results are written as indented JSON together with two metadata keys,
    ``_uploaded_at`` (UTC ISO-8601 timestamp) and ``_emk_version``. Keys in
    *results* take precedence if they collide with the metadata keys. Values
    that are not JSON-serializable are converted with ``str``.

    Args:
        results: Dictionary of experiment results to upload.
        repo_id: The Hugging Face Hub repository ID.
        filename: Name of the results file (default: "results.json"). May
            contain a directory prefix such as "runs/results.json".
        commit_message: Custom commit message.
        private: Whether the repository should be private.
        token: Hugging Face API token.
        append_timestamp: Whether to append a UTC ``YYYYmmdd_HHMMSS``
            timestamp to the file name (default: True). A ".json" suffix is
            added when *filename* has no suffix.

    Returns:
        str: The value returned by ``HfApi.upload_file`` (URL of the
        uploaded file).

    Raises:
        ImportError: If huggingface_hub is not installed.

    Example:
        >>> from emk.hf_utils import push_experiment_results
        >>> results = {
        ...     "accuracy": 0.95,
        ...     "latency_ms": 12.5,
        ...     "episodes_processed": 1000
        ... }
        >>> url = push_experiment_results(
        ...     results=results,
        ...     repo_id="microsoft/emk-experiments"
        ... )
    """
    _check_hf_hub()
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Create repository if it doesn't exist
    api.create_repo(
        repo_id=repo_id,
        repo_type="dataset",
        private=private,
        exist_ok=True,
    )

    # Read the clock once so the metadata and the file name agree.
    now = datetime.now(timezone.utc)

    results_with_meta = {
        "_uploaded_at": now.isoformat(),
        "_emk_version": _get_emk_version(),
        **results,
    }

    if append_timestamp:
        original = Path(filename)
        stamped_name = (
            f"{original.stem}_{now.strftime('%Y%m%d_%H%M%S')}"
            f"{original.suffix or '.json'}"
        )
        # Keep any directory prefix; rebuilding the name from the stem alone
        # would move "runs/results.json" to the repository root.
        parent = original.parent.as_posix()
        filename = stamped_name if parent == "." else f"{parent}/{stamped_name}"

    if commit_message is None:
        commit_message = "Upload experiment results via emk"

    temp_path: Optional[Path] = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        ) as f:
            # Record the path first so a failing json.dump (e.g. circular
            # references) still gets its temp file cleaned up.
            temp_path = Path(f.name)
            json.dump(results_with_meta, f, indent=2, default=str)

        return api.upload_file(
            path_or_fileobj=str(temp_path),
            path_in_repo=filename,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=commit_message,
        )
    finally:
        if temp_path is not None:
            temp_path.unlink(missing_ok=True)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def create_dataset_card(
    repo_id: str,
    description: str,
    *,
    num_episodes: Optional[int] = None,
    tags: Optional[List[str]] = None,
    license: str = "mit",
    token: Optional[str] = None,
) -> str:
    """
    Create or update a dataset card (README.md) for an EMK dataset.

    The card consists of a YAML front-matter block (license, tags,
    ``library_name``), the description, a dataset-information list, a usage
    snippet and the episode schema table. It is uploaded as ``README.md`` to
    the dataset repository.

    Args:
        repo_id: The Hugging Face Hub repository ID.
        description: Description of the dataset.
        num_episodes: Number of episodes in the dataset (optional). When
            given (including 0) an "Episodes" line is added to the card.
        tags: List of tags for the dataset (optional). They are appended to
            the built-in tags; duplicates are dropped.
        license: License identifier (default: "mit").
        token: Hugging Face API token.

    Returns:
        str: The value returned by ``HfApi.upload_file`` (URL of the
        dataset card).

    Raises:
        ImportError: If huggingface_hub is not installed.

    Example:
        >>> from emk.hf_utils import create_dataset_card
        >>> url = create_dataset_card(
        ...     repo_id="microsoft/emk-agent-logs",
        ...     description="Agent experience logs from production system",
        ...     num_episodes=10000,
        ...     tags=["agents", "episodic-memory", "nlp"]
        ... )
    """
    _check_hf_hub()
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # dict.fromkeys() removes duplicate tags while keeping first-seen order.
    all_tags = list(
        dict.fromkeys(["emk", "episodic-memory", "agent-experiences", *(tags or [])])
    )
    title = repo_id.split("/")[-1]

    # The card is assembled line by line. The usage snippet contains literal
    # braces ("{episode.goal}"), so it must NOT be passed through
    # str.format(); doing so raises KeyError: 'episode'.
    lines = [
        "---",
        f"license: {license}",
        "tags:",
        *(f"- {tag}" for tag in all_tags),
        "library_name: emk",
        "---",
        "",
        f"# {title}",
        "",
        description,
        "",
        "## Dataset Information",
        "",
        "- **Format**: JSONL (newline-delimited JSON)",
        "- **Schema**: EMK Episode (Goal → Action → Result → Reflection)",
        "- **Library**: [emk](https://github.com/microsoft/agent-governance-toolkit)",
    ]

    if num_episodes is not None:
        lines.append(f"- **Episodes**: {num_episodes:,}")

    lines += [
        "",
        "## Usage",
        "",
        "```python",
        "from emk.hf_utils import download_episodes_from_hub",
        "",
        "episodes = download_episodes_from_hub(",
        f'    repo_id="{repo_id}"',
        ")",
        "",
        "for episode in episodes[:5]:",
        '    print(f"Goal: {episode.goal}")',
        '    print(f"Result: {episode.result}")',
        '    print("---")',
        "```",
        "",
        "## Episode Schema",
        "",
        "Each episode contains:",
        "",
        "| Field | Type | Description |",
        "|-------|------|-------------|",
        "| `goal` | string | The agent's intended objective |",
        "| `action` | string | The action taken |",
        "| `result` | string | The outcome |",
        "| `reflection` | string | Agent's analysis or learning |",
        "| `timestamp` | datetime | When the episode was created |",
        "| `metadata` | object | Additional context |",
        "| `episode_id` | string | Unique SHA-256 identifier |",
        "",
        "## License",
        "",
        f"This dataset is released under the {license.upper()} license.",
    ]
    card_content = "\n".join(lines) + "\n"

    temp_path: Optional[Path] = None
    try:
        # Explicit UTF-8: the card contains non-ASCII characters ("→") that
        # some platform default encodings cannot represent.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".md", delete=False, encoding="utf-8"
        ) as f:
            temp_path = Path(f.name)
            f.write(card_content)

        return api.upload_file(
            path_or_fileobj=str(temp_path),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="dataset",
            commit_message="Create/update dataset card via emk",
        )
    finally:
        if temp_path is not None:
            temp_path.unlink(missing_ok=True)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _get_emk_version() -> str:
    """Return ``emk.__version__``, or ``"unknown"`` if it cannot be imported."""
    try:
        from emk import __version__ as installed_version
    except ImportError:
        return "unknown"
    return installed_version
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# Public API of this module; underscore-prefixed helpers stay private.
__all__ = [
    "upload_episodes_to_hub",
    "download_episodes_from_hub",
    "push_experiment_results",
    "create_dataset_card",
]
|
emk/indexer.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Public Preview — basic context/memory management
|
|
4
|
+
"""
|
|
5
|
+
Indexer — simple tag-based filtering for episodes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import List, Set, Dict, Any
|
|
9
|
+
import hashlib
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
from emk.schema import Episode
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Indexer:
    """Tag extraction and simple metadata-based filtering for episodes.

    All methods are static; the class only serves as a namespace. Episodes
    are expected to expose ``goal``, ``action``, ``result``, ``reflection``,
    ``metadata`` (a mapping) and ``episode_id``.
    """

    # Built once at class creation instead of on every extract_tags() call.
    _STOP_WORDS = frozenset({
        'the', 'is', 'at', 'which', 'on', 'and', 'a', 'an',
        'as', 'are', 'was', 'were', 'been', 'be', 'have', 'has',
        'had', 'do', 'does', 'did', 'will', 'would', 'should',
        'could', 'may', 'might', 'must', 'can', 'to', 'from',
        'in', 'out', 'up', 'down', 'for', 'with', 'by', 'of',
    })
    _WORD_PATTERN = re.compile(r'\b\w+\b')

    @staticmethod
    def extract_tags(text: str, min_length: int = 3) -> Set[str]:
        """Extract potential search tags from *text*.

        The text is lower-cased and split into word tokens; tokens shorter
        than *min_length* and common stop words are discarded.
        """
        words = Indexer._WORD_PATTERN.findall(text.lower())
        stop_words = Indexer._STOP_WORDS
        return {w for w in words if len(w) >= min_length and w not in stop_words}

    @staticmethod
    def generate_episode_tags(episode: "Episode") -> List[str]:
        """Generate searchable tags from an episode.

        Returns the sorted union of the tags extracted from the episode's
        goal, action, result and reflection and its lower-cased metadata keys.
        """
        combined = f"{episode.goal} {episode.action} {episode.result} {episode.reflection}"
        tags = Indexer.extract_tags(combined)
        # str() keeps non-string metadata keys from raising AttributeError.
        tags.update(str(key).lower() for key in episode.metadata)
        return sorted(tags)

    @staticmethod
    def compute_content_hash(episode: "Episode") -> str:
        """Return the content hash (episode_id) of the episode."""
        return episode.episode_id

    @staticmethod
    def enrich_metadata(episode: "Episode", auto_tags: bool = True) -> Dict[str, Any]:
        """Enrich episode metadata with tags and length metrics.

        Works on a shallow copy, so ``episode.metadata`` itself is not
        modified. An existing ``'tags'`` entry is never overwritten.
        """
        enriched = episode.metadata.copy()
        if auto_tags and 'tags' not in enriched:
            enriched['tags'] = Indexer.generate_episode_tags(episode)
        enriched['goal_length'] = len(episode.goal)
        enriched['action_length'] = len(episode.action)
        enriched['result_length'] = len(episode.result)
        enriched['reflection_length'] = len(episode.reflection)
        return enriched

    @staticmethod
    def create_search_text(episode: "Episode") -> str:
        """Create a concatenated search text from an episode.

        The labelled fields are joined with ``" | "``; a ``Context:`` part
        listing the metadata items is appended when metadata is non-empty.
        """
        parts = [
            f"Goal: {episode.goal}",
            f"Action: {episode.action}",
            f"Result: {episode.result}",
            f"Reflection: {episode.reflection}",
        ]
        if episode.metadata:
            metadata_str = ", ".join(f"{k}: {v}" for k, v in episode.metadata.items())
            parts.append(f"Context: {metadata_str}")
        return " | ".join(parts)

    @staticmethod
    def filter_by_tags(
        episodes: List["Episode"],
        required_tags: Set[str],
    ) -> List["Episode"]:
        """Return episodes whose auto-generated tags include all *required_tags*.

        Matching is case-insensitive on the required tags. An empty
        *required_tags* matches every episode.
        """
        required_lower = {t.lower() for t in required_tags}
        return [
            ep
            for ep in episodes
            if required_lower.issubset(Indexer.generate_episode_tags(ep))
        ]
|
emk/py.typed
ADDED
|
File without changes
|