agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337)
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
emk/hf_utils.py ADDED
@@ -0,0 +1,421 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ """
4
+ Hugging Face Hub utilities for EMK.
5
+
6
+ This module provides functions to push and pull episode data and experiment
7
+ results to/from the Hugging Face Hub for sharing and reproducibility.
8
+
9
+ Requirements:
10
+ pip install agent-os-kernel[full] # includes emk with huggingface support
11
+
12
+ Example:
13
+ >>> from emk.hf_utils import upload_episodes_to_hub
14
+ >>> upload_episodes_to_hub(
15
+ ... episodes=my_episodes,
16
+ ... repo_id="microsoft/emk-experiments",
17
+ ... filename="episodes.jsonl"
18
+ ... )
19
+
20
+ Note:
21
+ You must be logged in to Hugging Face Hub to push data:
22
+ >>> huggingface_hub.login()
23
+ or set the HF_TOKEN environment variable.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import tempfile
30
+ from datetime import datetime, timezone
31
+ from pathlib import Path
32
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
33
+
34
+ if TYPE_CHECKING:
35
+ from emk.schema import Episode
36
+
37
# Tri-state cache for the optional dependency probe: None means "not probed
# yet", True/False record the outcome of the first import attempt. Probing is
# deferred so that importing this module never requires huggingface_hub.
_HF_HUB_AVAILABLE = None


def _check_hf_hub() -> None:
    """Ensure ``huggingface_hub`` can be imported, probing at most once.

    The result of the first import attempt is cached in the module-level
    ``_HF_HUB_AVAILABLE`` flag and reused by every later call.

    Raises:
        ImportError: If ``huggingface_hub`` could not be imported.
    """
    global _HF_HUB_AVAILABLE

    if _HF_HUB_AVAILABLE is None:
        try:
            import huggingface_hub  # noqa: F401
        except ImportError:
            _HF_HUB_AVAILABLE = False
        else:
            _HF_HUB_AVAILABLE = True

    if _HF_HUB_AVAILABLE:
        return

    raise ImportError(
        "huggingface_hub is required for this functionality. "
        "Install it with: pip install agent-os-kernel[full]"
    )
57
+
58
+
59
def upload_episodes_to_hub(
    episodes: List["Episode"],
    repo_id: str,
    filename: str = "episodes.jsonl",
    *,
    commit_message: Optional[str] = None,
    private: bool = False,
    token: Optional[str] = None,
    branch: Optional[str] = None,
) -> str:
    """
    Upload episodes to a Hugging Face Hub dataset repository.

    Each episode is serialized with ``episode.to_json()`` onto its own line
    of a temporary UTF-8 JSONL file, which is then uploaded to the dataset
    repository (created first if it does not exist yet).

    Args:
        episodes: List of Episode objects to upload.
        repo_id: The Hugging Face Hub repository ID (e.g., "username/repo-name").
        filename: Name of the file in the repository (default: "episodes.jsonl").
        commit_message: Custom commit message (auto-generated if not provided).
        private: Whether the repository should be private (default: False).
        token: Hugging Face API token (uses cached token if not provided).
        branch: Branch to upload to; passed to ``upload_file`` as ``revision``
            (``None`` leaves the choice to huggingface_hub).

    Returns:
        str: The value returned by ``HfApi.upload_file`` for the uploaded file.

    Raises:
        ImportError: If huggingface_hub is not installed.
        ValueError: If episodes list is empty.

    Example:
        >>> from emk import Episode
        >>> from emk.hf_utils import upload_episodes_to_hub
        >>> episodes = [Episode(goal="Test", action="Run", result="Pass", reflection="Good")]
        >>> url = upload_episodes_to_hub(
        ...     episodes=episodes,
        ...     repo_id="microsoft/emk-test-data"
        ... )
        >>> print(f"Uploaded to: {url}")
    """
    _check_hf_hub()
    from huggingface_hub import HfApi

    if not episodes:
        raise ValueError("Episodes list cannot be empty")

    api = HfApi(token=token)

    # Create repository if it doesn't exist
    api.create_repo(
        repo_id=repo_id,
        repo_type="dataset",
        private=private,
        exist_ok=True,
    )

    # Generate commit message if not provided
    if commit_message is None:
        commit_message = f"Upload {len(episodes)} episodes via emk"

    # delete=False keeps the file on disk after it is closed so it can be
    # uploaded by path. UTF-8 is forced so the JSONL content does not depend
    # on the platform's locale encoding.
    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
    )
    temp_path = handle.name

    # The try block starts before serialization so that a failing to_json()
    # cannot leave the temporary file behind.
    try:
        # Closing the handle flushes everything to disk before the upload.
        with handle:
            handle.writelines(episode.to_json() + "\n" for episode in episodes)

        return api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=filename,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=commit_message,
            revision=branch,
        )
    finally:
        # Cleanup temp file
        Path(temp_path).unlink(missing_ok=True)
142
+
143
+
144
def download_episodes_from_hub(
    repo_id: str,
    filename: str = "episodes.jsonl",
    *,
    token: Optional[str] = None,
    revision: Optional[str] = None,
) -> List["Episode"]:
    """
    Download episodes from a Hugging Face Hub dataset repository.

    The file is fetched with ``hf_hub_download`` and parsed as JSONL: every
    non-blank line is passed to ``Episode.from_json``.

    Args:
        repo_id: The Hugging Face Hub repository ID (e.g., "username/repo-name").
        filename: Name of the file in the repository (default: "episodes.jsonl").
        token: Hugging Face API token (uses cached token if not provided).
        revision: Git revision (branch, tag, or commit) to download from.

    Returns:
        List[Episode]: Episodes in file order; blank lines are skipped.

    Raises:
        ImportError: If huggingface_hub is not installed.
        FileNotFoundError: If the file doesn't exist in the repository.
            NOTE(review): errors from ``hf_hub_download`` propagate unchanged;
            confirm the exact exception type against huggingface_hub.

    Example:
        >>> from emk.hf_utils import download_episodes_from_hub
        >>> episodes = download_episodes_from_hub(
        ...     repo_id="microsoft/emk-test-data"
        ... )
        >>> print(f"Downloaded {len(episodes)} episodes")
    """
    _check_hf_hub()
    from huggingface_hub import hf_hub_download

    # Import Episode here to avoid circular imports
    from emk.schema import Episode

    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
        token=token,
        revision=revision,
    )

    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
    # default encoding.
    with open(local_path, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [Episode.from_json(line) for line in stripped_lines if line]
198
+
199
+
200
def push_experiment_results(
    results: Dict[str, Any],
    repo_id: str,
    filename: str = "results.json",
    *,
    commit_message: Optional[str] = None,
    private: bool = False,
    token: Optional[str] = None,
    append_timestamp: bool = True,
) -> str:
    """
    Push experiment results to Hugging Face Hub.

    The results are merged with two metadata keys (``_uploaded_at`` and
    ``_emk_version``), written to a temporary UTF-8 JSON file and uploaded to
    the dataset repository (created first if it does not exist yet). Keys in
    ``results`` take precedence over the metadata keys if they collide.

    Args:
        results: Dictionary of experiment results to upload. Values that are
            not JSON-serializable are stringified with ``str``.
        repo_id: The Hugging Face Hub repository ID.
        filename: Name of the results file (default: "results.json"). May
            include a sub-directory, which is kept when a timestamp is added.
        commit_message: Custom commit message.
        private: Whether the repository should be private.
        token: Hugging Face API token.
        append_timestamp: Whether to append a UTC ``YYYYmmdd_HHMMSS`` timestamp
            to the file name, before the extension (default: True).

    Returns:
        str: The value returned by ``HfApi.upload_file`` for the uploaded file.

    Raises:
        ImportError: If huggingface_hub is not installed.

    Example:
        >>> from emk.hf_utils import push_experiment_results
        >>> results = {
        ...     "accuracy": 0.95,
        ...     "latency_ms": 12.5,
        ...     "episodes_processed": 1000
        ... }
        >>> url = push_experiment_results(
        ...     results=results,
        ...     repo_id="microsoft/emk-experiments"
        ... )
    """
    _check_hf_hub()
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Create repository if it doesn't exist
    api.create_repo(
        repo_id=repo_id,
        repo_type="dataset",
        private=private,
        exist_ok=True,
    )

    # Read the clock once so the metadata and the file name always agree.
    now = datetime.now(timezone.utc)

    results_with_meta = {
        "_uploaded_at": now.isoformat(),
        "_emk_version": _get_emk_version(),
        **results,
    }

    if append_timestamp:
        requested = Path(filename)
        stamped_name = (
            f"{requested.stem}_{now.strftime('%Y%m%d_%H%M%S')}"
            f"{requested.suffix or '.json'}"
        )
        # Keep any sub-directory of the requested path; repository paths use
        # forward slashes regardless of the local platform.
        parent = requested.parent.as_posix()
        filename = stamped_name if parent == "." else f"{parent}/{stamped_name}"

    if commit_message is None:
        commit_message = "Upload experiment results via emk"

    # delete=False keeps the file on disk after it is closed so it can be
    # uploaded by path; UTF-8 keeps the output independent of the locale.
    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    )
    temp_path = handle.name

    # The try block starts before serialization so that a failing json.dump
    # cannot leave the temporary file behind.
    try:
        with handle:
            json.dump(results_with_meta, handle, indent=2, default=str)

        return api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=filename,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=commit_message,
        )
    finally:
        Path(temp_path).unlink(missing_ok=True)
287
+
288
+
289
def create_dataset_card(
    repo_id: str,
    description: str,
    *,
    num_episodes: Optional[int] = None,
    tags: Optional[List[str]] = None,
    license: str = "mit",
    token: Optional[str] = None,
) -> str:
    """
    Create or update a dataset card (README.md) for an EMK dataset.

    The card consists of a YAML front-matter header (license, tags, library
    name) followed by Markdown sections describing the dataset, a usage
    snippet and the episode schema. It is uploaded as ``README.md`` to the
    dataset repository.

    Args:
        repo_id: The Hugging Face Hub repository ID.
        description: Description of the dataset.
        num_episodes: Number of episodes in the dataset (optional). Any value
            other than ``None``, including 0, is listed on the card.
        tags: Extra tags for the dataset (optional). They are appended to the
            built-in tags; duplicates are dropped, keeping first occurrence.
        license: License identifier (default: "mit").
        token: Hugging Face API token.

    Returns:
        str: The value returned by ``HfApi.upload_file`` for the dataset card.

    Raises:
        ImportError: If huggingface_hub is not installed.

    Example:
        >>> from emk.hf_utils import create_dataset_card
        >>> url = create_dataset_card(
        ...     repo_id="microsoft/emk-agent-logs",
        ...     description="Agent experience logs from production system",
        ...     num_episodes=10000,
        ...     tags=["agents", "episodic-memory", "nlp"]
        ... )
    """
    _check_hf_hub()
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Built-in tags first, then caller tags; dict.fromkeys removes duplicates
    # while preserving order.
    all_tags = list(
        dict.fromkeys(["emk", "episodic-memory", "agent-experiences", *(tags or [])])
    )
    dataset_name = repo_id.split("/")[-1]

    # The card is assembled line by line. Lines of the usage snippet that
    # contain literal braces are plain strings on purpose: running them
    # through str.format raised KeyError('episode') on every call.
    card_lines = [
        "---",
        f"license: {license}",
        "tags:",
        *(f"- {tag}" for tag in all_tags),
        "library_name: emk",
        "---",
        "",
        f"# {dataset_name}",
        "",
        description,
        "",
        "## Dataset Information",
        "",
        "- **Format**: JSONL (newline-delimited JSON)",
        "- **Schema**: EMK Episode (Goal → Action → Result → Reflection)",
        "- **Library**: [emk](https://github.com/microsoft/agent-governance-toolkit)",
    ]

    if num_episodes is not None:
        card_lines.append(f"- **Episodes**: {num_episodes:,}")

    card_lines += [
        "",
        "## Usage",
        "",
        "```python",
        "from emk.hf_utils import download_episodes_from_hub",
        "",
        "episodes = download_episodes_from_hub(",
        f'    repo_id="{repo_id}"',
        ")",
        "",
        "for episode in episodes[:5]:",
        '    print(f"Goal: {episode.goal}")',
        '    print(f"Result: {episode.result}")',
        '    print("---")',
        "```",
        "",
        "## Episode Schema",
        "",
        "Each episode contains:",
        "",
        "| Field | Type | Description |",
        "|-------|------|-------------|",
        "| `goal` | string | The agent's intended objective |",
        "| `action` | string | The action taken |",
        "| `result` | string | The outcome |",
        "| `reflection` | string | Agent's analysis or learning |",
        "| `timestamp` | datetime | When the episode was created |",
        "| `metadata` | object | Additional context |",
        "| `episode_id` | string | Unique SHA-256 identifier |",
        "",
        "## License",
        "",
        f"This dataset is released under the {license.upper()} license.",
    ]
    card_content = "\n".join(card_lines) + "\n"

    # delete=False keeps the file on disk after it is closed so it can be
    # uploaded by path. UTF-8 is required because the card contains non-ASCII
    # characters that not every locale encoding can represent.
    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".md", delete=False, encoding="utf-8"
    )
    temp_path = handle.name

    try:
        with handle:
            handle.write(card_content)

        return api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="dataset",
            commit_message="Create/update dataset card via emk",
        )
    finally:
        Path(temp_path).unlink(missing_ok=True)
405
+
406
+
407
def _get_emk_version() -> str:
    """Return the ``emk`` package version, or ``"unknown"`` if it cannot be imported."""
    try:
        from emk import __version__ as emk_version
    except ImportError:
        return "unknown"
    else:
        return emk_version
414
+
415
+
416
# Public API of this module; names with a leading underscore defined above
# are internal helpers and are intentionally not exported.
__all__ = [
    "upload_episodes_to_hub",
    "download_episodes_from_hub",
    "push_experiment_results",
    "create_dataset_card",
]
emk/indexer.py ADDED
@@ -0,0 +1,83 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ # Public Preview — basic context/memory management
4
+ """
5
+ Indexer — simple tag-based filtering for episodes.
6
+ """
7
+
8
+ from typing import List, Set, Dict, Any
9
+ import hashlib
10
+ import re
11
+
12
+ from emk.schema import Episode
13
+
14
+
15
class Indexer:
    """Tag extraction and simple metadata-based filtering for episodes.

    All methods are static; the class only groups related helpers. Episodes
    are accessed through their ``goal``, ``action``, ``result``,
    ``reflection``, ``metadata`` and ``episode_id`` attributes.
    """

    # Words that are never emitted as tags. Built once at class creation
    # instead of on every extract_tags() call, which filter_by_tags() makes
    # once per episode.
    _STOP_WORDS = frozenset({
        'the', 'is', 'at', 'which', 'on', 'and', 'a', 'an',
        'as', 'are', 'was', 'were', 'been', 'be', 'have', 'has',
        'had', 'do', 'does', 'did', 'will', 'would', 'should',
        'could', 'may', 'might', 'must', 'can', 'to', 'from',
        'in', 'out', 'up', 'down', 'for', 'with', 'by', 'of',
    })

    # A "word" is a maximal run of \w characters.
    _WORD_PATTERN = re.compile(r'\b\w+\b')

    @staticmethod
    def extract_tags(text: str, min_length: int = 3) -> Set[str]:
        """Extract potential search tags from *text*.

        Args:
            text: Free-form text; it is lower-cased before tokenization.
            min_length: Minimum number of characters a word needs to be kept.

        Returns:
            The set of lower-cased words that are at least *min_length*
            characters long and are not stop words.
        """
        words = Indexer._WORD_PATTERN.findall(text.lower())
        return {
            word
            for word in words
            if len(word) >= min_length and word not in Indexer._STOP_WORDS
        }

    @staticmethod
    def generate_episode_tags(episode: "Episode") -> List[str]:
        """Generate searchable tags from an episode.

        Tags are the words extracted from the goal, action, result and
        reflection texts plus every metadata key in lower case. Metadata keys
        are added as-is, without the length or stop-word filtering.

        Returns:
            The tags as an alphabetically sorted list without duplicates.
        """
        combined = f"{episode.goal} {episode.action} {episode.result} {episode.reflection}"
        tags = Indexer.extract_tags(combined)
        # str() keeps non-string metadata keys from raising AttributeError.
        tags.update(str(key).lower() for key in episode.metadata)
        return sorted(tags)

    @staticmethod
    def compute_content_hash(episode: "Episode") -> str:
        """Return the content hash (episode_id) of the episode."""
        return episode.episode_id

    @staticmethod
    def enrich_metadata(episode: "Episode", auto_tags: bool = True) -> Dict[str, Any]:
        """Enrich episode metadata with tags and length metrics.

        Args:
            episode: Episode whose metadata is used as the starting point.
            auto_tags: When true and the metadata has no ``'tags'`` entry yet,
                add the tags from :meth:`generate_episode_tags`.

        Returns:
            A shallow copy of the episode metadata extended with the
            ``*_length`` character counts (and ``'tags'`` if generated). The
            episode's own metadata mapping is not modified.
        """
        enriched = episode.metadata.copy()
        if auto_tags and 'tags' not in enriched:
            enriched['tags'] = Indexer.generate_episode_tags(episode)
        enriched['goal_length'] = len(episode.goal)
        enriched['action_length'] = len(episode.action)
        enriched['result_length'] = len(episode.result)
        enriched['reflection_length'] = len(episode.reflection)
        return enriched

    @staticmethod
    def create_search_text(episode: "Episode") -> str:
        """Create a concatenated search text from an episode.

        Returns:
            The labelled goal, action, result and reflection joined by
            ``" | "``, followed by a ``Context:`` part listing the metadata
            as ``key: value`` pairs when the metadata is non-empty.
        """
        parts = [
            f"Goal: {episode.goal}",
            f"Action: {episode.action}",
            f"Result: {episode.result}",
            f"Reflection: {episode.reflection}",
        ]
        if episode.metadata:
            metadata_str = ", ".join(f"{k}: {v}" for k, v in episode.metadata.items())
            parts.append(f"Context: {metadata_str}")
        return " | ".join(parts)

    @staticmethod
    def filter_by_tags(
        episodes: List["Episode"],
        required_tags: Set[str],
    ) -> List["Episode"]:
        """Return episodes whose auto-generated tags include all *required_tags*.

        Matching is case-insensitive on the required tags. An empty
        *required_tags* matches every episode.

        Returns:
            A new list with the matching episodes in their original order.
        """
        required_lower = {tag.lower() for tag in required_tags}
        return [
            ep
            for ep in episodes
            if required_lower.issubset(Indexer.generate_episode_tags(ep))
        ]
emk/py.typed ADDED
File without changes