agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
caas/hf_utils.py ADDED
@@ -0,0 +1,479 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ """
4
+ Hugging Face Hub Utilities for CaaS.
5
+
6
+ This module provides utilities for uploading and downloading datasets,
7
+ experiment results, and model artifacts to/from Hugging Face Hub.
8
+
9
+ Example:
10
+ Upload experiment results to Hugging Face::
11
+
12
+ from caas.hf_utils import CaaSHubClient
13
+
14
+ client = CaaSHubClient(repo_id="microsoft/caas-benchmark")
15
+ client.upload_experiment_logs(
16
+ results_path="experiments/results.json",
17
+ commit_message="Add benchmark results v0.2.0"
18
+ )
19
+
20
+ Download the benchmark corpus::
21
+
22
+ client = CaaSHubClient(repo_id="microsoft/caas-benchmark")
23
+ corpus_path = client.download_benchmark_corpus()
24
+ print(f"Corpus downloaded to: {corpus_path}")
25
+
26
+ Note:
27
+ Requires the `huggingface_hub` package: ``pip install huggingface_hub``
28
+
29
+ For uploads, you must be authenticated. Run ``huggingface-cli login``
30
+ or set the ``HF_TOKEN`` environment variable.
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ import json
36
+ import os
37
+ from dataclasses import dataclass, field
38
+ from datetime import datetime, timezone
39
+ from pathlib import Path
40
+ from typing import Any, Dict, List, Optional, Union
41
+
42
+ # Lazy import to avoid hard dependency
43
+ try:
44
+ from huggingface_hub import (
45
+ HfApi,
46
+ hf_hub_download,
47
+ snapshot_download,
48
+ upload_file,
49
+ upload_folder,
50
+ create_repo,
51
+ RepoUrl,
52
+ )
53
+
54
+ HF_HUB_AVAILABLE = True
55
+ except ImportError:
56
+ HF_HUB_AVAILABLE = False
57
+
58
+
59
+ __all__ = [
60
+ "CaaSHubClient",
61
+ "ExperimentMetadata",
62
+ "upload_experiment_logs",
63
+ "download_benchmark_corpus",
64
+ "push_dataset_to_hub",
65
+ ]
66
+
67
+
68
+ # Default repository IDs
69
+ DEFAULT_BENCHMARK_REPO = "microsoft/caas-benchmark"
70
+ DEFAULT_DATASET_REPO = "microsoft/caas-enterprise-docs"
71
+
72
+
73
@dataclass
class ExperimentMetadata:
    """Metadata describing a single experiment run.

    Attributes:
        experiment_name: Human-readable name for the experiment.
        caas_version: Version of CaaS used.
        timestamp: ISO format timestamp of the experiment. Defaults to the
            current UTC time at construction.
        python_version: Python version used. Defaults to an empty string.
        metrics: Dictionary of metric names to values.
        config: Configuration parameters used.
        tags: List of tags for categorization.
    """

    experiment_name: str
    caas_version: str
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    python_version: str = ""
    metrics: Dict[str, float] = field(default_factory=dict)
    config: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert metadata to a dictionary.

        The container fields (``metrics``, ``config``, ``tags``) are shallow
        copied so that editing the returned dictionary does not modify this
        instance. Values nested inside ``config`` are still shared.

        Returns:
            Dict containing all metadata fields.
        """
        return {
            "experiment_name": self.experiment_name,
            "caas_version": self.caas_version,
            "timestamp": self.timestamp,
            "python_version": self.python_version,
            # Copy the mutable containers: handing out the live objects would
            # let callers alter the metadata through the snapshot.
            "metrics": dict(self.metrics),
            "config": dict(self.config),
            "tags": list(self.tags),
        }
110
+
111
+
112
class CaaSHubClient:
    """Client for interacting with Hugging Face Hub for CaaS artifacts.

    This client provides methods to upload and download datasets,
    experiment results, and benchmark corpora.

    Attributes:
        repo_id: The Hugging Face repository ID (format: "username/repo-name").
        token: Optional Hugging Face API token. If not provided, the
            ``HF_TOKEN`` environment variable is used when set.
        repo_type: Type of repository ("dataset", "model", or "space").

    Example:
        Initialize and download benchmark corpus::

            client = CaaSHubClient(repo_id="microsoft/caas-benchmark")
            corpus_path = client.download_benchmark_corpus()

        Upload experiment results::

            client = CaaSHubClient(repo_id="microsoft/caas-results")
            client.upload_experiment_logs("results/experiment_001.json")
    """

    def __init__(
        self,
        repo_id: str = DEFAULT_BENCHMARK_REPO,
        token: Optional[str] = None,
        repo_type: str = "dataset",
    ) -> None:
        """Initialize the Hugging Face Hub client.

        Args:
            repo_id: The Hugging Face repository ID.
            token: Optional API token. Falls back to the HF_TOKEN env var.
            repo_type: Type of repository ("dataset", "model", or "space").

        Raises:
            ImportError: If huggingface_hub is not installed.
        """
        if not HF_HUB_AVAILABLE:
            raise ImportError(
                "huggingface_hub is required for Hugging Face integration. "
                "Install it with: pip install huggingface_hub"
            )

        self.repo_id = repo_id
        self.token = token or os.environ.get("HF_TOKEN")
        self.repo_type = repo_type
        self._api = HfApi(token=self.token)

    def download_benchmark_corpus(
        self,
        local_dir: Optional[Union[str, Path]] = None,
        revision: str = "main",
    ) -> Path:
        """Download the CaaS benchmark corpus from Hugging Face.

        Args:
            local_dir: Local directory to download to. It is created if
                missing. When omitted, ``None`` is passed to
                ``snapshot_download``.
            revision: Git revision (branch, tag, or commit hash).

        Returns:
            Path to the downloaded corpus directory.

        Example:
            Download to custom directory::

                client = CaaSHubClient()
                path = client.download_benchmark_corpus(local_dir="./data/corpus")
        """
        if local_dir:
            local_dir = Path(local_dir)
            local_dir.mkdir(parents=True, exist_ok=True)

        downloaded_path = snapshot_download(
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            revision=revision,
            local_dir=str(local_dir) if local_dir else None,
            token=self.token,
        )

        return Path(downloaded_path)

    def download_file(
        self,
        filename: str,
        local_dir: Optional[Union[str, Path]] = None,
        revision: str = "main",
    ) -> Path:
        """Download a specific file from the repository.

        Args:
            filename: Path to the file within the repository.
            local_dir: Local directory to save the file.
            revision: Git revision.

        Returns:
            Path to the downloaded file.

        Example:
            Download specific results file::

                client = CaaSHubClient()
                path = client.download_file("results/evaluation_results.json")
        """
        downloaded_path = hf_hub_download(
            repo_id=self.repo_id,
            filename=filename,
            repo_type=self.repo_type,
            revision=revision,
            local_dir=str(local_dir) if local_dir else None,
            token=self.token,
        )

        return Path(downloaded_path)

    def upload_experiment_logs(
        self,
        results_path: Union[str, Path],
        path_in_repo: Optional[str] = None,
        commit_message: Optional[str] = None,
        metadata: Optional[ExperimentMetadata] = None,
    ) -> str:
        """Upload an experiment results file to Hugging Face Hub.

        When ``metadata`` is given and the file has a ``.json`` suffix (any
        letter case), the file is parsed, ``metadata.to_dict()`` is stored
        under the ``"_metadata"`` key, and the merged document is uploaded.
        The merge happens in memory; the local file is not modified and no
        temporary file is created. For any other suffix the file is uploaded
        as-is and ``metadata`` is not attached.

        Args:
            results_path: Local path to the results file. Use
                ``upload_folder`` for directories.
            path_in_repo: Path within the repository. Defaults to
                ``results/<file name>``.
            commit_message: Git commit message. Defaults to a message
                carrying the current UTC time.
            metadata: Optional experiment metadata to include.

        Returns:
            The value returned by ``upload_file`` (the URL of the uploaded
            file).

        Raises:
            TypeError: If ``metadata`` is given and the top-level JSON value
                is not an object, so there is nowhere to put ``"_metadata"``.

        Example:
            Upload with metadata::

                metadata = ExperimentMetadata(
                    experiment_name="ablation_study_v1",
                    caas_version="0.2.0",
                    metrics={"precision_at_5": 0.847}
                )
                url = client.upload_experiment_logs(
                    "results.json",
                    metadata=metadata
                )
        """
        results_path = Path(results_path)

        if path_in_repo is None:
            path_in_repo = f"results/{results_path.name}"

        if commit_message is None:
            timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
            commit_message = f"Upload experiment results: {timestamp}"

        # Default: upload the file straight from disk.
        payload: Union[str, bytes] = str(results_path)

        if metadata is not None and results_path.suffix.lower() == ".json":
            with open(results_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            if not isinstance(data, dict):
                raise TypeError(
                    "Cannot attach metadata: top-level JSON value in "
                    f"{results_path} is {type(data).__name__}, expected an object"
                )

            data["_metadata"] = metadata.to_dict()

            # upload_file accepts raw bytes, so the merged document never has
            # to touch the filesystem. This avoids clobbering a sibling file
            # and leaves nothing behind if serialisation or upload fails.
            payload = json.dumps(data, indent=2).encode("utf-8")

        return upload_file(
            path_or_fileobj=payload,
            path_in_repo=path_in_repo,
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            commit_message=commit_message,
            token=self.token,
        )

    def upload_folder(
        self,
        folder_path: Union[str, Path],
        path_in_repo: str = "",
        commit_message: Optional[str] = None,
        ignore_patterns: Optional[List[str]] = None,
    ) -> str:
        """Upload a folder to Hugging Face Hub.

        Args:
            folder_path: Local folder path.
            path_in_repo: Target path within the repository.
            commit_message: Git commit message. Defaults to a message
                carrying the folder name and the current UTC time.
            ignore_patterns: Patterns to ignore. Defaults to
                ``["*.pyc", "__pycache__", ".git", ".DS_Store"]``.

        Returns:
            URL of the repository.

        Example:
            Upload entire results folder::

                url = client.upload_folder(
                    "experiments/results/",
                    path_in_repo="benchmark_results/v0.2.0"
                )
        """
        folder_path = Path(folder_path)

        if commit_message is None:
            timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
            commit_message = f"Upload folder: {folder_path.name} at {timestamp}"

        if ignore_patterns is None:
            ignore_patterns = ["*.pyc", "__pycache__", ".git", ".DS_Store"]

        # Inside a method body this name resolves to the module-level
        # huggingface_hub function, not to this method.
        return upload_folder(
            folder_path=str(folder_path),
            path_in_repo=path_in_repo,
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            commit_message=commit_message,
            ignore_patterns=ignore_patterns,
            token=self.token,
        )

    def create_dataset_repo(
        self,
        repo_name: Optional[str] = None,
        private: bool = False,
        exist_ok: bool = True,
    ) -> str:
        """Create a new dataset repository on Hugging Face Hub.

        The repository is always created with type ``"dataset"``, regardless
        of this client's ``repo_type``.

        Args:
            repo_name: Name for the new repository. Uses self.repo_id if None.
            private: Whether the repository should be private.
            exist_ok: Don't raise error if repo already exists.

        Returns:
            URL of the created repository.

        Example:
            Create a new private dataset repo::

                client = CaaSHubClient(repo_id="myuser/my-caas-experiments")
                url = client.create_dataset_repo(private=True)
        """
        repo_id = repo_name or self.repo_id

        result: RepoUrl = create_repo(
            repo_id=repo_id,
            repo_type="dataset",
            private=private,
            exist_ok=exist_ok,
            token=self.token,
        )

        return str(result)
378
+
379
+
380
+ # Convenience functions for quick access
381
def upload_experiment_logs(
    results_path: Union[str, Path],
    repo_id: str = DEFAULT_BENCHMARK_REPO,
    commit_message: Optional[str] = None,
    token: Optional[str] = None,
) -> str:
    """Upload a results file to Hugging Face Hub in a single call.

    Builds a throwaway ``CaaSHubClient`` for ``repo_id`` and delegates to its
    ``upload_experiment_logs`` method.

    Args:
        results_path: Path to the results file.
        repo_id: Target repository ID.
        commit_message: Git commit message.
        token: Optional API token.

    Returns:
        URL of the uploaded file.

    Example:
        Quick upload::

            from caas.hf_utils import upload_experiment_logs
            url = upload_experiment_logs("results/eval.json")
    """
    hub = CaaSHubClient(repo_id=repo_id, token=token)
    uploaded_url = hub.upload_experiment_logs(
        results_path=results_path,
        commit_message=commit_message,
    )
    return uploaded_url
411
+
412
+
413
def download_benchmark_corpus(
    local_dir: Optional[Union[str, Path]] = None,
    repo_id: str = DEFAULT_BENCHMARK_REPO,
    token: Optional[str] = None,
) -> Path:
    """Fetch the CaaS benchmark corpus in a single call.

    Builds a throwaway ``CaaSHubClient`` for ``repo_id`` and delegates to its
    ``download_benchmark_corpus`` method.

    Args:
        local_dir: Local directory to download to.
        repo_id: Source repository ID.
        token: Optional API token.

    Returns:
        Path to the downloaded corpus.

    Example:
        Quick download::

            from caas.hf_utils import download_benchmark_corpus
            corpus_path = download_benchmark_corpus("./data")
    """
    hub = CaaSHubClient(repo_id=repo_id, token=token)
    corpus_location = hub.download_benchmark_corpus(local_dir=local_dir)
    return corpus_location
438
+
439
+
440
def push_dataset_to_hub(
    data_path: Union[str, Path],
    repo_id: str,
    commit_message: Optional[str] = None,
    private: bool = False,
    token: Optional[str] = None,
) -> str:
    """Publish a local dataset folder to Hugging Face Hub.

    Two steps are performed through a ``CaaSHubClient`` bound to ``repo_id``:
    the dataset repository is created (an existing one is tolerated), then
    the folder contents are uploaded.

    Args:
        data_path: Path to the dataset folder.
        repo_id: Target repository ID (format: "username/dataset-name").
        commit_message: Git commit message.
        private: Whether to create a private repository.
        token: Optional API token.

    Returns:
        URL of the repository.

    Example:
        Push local dataset::

            from caas.hf_utils import push_dataset_to_hub
            url = push_dataset_to_hub(
                data_path="./benchmarks/data/sample_corpus",
                repo_id="myuser/enterprise-docs-benchmark"
            )
    """
    hub = CaaSHubClient(repo_id=repo_id, token=token)

    # Step 1: make sure the target repository exists.
    hub.create_dataset_repo(private=private, exist_ok=True)

    # Step 2: upload the folder contents.
    repo_url = hub.upload_folder(
        folder_path=data_path,
        commit_message=commit_message,
    )
    return repo_url
caas/ingestion/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ """
4
+ Ingestion module initialization.
5
+ """
6
+
7
+ from caas.ingestion.processors import (
8
+ BaseProcessor,
9
+ PDFProcessor,
10
+ HTMLProcessor,
11
+ CodeProcessor,
12
+ ProcessorFactory,
13
+ )
14
+ from caas.ingestion.structure_parser import StructureParser
15
+
16
+ __all__ = [
17
+ "BaseProcessor",
18
+ "PDFProcessor",
19
+ "HTMLProcessor",
20
+ "CodeProcessor",
21
+ "ProcessorFactory",
22
+ "StructureParser",
23
+ ]