agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
caas/hf_utils.py
ADDED
|
@@ -0,0 +1,479 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Hugging Face Hub Utilities for CaaS.
|
|
5
|
+
|
|
6
|
+
This module provides utilities for uploading and downloading datasets,
|
|
7
|
+
experiment results, and model artifacts to/from Hugging Face Hub.
|
|
8
|
+
|
|
9
|
+
Example:
|
|
10
|
+
Upload experiment results to Hugging Face::
|
|
11
|
+
|
|
12
|
+
from caas.hf_utils import CaaSHubClient
|
|
13
|
+
|
|
14
|
+
client = CaaSHubClient(repo_id="microsoft/caas-benchmark")
|
|
15
|
+
client.upload_experiment_logs(
|
|
16
|
+
results_path="experiments/results.json",
|
|
17
|
+
commit_message="Add benchmark results v0.2.0"
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
Download the benchmark corpus::
|
|
21
|
+
|
|
22
|
+
client = CaaSHubClient(repo_id="microsoft/caas-benchmark")
|
|
23
|
+
corpus_path = client.download_benchmark_corpus()
|
|
24
|
+
print(f"Corpus downloaded to: {corpus_path}")
|
|
25
|
+
|
|
26
|
+
Note:
|
|
27
|
+
Requires the `huggingface_hub` package: ``pip install huggingface_hub``
|
|
28
|
+
|
|
29
|
+
For uploads, you must be authenticated. Run ``huggingface-cli login``
|
|
30
|
+
or set the ``HF_TOKEN`` environment variable.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import json
|
|
36
|
+
import os
|
|
37
|
+
from dataclasses import dataclass, field
|
|
38
|
+
from datetime import datetime, timezone
|
|
39
|
+
from pathlib import Path
|
|
40
|
+
from typing import Any, Dict, List, Optional, Union
|
|
41
|
+
|
|
42
|
+
# Lazy import to avoid hard dependency
|
|
43
|
+
try:
|
|
44
|
+
from huggingface_hub import (
|
|
45
|
+
HfApi,
|
|
46
|
+
hf_hub_download,
|
|
47
|
+
snapshot_download,
|
|
48
|
+
upload_file,
|
|
49
|
+
upload_folder,
|
|
50
|
+
create_repo,
|
|
51
|
+
RepoUrl,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
HF_HUB_AVAILABLE = True
|
|
55
|
+
except ImportError:
|
|
56
|
+
HF_HUB_AVAILABLE = False
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
__all__ = [
|
|
60
|
+
"CaaSHubClient",
|
|
61
|
+
"ExperimentMetadata",
|
|
62
|
+
"upload_experiment_logs",
|
|
63
|
+
"download_benchmark_corpus",
|
|
64
|
+
"push_dataset_to_hub",
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Default repository IDs
|
|
69
|
+
DEFAULT_BENCHMARK_REPO = "microsoft/caas-benchmark"
|
|
70
|
+
DEFAULT_DATASET_REPO = "microsoft/caas-enterprise-docs"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class ExperimentMetadata:
    """Descriptive record attached to a single experiment run.

    Attributes:
        experiment_name: Human-readable name of the experiment.
        caas_version: Version of CaaS the run was produced with.
        timestamp: ISO-format timestamp; defaults to the current UTC time
            at construction.
        python_version: Python version string; empty unless supplied.
        metrics: Mapping of metric name to numeric value.
        config: Configuration parameters used for the run.
        tags: Free-form labels for categorization.
    """

    experiment_name: str
    caas_version: str
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    python_version: str = ""
    metrics: Dict[str, float] = field(default_factory=dict)
    config: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return every metadata field as a plain dictionary.

        The container values (``metrics``, ``config``, ``tags``) are the
        same objects held by the instance, not copies.

        Returns:
            Dict mapping each field name to its current value, in
            declaration order.
        """
        exported = (
            "experiment_name",
            "caas_version",
            "timestamp",
            "python_version",
            "metrics",
            "config",
            "tags",
        )
        return {key: getattr(self, key) for key in exported}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class CaaSHubClient:
    """Client for moving CaaS artifacts to and from Hugging Face Hub.

    Wraps the ``huggingface_hub`` helpers for downloading a repository
    snapshot or a single file, uploading experiment results or a folder,
    and creating a dataset repository.

    Attributes:
        repo_id: The Hugging Face repository ID (format: "username/repo-name").
        token: API token passed to every Hub call; may be ``None``.
        repo_type: Type of repository ("dataset", "model", or "space").

    Example:
        Initialize and download benchmark corpus::

            client = CaaSHubClient(repo_id="microsoft/caas-benchmark")
            corpus_path = client.download_benchmark_corpus()

        Upload experiment results::

            client = CaaSHubClient(repo_id="microsoft/caas-results")
            client.upload_experiment_logs("results/experiment_001.json")
    """

    def __init__(
        self,
        repo_id: str = DEFAULT_BENCHMARK_REPO,
        token: Optional[str] = None,
        repo_type: str = "dataset",
    ) -> None:
        """Initialize the Hugging Face Hub client.

        Args:
            repo_id: The Hugging Face repository ID.
            token: Optional API token. When omitted (or empty), the
                ``HF_TOKEN`` environment variable is used; if that is unset
                too, ``None`` is handed to ``huggingface_hub``.
            repo_type: Type of repository ("dataset", "model", or "space").

        Raises:
            ImportError: If huggingface_hub is not installed.
        """
        if not HF_HUB_AVAILABLE:
            raise ImportError(
                "huggingface_hub is required for Hugging Face integration. "
                "Install it with: pip install huggingface_hub"
            )

        self.repo_id = repo_id
        self.token = token or os.environ.get("HF_TOKEN")
        self.repo_type = repo_type
        self._api = HfApi(token=self.token)

    def download_benchmark_corpus(
        self,
        local_dir: Optional[Union[str, Path]] = None,
        revision: str = "main",
    ) -> Path:
        """Download a snapshot of the client's repository.

        Args:
            local_dir: Local directory to download to. Created (including
                parents) when given; when omitted, ``None`` is passed to
                ``snapshot_download`` so it picks the location.
            revision: Git revision (branch, tag, or commit hash).

        Returns:
            Path reported by ``snapshot_download`` for the downloaded corpus.

        Example:
            Download to custom directory::

                client = CaaSHubClient()
                path = client.download_benchmark_corpus(local_dir="./data/corpus")
        """
        if local_dir:
            local_dir = Path(local_dir)
            local_dir.mkdir(parents=True, exist_ok=True)

        downloaded_path = snapshot_download(
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            revision=revision,
            local_dir=str(local_dir) if local_dir else None,
            token=self.token,
        )

        return Path(downloaded_path)

    def download_file(
        self,
        filename: str,
        local_dir: Optional[Union[str, Path]] = None,
        revision: str = "main",
    ) -> Path:
        """Download a specific file from the repository.

        Args:
            filename: Path to the file within the repository.
            local_dir: Local directory to save the file. When omitted,
                ``None`` is passed to ``hf_hub_download``.
            revision: Git revision.

        Returns:
            Path reported by ``hf_hub_download`` for the downloaded file.

        Example:
            Download specific results file::

                client = CaaSHubClient()
                path = client.download_file("results/evaluation_results.json")
        """
        downloaded_path = hf_hub_download(
            repo_id=self.repo_id,
            filename=filename,
            repo_type=self.repo_type,
            revision=revision,
            local_dir=str(local_dir) if local_dir else None,
            token=self.token,
        )

        return Path(downloaded_path)

    def upload_experiment_logs(
        self,
        results_path: Union[str, Path],
        path_in_repo: Optional[str] = None,
        commit_message: Optional[str] = None,
        metadata: Optional[ExperimentMetadata] = None,
    ) -> str:
        """Upload a single experiment results file to Hugging Face Hub.

        When ``metadata`` is given and the file has a ``.json`` suffix, the
        file is parsed, the metadata is stored under the ``"_metadata"`` key,
        and the merged document is uploaded. The local file is never
        modified. In every other case the file is uploaded as-is.

        Args:
            results_path: Local path to the results file.
            path_in_repo: Path within the repository. Defaults to
                ``results/<file name>``.
            commit_message: Git commit message. Defaults to a message
                carrying the current UTC time.
            metadata: Optional experiment metadata to merge into JSON results.

        Returns:
            Whatever ``huggingface_hub.upload_file`` returns for the upload
            (documented by this module as the URL of the uploaded file).

        Raises:
            TypeError: If metadata is supplied but the JSON file's top-level
                value is not an object, so no ``"_metadata"`` key can be set.

        Example:
            Upload with metadata::

                metadata = ExperimentMetadata(
                    experiment_name="ablation_study_v1",
                    caas_version="0.2.0",
                    metrics={"precision_at_5": 0.847}
                )
                url = client.upload_experiment_logs(
                    "results.json",
                    metadata=metadata
                )
        """
        results_path = Path(results_path)

        if path_in_repo is None:
            path_in_repo = f"results/{results_path.name}"

        if commit_message is None:
            timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
            commit_message = f"Upload experiment results: {timestamp}"

        # Default: hand the file path straight to the Hub helper.
        payload: Union[str, bytes] = str(results_path)

        if metadata is not None and results_path.suffix == ".json":
            with open(results_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            if not isinstance(data, dict):
                raise TypeError(
                    "Cannot attach metadata: top-level JSON value in "
                    f"{results_path} is {type(data).__name__}, expected an object"
                )

            data["_metadata"] = metadata.to_dict()

            # Upload the merged document from memory instead of writing a
            # sibling "_upload_<name>" file: nothing is left behind on
            # failure, no existing file can be clobbered, and read-only
            # result directories work.
            payload = json.dumps(data, indent=2).encode("utf-8")

        return upload_file(
            path_or_fileobj=payload,
            path_in_repo=path_in_repo,
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            commit_message=commit_message,
            token=self.token,
        )

    def upload_folder(
        self,
        folder_path: Union[str, Path],
        path_in_repo: str = "",
        commit_message: Optional[str] = None,
        ignore_patterns: Optional[List[str]] = None,
    ) -> str:
        """Upload a folder to Hugging Face Hub.

        Args:
            folder_path: Local folder path.
            path_in_repo: Target path within the repository.
            commit_message: Git commit message. Defaults to a message with
                the folder name and the current UTC time.
            ignore_patterns: Patterns to ignore. Defaults to
                ``["*.pyc", "__pycache__", ".git", ".DS_Store"]``.

        Returns:
            Whatever ``huggingface_hub.upload_folder`` returns (documented by
            this module as the URL of the repository).

        Example:
            Upload entire results folder::

                url = client.upload_folder(
                    "experiments/results/",
                    path_in_repo="benchmark_results/v0.2.0"
                )
        """
        folder_path = Path(folder_path)

        if commit_message is None:
            timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
            commit_message = f"Upload folder: {folder_path.name} at {timestamp}"

        if ignore_patterns is None:
            ignore_patterns = ["*.pyc", "__pycache__", ".git", ".DS_Store"]

        # Inside a method body this name resolves to the module-level
        # huggingface_hub function, not to this method.
        return upload_folder(
            folder_path=str(folder_path),
            path_in_repo=path_in_repo,
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            commit_message=commit_message,
            ignore_patterns=ignore_patterns,
            token=self.token,
        )

    def create_dataset_repo(
        self,
        repo_name: Optional[str] = None,
        private: bool = False,
        exist_ok: bool = True,
    ) -> str:
        """Create a dataset repository on Hugging Face Hub.

        The repository is always created with type "dataset", regardless of
        the client's ``repo_type``.

        Args:
            repo_name: Name for the new repository. Uses self.repo_id if
                None or empty.
            private: Whether the repository should be private.
            exist_ok: Passed through to ``create_repo``; intended to suppress
                the error when the repo already exists.

        Returns:
            String form of the ``RepoUrl`` returned by ``create_repo``.

        Example:
            Create a new private dataset repo::

                client = CaaSHubClient(repo_id="myuser/my-caas-experiments")
                url = client.create_dataset_repo(private=True)
        """
        repo_id = repo_name or self.repo_id

        result: RepoUrl = create_repo(
            repo_id=repo_id,
            repo_type="dataset",
            private=private,
            exist_ok=exist_ok,
            token=self.token,
        )

        return str(result)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
# Convenience functions for quick access
|
|
381
|
+
def upload_experiment_logs(
    results_path: Union[str, Path],
    repo_id: str = DEFAULT_BENCHMARK_REPO,
    commit_message: Optional[str] = None,
    token: Optional[str] = None,
    path_in_repo: Optional[str] = None,
    metadata: Optional[ExperimentMetadata] = None,
) -> str:
    """Upload experiment results to Hugging Face Hub.

    Convenience function that creates a ``CaaSHubClient`` for ``repo_id``
    and delegates to ``CaaSHubClient.upload_experiment_logs``.

    Args:
        results_path: Path to the results file.
        repo_id: Target repository ID.
        commit_message: Git commit message.
        token: Optional API token.
        path_in_repo: Optional destination path within the repository;
            forwarded unchanged to the client method.
        metadata: Optional experiment metadata; forwarded unchanged to the
            client method.

    Returns:
        The value returned by the client method (documented there as the
        URL of the uploaded file).

    Example:
        Quick upload::

            from caas.hf_utils import upload_experiment_logs
            url = upload_experiment_logs("results/eval.json")
    """
    client = CaaSHubClient(repo_id=repo_id, token=token)
    return client.upload_experiment_logs(
        results_path=results_path,
        path_in_repo=path_in_repo,
        commit_message=commit_message,
        metadata=metadata,
    )
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def download_benchmark_corpus(
    local_dir: Optional[Union[str, Path]] = None,
    repo_id: str = DEFAULT_BENCHMARK_REPO,
    token: Optional[str] = None,
    revision: str = "main",
) -> Path:
    """Download the CaaS benchmark corpus.

    Convenience function that creates a ``CaaSHubClient`` for ``repo_id``
    and delegates to ``CaaSHubClient.download_benchmark_corpus``.

    Args:
        local_dir: Local directory to download to.
        repo_id: Source repository ID.
        token: Optional API token.
        revision: Git revision (branch, tag, or commit hash) to download;
            forwarded unchanged to the client method.

    Returns:
        Path to the downloaded corpus.

    Example:
        Quick download::

            from caas.hf_utils import download_benchmark_corpus
            corpus_path = download_benchmark_corpus("./data")
    """
    client = CaaSHubClient(repo_id=repo_id, token=token)
    return client.download_benchmark_corpus(local_dir=local_dir, revision=revision)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def push_dataset_to_hub(
    data_path: Union[str, Path],
    repo_id: str,
    commit_message: Optional[str] = None,
    private: bool = False,
    token: Optional[str] = None,
) -> str:
    """Push a dataset folder to Hugging Face Hub.

    Validates the local folder, then creates the repository (tolerating an
    existing one via ``exist_ok=True``) and uploads the folder contents.

    Args:
        data_path: Path to the dataset folder.
        repo_id: Target repository ID (format: "username/dataset-name").
        commit_message: Git commit message.
        private: Whether to create a private repository.
        token: Optional API token.

    Returns:
        The value returned by ``CaaSHubClient.upload_folder`` (documented
        there as the URL of the repository).

    Raises:
        ValueError: If ``data_path`` is not an existing directory.

    Example:
        Push local dataset::

            from caas.hf_utils import push_dataset_to_hub
            url = push_dataset_to_hub(
                data_path="./benchmarks/data/sample_corpus",
                repo_id="myuser/enterprise-docs-benchmark"
            )
    """
    folder = Path(data_path)

    # Check the local side first so a bad path cannot leave an empty
    # repository behind on the Hub.
    if not folder.is_dir():
        raise ValueError(f"data_path is not a directory: {folder}")

    client = CaaSHubClient(repo_id=repo_id, token=token)

    # Create repo if needed
    client.create_dataset_repo(private=private, exist_ok=True)

    # Upload the folder
    return client.upload_folder(
        folder_path=data_path,
        commit_message=commit_message,
    )
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Ingestion module initialization.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from caas.ingestion.processors import (
|
|
8
|
+
BaseProcessor,
|
|
9
|
+
PDFProcessor,
|
|
10
|
+
HTMLProcessor,
|
|
11
|
+
CodeProcessor,
|
|
12
|
+
ProcessorFactory,
|
|
13
|
+
)
|
|
14
|
+
from caas.ingestion.structure_parser import StructureParser
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"BaseProcessor",
|
|
18
|
+
"PDFProcessor",
|
|
19
|
+
"HTMLProcessor",
|
|
20
|
+
"CodeProcessor",
|
|
21
|
+
"ProcessorFactory",
|
|
22
|
+
"StructureParser",
|
|
23
|
+
]
|