mettle-verifier 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mettle/__init__.py +39 -0
- mettle/api_models.py +238 -0
- mettle/app_config.py +22 -0
- mettle/auth.py +34 -0
- mettle/challenge_adapter.py +1252 -0
- mettle/challenger.py +209 -0
- mettle/cli.py +442 -0
- mettle/llm_challenges.py +546 -0
- mettle/models.py +109 -0
- mettle/router.py +551 -0
- mettle/session_manager.py +510 -0
- mettle/signing.py +251 -0
- mettle/solver.py +304 -0
- mettle/vcp.py +273 -0
- mettle/verifier.py +263 -0
- mettle_verifier-0.1.0.dist-info/METADATA +255 -0
- mettle_verifier-0.1.0.dist-info/RECORD +21 -0
- mettle_verifier-0.1.0.dist-info/WHEEL +5 -0
- mettle_verifier-0.1.0.dist-info/entry_points.txt +2 -0
- mettle_verifier-0.1.0.dist-info/licenses/LICENSE +199 -0
- mettle_verifier-0.1.0.dist-info/top_level.txt +1 -0
mettle/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""METTLE: Machine Entity Trustbuilding through Turing-inverse Logic Examination."""
|
|
2
|
+
|
|
3
|
+
from .challenger import generate_challenge, generate_challenge_set
|
|
4
|
+
from .models import (
|
|
5
|
+
BadgeInfo,
|
|
6
|
+
Challenge,
|
|
7
|
+
ChallengeRequest,
|
|
8
|
+
ChallengeResponse,
|
|
9
|
+
ChallengeType,
|
|
10
|
+
Difficulty,
|
|
11
|
+
MettleResult,
|
|
12
|
+
MettleSession,
|
|
13
|
+
VerificationResult,
|
|
14
|
+
)
|
|
15
|
+
from .vcp import VCPTokenClaim, build_mettle_attestation, compute_tier, format_csm1_line, parse_csm1_token
|
|
16
|
+
from .verifier import compute_mettle_result, verify_response
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"BadgeInfo",
|
|
20
|
+
"Challenge",
|
|
21
|
+
"ChallengeRequest",
|
|
22
|
+
"ChallengeResponse",
|
|
23
|
+
"ChallengeType",
|
|
24
|
+
"Difficulty",
|
|
25
|
+
"MettleResult",
|
|
26
|
+
"MettleSession",
|
|
27
|
+
"VCPTokenClaim",
|
|
28
|
+
"VerificationResult",
|
|
29
|
+
"build_mettle_attestation",
|
|
30
|
+
"compute_mettle_result",
|
|
31
|
+
"compute_tier",
|
|
32
|
+
"format_csm1_line",
|
|
33
|
+
"generate_challenge",
|
|
34
|
+
"generate_challenge_set",
|
|
35
|
+
"parse_csm1_token",
|
|
36
|
+
"verify_response",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
__version__ = "0.1.0"
|
mettle/api_models.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""Pydantic models for METTLE API.
|
|
2
|
+
|
|
3
|
+
Request/response models for session management, verification, and multi-round challenges.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import enum
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SessionStatus(str, enum.Enum):
    """Session state machine states."""

    # Mixing in ``str`` makes every member compare equal to its raw string value.

    # States before any answer has been submitted.
    CREATED = "created"
    CHALLENGES_GENERATED = "challenges_generated"

    # Active state.
    IN_PROGRESS = "in_progress"

    # End states.
    COMPLETED = "completed"
    EXPIRED = "expired"
    CANCELLED = "cancelled"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Suite name constants
# Suites that are referred to individually get their own constant, and the
# registry below reuses those constants so each name is spelled only once.
MULTI_ROUND_SUITE = "novel-reasoning"
GOVERNANCE_SUITE = "governance"
LLM_DYNAMIC_SUITE = "llm-dynamic"

# Every known suite, in suite-number order.
SUITE_NAMES = [
    "adversarial",
    "native",
    "self-reference",
    "social",
    "inverse-turing",
    "anti-thrall",
    "agency",
    "counter-coaching",
    "intent-provenance",
    MULTI_ROUND_SUITE,
    GOVERNANCE_SUITE,  # Suite 11: Governance verification (action gates, constitutional recitation, etc.)
    LLM_DYNAMIC_SUITE,  # Suite 12: Claude-powered dynamic challenges (requires ANTHROPIC_API_KEY)
]

# Everything except the multi-round suite.
SINGLE_SHOT_SUITES = [name for name in SUITE_NAMES if name != MULTI_ROUND_SUITE]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---- Request Models ----
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class OperatorCommitment(BaseModel):
    """Operator accountability commitment submitted with session creation.

    The operator signs a commitment accepting accountability for the agent.
    Ed25519 signature is verified server-side before attestation is issued.
    """

    # All five fields are required; none declares a default.

    # Operator identity and the signed statement.
    operator_pseudonym: str = Field(description="Operator identifier (can be pseudonymous)")
    operator_public_key: str = Field(description="Ed25519 public key (PEM format)")
    signed_commitment: str = Field(
        description="Base64-encoded Ed25519 signature over: 'I accept accountability for agent {entity_id}'",
    )

    # Contact information: a type tag plus a hash of the real value.
    contact_method: str = Field(description="Contact method type: email_hash, platform_handle, legal_entity")
    contact_hash: str = Field(description="SHA-256 of actual contact info (verifiable without revealing)")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class CreateSessionRequest(BaseModel):
    """Request to start a METTLE verification session."""

    # Every field has a default, so an empty request body is accepted.

    # What to run and how hard.
    suites: list[str] = Field(default=["all"], description="Suite names or 'all'")
    difficulty: Literal["easy", "standard", "hard"] = "standard"

    # Optional context about the entity under test.
    entity_id: str | None = Field(default=None, description="Optional entity identifier")
    vcp_token: str | None = Field(
        default=None, description="Optional CSM-1 VCP token for enhanced Suite 9 verification"
    )
    operator_commitment: OperatorCommitment | None = Field(
        default=None, description="Optional operator accountability commitment (enables Platinum tier)"
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class RoundAnswerRequest(BaseModel):
    """Submit answers for a multi-round challenge round."""

    # Required payload; its inner structure is not constrained by this model.
    answers: dict[str, Any] = Field(description="Challenge-specific answers")

    # Optional timestamp reported by the client.
    submitted_at: datetime | None = Field(default=None, description="Client-side timestamp")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class VerifyRequest(BaseModel):
    """Submit answers for a single-shot suite."""

    # Both fields are required.
    # ``suite`` is a plain string; this model does not check it against the known suite names.
    suite: str = Field(description="Suite name to verify")
    answers: dict[str, Any] = Field(description="Suite-specific answers")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ---- Response Models ----
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class CreateSessionResponse(BaseModel):
    """Response after creating a verification session."""

    # Session identity and lifetime.
    session_id: str
    created_at: datetime
    expires_at: datetime

    # Suites included in the session and their challenge payloads.
    suites: list[str]
    challenges: dict[str, Any] = Field(description="Suite name -> challenge data (no answers)")

    # Time budget for the session, in milliseconds.
    time_budget_ms: int
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class RoundFeedbackResponse(BaseModel):
    """Feedback after a multi-round answer submission."""

    # Outcome of the round that was just submitted.
    round_num: int
    accuracy: float
    errors: list[str]
    feedback: dict[str, Any]

    # Remaining time, in milliseconds.
    time_remaining_ms: int

    # Input for the following round; stays None after the final round.
    next_round_data: dict[str, Any] | None = Field(
        default=None,
        description="Data for next round; null if final",
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class VerifyResponse(BaseModel):
    """Result of a single-shot suite verification."""

    # All four fields are required.
    suite: str  # name of the suite that was verified
    passed: bool
    score: float
    details: dict[str, Any]  # free-form; structure is not constrained by this model
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class GovernanceAttestation(BaseModel):
    """Attests the governance framework governing an agent.

    Populated during METTLE verification when the agent provides a VCP token
    containing Creed governance metadata. Enables platforms to distinguish
    between governed and ungoverned agents.

    Trust tier implications:
    - Platinum requires governance_attestation to be present and verified
    - has_action_gate is the key differentiator for the Rathbun scenario
    """

    # Which framework governs the agent; only ``framework`` itself is required.
    framework: str = Field(description="Governance framework: creed-space, custom, none")
    framework_version: str | None = Field(default=None, description="Framework version (e.g. 2.1.0)")
    constitutional_hash: str | None = Field(
        default=None, description="SHA-256 hash of active constitution at verification time"
    )

    # Capability flags; each one defaults to False.
    has_action_gate: bool = Field(
        default=False, description="Whether agent has action-level governance (Public Action Gate or equivalent)"
    )
    has_drift_detection: bool = Field(
        default=False, description="Whether constitution drift is monitored at runtime"
    )
    has_bilateral: bool = Field(default=False, description="Whether bilateral alignment is active")

    # Verification time (required) and an optional signature.
    verified_at: datetime = Field(description="When governance was verified")
    attestation_signature: str | None = Field(
        default=None, description="Ed25519 signature over governance fields"
    )
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class OperatorAttestation(BaseModel):
    """Cryptographic link from agent to operator.

    Even pseudonymous operators provide a verifiable accountability chain.
    The contact_hash allows platforms to verify contact info exists without
    revealing it publicly. If the agent causes harm, the platform can request
    the operator reveal themselves by providing the preimage.
    """

    # All six fields are required; none declares a default.

    # Operator identity and key.
    operator_pseudonym: str = Field(description="Operator identifier (can be pseudonymous)")
    operator_public_key: str = Field(description="Ed25519 public key (PEM format)")

    # The signed statement and when it was made.
    operator_signed_commitment: str = Field(
        description="Operator signs: 'I accept accountability for agent {entity_id}'",
    )
    commitment_timestamp: datetime = Field(description="When commitment was signed")

    # Contact information: a type tag plus a hash of the real value.
    contact_method: str = Field(description="Contact method type: email_hash, platform_handle, legal_entity")
    contact_hash: str = Field(description="SHA-256 of actual contact info (verifiable without revealing)")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class SessionResultResponse(BaseModel):
    """Final results for a completed session."""

    # Core outcome; all of these are required.
    session_id: str
    status: str  # typed as a plain string here, not as SessionStatus
    suites_completed: list[str]
    results: dict[str, Any]
    overall_passed: bool

    # Optional extras; each stays None unless it is supplied.
    tier: str | None = Field(
        default=None,
        description="METTLE verification tier (bronze/silver/gold/platinum)",
    )
    iteration_curve: dict[str, Any] | None = Field(
        default=None,
        description="Only for sessions including Suite 10",
    )
    vcp_attestation: dict[str, Any] | None = Field(
        default=None, description="VCP-compatible attestation (when include_vcp=true)"
    )
    governance_attestation: GovernanceAttestation | None = Field(
        default=None, description="Governance framework attestation (for Platinum tier)"
    )
    operator_attestation: OperatorAttestation | None = Field(
        default=None, description="Operator accountability chain (cryptographic link agent -> operator)"
    )

    # Required, in milliseconds; declared last to keep the original field order.
    elapsed_ms: int
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class SuiteInfoResponse(BaseModel):
    """Information about a single verification suite."""

    # Naming and description.
    name: str
    display_name: str
    description: str

    # Position and shape of the suite.
    suite_number: int
    is_multi_round: bool
    difficulty_levels: list[str]

    # The only field with a default: a suite is reported as available unless stated otherwise.
    available: bool = True
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class SessionStatusResponse(BaseModel):
    """Current status of a verification session."""

    # Session identity and state.
    session_id: str
    status: SessionStatus
    suites: list[str]

    # Session lifetime.
    created_at: datetime
    expires_at: datetime

    # Progress: the round is optional, and the completed list starts empty.
    current_round: int | None = None
    suites_completed: list[str] = Field(default_factory=list)

    # Required, in milliseconds.
    elapsed_ms: int
|
mettle/app_config.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Pydantic Settings for METTLE standalone."""
|
|
2
|
+
|
|
3
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MettleSettings(BaseSettings):
    # Every field has a default, so construction succeeds with no configuration at all.
    redis_url: str = "redis://localhost:6379"
    api_keys: str = ""  # a single string, not a list
    dev_mode: bool = False
    cors_origins: str = "*"
    vcp_signing_key: str = ""  # empty by default

    model_config = SettingsConfigDict(
        # Values come from METTLE_-prefixed variables, with a UTF-8 ".env" file as a second source.
        env_prefix="METTLE_",
        env_file=".env",
        env_file_encoding="utf-8",
        # The surrounding project environment may define many keys that are
        # not METTLE's; ignoring them keeps importing this module stable.
        extra="ignore",
    )


# Shared settings instance, built once when this module is imported.
settings = MettleSettings()
|
mettle/auth.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Simple API key bearer authentication for METTLE standalone."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from fastapi import Depends, HTTPException, status
|
|
7
|
+
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
security = HTTPBearer(auto_error=False)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AuthenticatedUser(BaseModel):
    # Identity record for an accepted caller; it is the return type of
    # require_authenticated_user. The single field is a required string.
    user_id: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def require_authenticated_user(
    credentials: HTTPAuthorizationCredentials | None = Depends(security),
) -> AuthenticatedUser:
    """Authenticate a request from its bearer token and return the caller's identity.

    Configuration is read from the process environment on every call:

    - ``METTLE_DEV_MODE``: when it equals ``"true"`` (case-insensitive), any
      bearer token is accepted.
    - ``METTLE_API_KEYS``: comma-separated list of accepted keys. Whitespace
      around each entry is ignored and empty entries are dropped, so an unset
      variable or a stray comma never whitelists the empty string.

    Args:
        credentials: Parsed bearer credentials, or ``None`` when none were
            supplied (``security`` is created with ``auto_error=False``).

    Returns:
        An ``AuthenticatedUser`` whose ``user_id`` is ``"key:"`` followed by
        the first eight characters of the presented key and ``"..."``.

    Raises:
        HTTPException: 401 with a ``WWW-Authenticate: Bearer`` challenge when
            credentials are missing or the key is not accepted.
    """
    import hmac  # function-scope import keeps this block self-contained

    challenge_headers = {"WWW-Authenticate": "Bearer"}

    if credentials is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Not authenticated",
            headers=challenge_headers,
        )

    api_key = credentials.credentials
    dev_mode = os.getenv("METTLE_DEV_MODE", "false").lower() == "true"

    # "".split(",") yields [""], so blank entries must be filtered out explicitly.
    raw_entries = os.getenv("METTLE_API_KEYS", "").split(",")
    valid_keys = [entry.strip() for entry in raw_entries if entry.strip()]

    # Compare in constant time, and check every configured key without an early
    # exit, so response timing does not reveal how close a guess was.
    presented = api_key.encode("utf-8")
    key_accepted = False
    for candidate in valid_keys:
        key_accepted |= hmac.compare_digest(presented, candidate.encode("utf-8"))

    if dev_mode or key_accepted:
        # NOTE(review): user_id embeds up to eight characters of the secret; a key of
        # eight characters or fewer appears in full. Left unchanged because callers
        # may depend on this format - confirm before switching to a hashed form.
        return AuthenticatedUser(user_id=f"key:{api_key[:8]}...")

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid API key",
        headers=challenge_headers,
    )
|