ixentbench 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ixentbench/__init__.py +4 -0
- ixentbench/arena.py +66 -0
- ixentbench/auth.py +85 -0
- ixentbench/cli.py +103 -0
- ixentbench/config.py +80 -0
- ixentbench/play.py +455 -0
- ixentbench-1.0.0.dist-info/METADATA +208 -0
- ixentbench-1.0.0.dist-info/RECORD +11 -0
- ixentbench-1.0.0.dist-info/WHEEL +5 -0
- ixentbench-1.0.0.dist-info/entry_points.txt +2 -0
- ixentbench-1.0.0.dist-info/top_level.txt +1 -0
ixentbench/__init__.py
ADDED
ixentbench/arena.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
ixentbench/arena.py
|
|
4
|
+
Modo Arena — esqueleto completo listo para activar
|
|
5
|
+
cuando ixent-arena esté desplegado en Google Cloud Run.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
import requests
|
|
10
|
+
from ixentbench.auth import get_firebase_token
|
|
11
|
+
from ixentbench.config import IXENT_SDK_URL, validate_env
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_room(mode: str, level: int):
    """Create an Arena room on the SDK server and print its join code.

    Validates the local environment, obtains a Firebase token, and POSTs
    ``mode`` and ``level`` to ``{IXENT_SDK_URL}/arena/create``.

    Args:
        mode: Game mode string sent to the server as-is.
        level: Level number sent to the server as-is.

    Raises:
        SystemExit: With code 1 when the server answers with a non-OK status.
    """
    validate_env()
    click.echo(f"\n🎮 Creating Arena room — Mode: {mode.upper()} | Level: {level}")

    token = get_firebase_token()
    response = requests.post(
        f"{IXENT_SDK_URL}/arena/create",
        headers={"X-Firebase-Token": token, "Content-Type": "application/json"},
        json={"mode": mode, "level": level},
        timeout=30,
    )

    if not response.ok:
        click.echo(f"❌ Error creating room: {response.text}", err=True)
        raise SystemExit(1)

    room_code = response.json().get("room_code")

    announcement = (
        "\n✅ Room created!",
        f" Code: {room_code}",
        "\n Share this code with your opponents:",
        f" ixentbench arena join --room {room_code}\n",
        "⏳ Waiting for players to join...",
    )
    for line in announcement:
        click.echo(line)

    # TODO: poll until every player is ready, then start automatically.
    # To be implemented once ixent-arena is deployed on GCP.
    click.echo("\n⚠️  Arena multiplayer — coming soon!")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def join_room(room_code: str):
    """Join an existing Arena room identified by ``room_code``.

    Validates the local environment, obtains a Firebase token, and POSTs the
    room code to ``{IXENT_SDK_URL}/arena/join``.

    Args:
        room_code: Room code sent to the server as-is.

    Raises:
        SystemExit: With code 1 when the server answers with a non-OK status.
    """
    validate_env()
    click.echo(f"\n🎮 Joining Arena room: {room_code}")

    token = get_firebase_token()
    response = requests.post(
        f"{IXENT_SDK_URL}/arena/join",
        headers={"X-Firebase-Token": token, "Content-Type": "application/json"},
        json={"room_code": room_code},
        timeout=30,
    )

    if not response.ok:
        click.echo(f"❌ Error joining room: {response.text}", err=True)
        raise SystemExit(1)

    payload = response.json()
    click.echo(f"✅ Joined room {room_code} as {payload.get('player_slot', '?')}")
    click.echo("⏳ Waiting for host to start the game...")

    # TODO: listen for the start signal, then run the Arena game loop.
    click.echo("\n⚠️  Arena multiplayer — coming soon!")
|
ixentbench/auth.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
ixentbench/auth.py
|
|
4
|
+
Google OAuth → Firebase JWT.
|
|
5
|
+
Primera ejecución: abre el navegador para Sign in with Google.
|
|
6
|
+
Siguientes ejecuciones: refresco silencioso desde credentials.json
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
import click
|
|
11
|
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
|
12
|
+
from google.oauth2.credentials import Credentials
|
|
13
|
+
from google.auth.transport.requests import Request
|
|
14
|
+
from ixentbench.config import (
|
|
15
|
+
CREDENTIALS, FIREBASE_API_KEY,
|
|
16
|
+
OAUTH_CLIENT_ID, OAUTH_CLIENT_SECRET
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
SCOPES = ["openid", "https://www.googleapis.com/auth/userinfo.email"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_firebase_token() -> str:
    """Return a Firebase ID token for the signed-in Google user.

    Credential lifecycle:

    1. Load cached Google OAuth credentials from ``CREDENTIALS`` if present;
       an unreadable file is treated as "no credentials".
    2. If they are not valid but carry a refresh token, refresh them silently.
    3. If there are still no *valid* credentials, run the interactive
       browser flow.
    4. Persist refreshed or newly obtained credentials to ``CREDENTIALS``.
    5. Exchange the Google access token for a Firebase ID token through the
       Identity Toolkit ``accounts:signInWithIdp`` endpoint.

    Returns:
        The ``idToken`` field of the Identity Toolkit response (``None`` if
        the response has no such field).

    Raises:
        requests.HTTPError: If the token exchange returns an error status.
    """
    creds = None

    if CREDENTIALS.exists():
        try:
            creds = Credentials.from_authorized_user_file(str(CREDENTIALS), SCOPES)
        except Exception:
            creds = None  # Corrupt credentials file — re-authenticate.

    if not creds or not creds.valid:
        if creds and creds.refresh_token:
            try:
                creds.refresh(Request())
            except Exception:
                creds = None  # Token revoked — re-authenticate.

        # Re-check validity, not just presence: cached credentials that are
        # expired and have no refresh token must go through the browser flow
        # instead of being reused with a stale access token.
        if not creds or not creds.valid:
            flow = InstalledAppFlow.from_client_config(
                {
                    "installed": {
                        "client_id": OAUTH_CLIENT_ID,
                        "client_secret": OAUTH_CLIENT_SECRET,
                        "redirect_uris": ["urn:ietf:wg:oauth:2.0:oob", "http://localhost"],
                        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
                        "token_uri": "https://oauth2.googleapis.com/token",
                    }
                },
                SCOPES
            )
            creds = flow.run_local_server(port=0, open_browser=True)

        CREDENTIALS.parent.mkdir(parents=True, exist_ok=True)
        CREDENTIALS.write_text(creds.to_json())
        try:
            # The file holds a refresh token; keep it readable by the owner only.
            CREDENTIALS.chmod(0o600)
        except OSError:
            pass  # Best effort — some filesystems do not support POSIX modes.

    # Exchange the Google access token for a Firebase ID token.
    url = (
        f"https://identitytoolkit.googleapis.com/v1/"
        f"accounts:signInWithIdp?key={FIREBASE_API_KEY}"
    )
    resp = requests.post(url, json={
        "postBody": f"access_token={creds.token}&providerId=google.com",
        "requestUri": "http://localhost",
        "returnSecureToken": True,
        "returnIdpCredential": True,
    }, timeout=15)
    resp.raise_for_status()
    return resp.json().get("idToken")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def force_login():
    """Delete any saved credentials and sign in again from scratch.

    Removes the ``CREDENTIALS`` file when it exists, then calls
    ``get_firebase_token()`` and reports whether a token came back.
    """
    had_saved_credentials = CREDENTIALS.exists()
    if had_saved_credentials:
        CREDENTIALS.unlink()
        click.echo("🗑️  Credentials cleared.")

    click.echo("🔐 Opening browser for Google Sign-In...")

    if get_firebase_token():
        click.echo("✅ Login successful. Credentials saved.")
        return
    click.echo("❌ Login failed.", err=True)
|
ixentbench/cli.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
ixentbench/cli.py
|
|
4
|
+
Punto de entrada CLI — comando `ixentbench`
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
from ixentbench import __version__
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Root command group; the docstring below is what click prints for --help.
@click.group()
@click.version_option(version=__version__, prog_name="ixentbench")
def main():
    """
    iXentBench — The AI benchmark beyond memorization.

    Causal spatial reasoning at 4×10⁸⁵ scale.
    Developed by iXentLabs (iXent Games S.L.)
    """
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# =============================================================================
|
|
24
|
+
# ixentbench play
|
|
25
|
+
# =============================================================================
|
|
26
|
+
@main.command()
@click.option(
    "--session",
    required=True,
    help="Session ID from iXentLabs web portal",
)
@click.option(
    "--prompt-file",
    default=None,
    help="Prompt Injection .txt (added to SYSTEM PROMPT)",
)
@click.option(
    "--strategy-file",
    default=None,
    help="Strategy .txt (logged for Talent Hub, NOT sent to AI)",
)
@click.option(
    "--local-url",
    default=None,
    help="Local model URL — e.g. http://localhost:11434/v1",
)
@click.option(
    "--agent-script",
    default=None,
    help="Custom agent .py script path",
)
def play(session, prompt_file, strategy_file, local_url, agent_script):
    """Run iXentBench Solo (BYOK, Local model, Custom agent or Sponsored)."""
    # Imported inside the command so the play module loads only when needed.
    from ixentbench.play import run_play

    options = {
        "session_id": session,
        "prompt_file": prompt_file,
        "strategy_file": strategy_file,
        "local_url": local_url,
        "agent_script": agent_script,
    }
    run_play(**options)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# =============================================================================
|
|
45
|
+
# ixentbench arena
|
|
46
|
+
# =============================================================================
|
|
47
|
+
# Sub-group that holds the `arena create` and `arena join` commands.
@main.group()
def arena():
    """iXentBench Arena — Multiplayer mode (1v1, 2v2, 4v4, Human vs AI)."""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@arena.command("create")
@click.option(
    "--mode",
    default="1v1",
    type=click.Choice(["1v1", "2v2", "4v4", "human"], case_sensitive=False),
    help="Game mode",
)
@click.option(
    "--level",
    default=1,
    type=click.IntRange(1, 4),
    help="Level (1-4)",
)
def arena_create(mode, level):
    """Create a new Arena room. You become the host."""
    # Imported inside the command so the arena module loads only when needed.
    from ixentbench.arena import create_room as _create_room

    _create_room(mode=mode, level=level)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@arena.command("join")
@click.option(
    "--room",
    required=True,
    help="Room code — e.g. IXENT-4X7K",
)
def arena_join(room):
    """Join an existing Arena room."""
    # Imported inside the command so the arena module loads only when needed.
    from ixentbench.arena import join_room as _join_room

    _join_room(room_code=room)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# =============================================================================
|
|
73
|
+
# ixentbench login / status / prompts
|
|
74
|
+
# =============================================================================
|
|
75
|
+
@main.command()
def login():
    """Force Google re-authentication."""
    # Imported inside the command so the auth module loads only when needed.
    from ixentbench.auth import force_login as _force_login

    _force_login()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@main.command()
def status():
    """Show current credentials and session status."""
    # Imported inside the command so the config module loads only when needed.
    from ixentbench.config import show_status as _show_status

    _show_status()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# Sub-group that holds the `prompts list` command.
@main.group()
def prompts():
    """Manage your saved Prompt Injection files."""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@prompts.command("list")
def prompts_list():
    """List all saved prompt injection files."""
    # Imported inside the command so the config module loads only when needed.
    from ixentbench.config import list_prompts as _list_prompts

    _list_prompts()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# Allow running this module directly as a script, in addition to the
# installed `ixentbench` console entry point.
if __name__ == "__main__":
    main()
|
ixentbench/config.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
ixentbench/config.py
|
|
4
|
+
Constantes, carga del .env y utilidades de estado.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import pathlib
|
|
9
|
+
import click
|
|
10
|
+
from dotenv import load_dotenv
|
|
11
|
+
|
|
12
|
+
# ── Local directories ──────────────────────────────────────────────────────
# Everything the CLI stores locally lives under ~/ixentbench.
IXENT_HOME = pathlib.Path.home() / "ixentbench"
PROMPTS_DIR = IXENT_HOME / "prompts"
STRATEGIES_DIR = IXENT_HOME / "strategies"
CREDENTIALS = IXENT_HOME / "credentials.json"

# Import-time side effect: make sure both storage folders exist.
for _d in [PROMPTS_DIR, STRATEGIES_DIR]:
    _d.mkdir(parents=True, exist_ok=True)

# ── iXentLabs credentials — hardcoded in the package ──────────────────────
# These are PUBLIC credentials of the "Desktop App" type in Google Cloud.
# CLIENT_ID and FIREBASE_API_KEY are application identifiers, not user
# secrets. According to the original author, a Desktop App
# OAUTH_CLIENT_SECRET is safe to hardcode under the OAuth 2.0 standard
# (RFC 8252).
# The user NEVER needs to configure these values.
# NOTE(review): the RFC 8252 safety claim is the author's — confirm it
# against the project's security policy before relying on it.
FIREBASE_API_KEY = "AIzaSyDXEL47mO93BA8W1TTXHHdWOIK4tFUnamo"
OAUTH_CLIENT_ID = "874064710861-dds7gf6e1qinl8c4lurs47od9cc5b0ll.apps.googleusercontent.com"
OAUTH_CLIENT_SECRET = "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"

# ── Load .env — only the user's own BYOK secrets ──────────────────────────
load_dotenv()

# Each value is None when the variable is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LOCAL_MODEL_URL = os.getenv("LOCAL_MODEL_URL")
# Service endpoints; the environment can override the built-in defaults.
IXENT_SDK_URL = os.getenv("IXENT_SDK_URL",
    "https://ixent-solo-sdk-874064710861.us-central1.run.app")
VISUALIZER_URL = os.getenv("VISUALIZER_URL",
    "https://project-8b3bc726-e144-49eb-a7e.web.app")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def validate_env():
    """Exit unless at least one BYOK credential or a local model URL is set.

    The iXentLabs credentials are hardcoded, so only the user-supplied
    values loaded from the environment are checked.

    Raises:
        SystemExit: With code 1 when none of GOOGLE_API_KEY,
            ANTHROPIC_API_KEY, OPENAI_API_KEY or LOCAL_MODEL_URL is set.
    """
    user_supplied = (GOOGLE_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY, LOCAL_MODEL_URL)
    if any(user_supplied):
        return

    complaint = (
        "❌ No API Key found in .env file.",
        " Add at least one of: GOOGLE_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY",
        " Or set LOCAL_MODEL_URL for a local model.",
    )
    for line in complaint:
        click.echo(line, err=True)
    raise SystemExit(1)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def show_status():
    """Print a summary of configured keys, saved credentials and local files."""

    def _flag(value):
        # Report only whether a key is present, never its value.
        return '✅ Set' if value else '— Not set'

    credentials_state = '✅ Saved' if CREDENTIALS.exists() else '⚠️  Run: ixentbench login'
    prompt_count = len(list(PROMPTS_DIR.glob('*.txt')))
    strategy_count = len(list(STRATEGIES_DIR.glob('*.txt')))

    report = [
        "\n📊 iXentBench Status",
        "─" * 40,
        " iXentLabs Auth: ✅ Integrated",
        f" Google API Key: {_flag(GOOGLE_API_KEY)}",
        f" Anthropic API Key: {_flag(ANTHROPIC_API_KEY)}",
        f" OpenAI API Key: {_flag(OPENAI_API_KEY)}",
        f" Local Model URL: {LOCAL_MODEL_URL or '— Not set'}",
        f" Credentials: {credentials_state}",
        f" SDK URL: {IXENT_SDK_URL}",
        f" Prompts saved: {prompt_count}",
        f" Strategies saved: {strategy_count}",
    ]
    for line in report:
        click.echo(line)
    click.echo()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def list_prompts():
    """Print the name and size of every ``*.txt`` file in ``PROMPTS_DIR``."""
    saved = sorted(PROMPTS_DIR.glob("*.txt"))

    if len(saved) == 0:
        click.echo("📭 No saved prompts. Use --prompt-file to load one.")
        return

    click.echo(f"\n📄 Saved Prompt Injections ({len(saved)}):")
    for prompt_path in saved:
        size = prompt_path.stat().st_size
        click.echo(f" → {prompt_path.name} ({size} bytes)")
    click.echo(f"\n Folder: {PROMPTS_DIR}\n")
|
ixentbench/play.py
ADDED
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
ixentbench/play.py
|
|
4
|
+
Modo Solo — BYOK, Modelo Local, Agente Custom y Sponsored.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import time
|
|
9
|
+
import hashlib
|
|
10
|
+
import pathlib
|
|
11
|
+
import subprocess
|
|
12
|
+
import webbrowser
|
|
13
|
+
import sys
|
|
14
|
+
import click
|
|
15
|
+
import requests
|
|
16
|
+
from ixentbench.auth import get_firebase_token
|
|
17
|
+
from ixentbench.config import (
|
|
18
|
+
GOOGLE_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY,
|
|
19
|
+
LOCAL_MODEL_URL, IXENT_SDK_URL, VISUALIZER_URL,
|
|
20
|
+
PROMPTS_DIR, STRATEGIES_DIR, validate_env
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# =============================================================================
|
|
25
|
+
# HELPER: Carga de archivos opcionales
|
|
26
|
+
# =============================================================================
|
|
27
|
+
|
|
28
|
+
def _load_text_file(path: str, label: str, save_dir: pathlib.Path) -> str:
    """Read a UTF-8 text file and keep a copy of it in ``save_dir``.

    Args:
        path: Location of the file to read.
        label: Human-readable name used in the console messages.
        save_dir: Folder that receives a copy when it has no file of the
            same name yet; an existing copy is never overwritten.

    Returns:
        The file content with surrounding whitespace stripped.

    Raises:
        SystemExit: With code 1 when ``path`` does not exist.
    """
    source = pathlib.Path(path)
    if not source.exists():
        click.echo(f"❌ File not found: {path}", err=True)
        click.echo(f" Tip: Your saved {label} files are in {save_dir}", err=True)
        raise SystemExit(1)

    text = source.read_text(encoding="utf-8").strip()
    click.echo(f"📄 {label} loaded: {source.name} ({len(text)} chars)")

    archived = save_dir / source.name
    if archived.exists():
        return text

    archived.write_text(text, encoding="utf-8")
    click.echo(f" 💾 Saved to: {archived}")
    return text
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# =============================================================================
|
|
44
|
+
# HELPER: Limpiar bloques markdown del JSON
|
|
45
|
+
# =============================================================================
|
|
46
|
+
|
|
47
|
+
def _strip_markdown(raw: str) -> str:
|
|
48
|
+
if raw.startswith("```"):
|
|
49
|
+
lines = raw.split("\n")
|
|
50
|
+
if lines[0].startswith("```"): lines = lines[1:]
|
|
51
|
+
if lines[-1].startswith("```"): lines = lines[:-1]
|
|
52
|
+
raw = "\n".join(lines)
|
|
53
|
+
return raw.strip()
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# =============================================================================
|
|
57
|
+
# MOTOR A: BYOK — API Key local del usuario
|
|
58
|
+
# Soporta Gemini (Google), Claude (Anthropic) y GPT (OpenAI)
|
|
59
|
+
# =============================================================================
|
|
60
|
+
|
|
61
|
+
def _get_move_byok(model_name: str, system_prompt: str,
                   state: dict, session: dict) -> tuple:
    """Ask a hosted provider for the next move using the user's own API key.

    Supports the providers ``"google"``, ``"anthropic"`` and ``"openai"``,
    selected by ``session["provider"]`` (default ``"google"``).

    Args:
        model_name: Model identifier passed to the provider.
        system_prompt: System prompt for the model.
        state: Game state; ``state["meta"]`` and the ``inventory``, ``mice``,
            ``board_encoding`` and ``history`` entries of ``state["data"]``
            are sent to the model.
        session: Session info dict; only ``provider`` is read.

    Returns:
        ``(command, reasoning, token_data)`` on success. On an unknown
        provider returns ``(None, "Unknown provider", None)``; when the
        response is not a JSON object returns ``(None, "Invalid Format", None)``.

    Raises:
        SystemExit: With code 1 when the provider call raises any exception.
    """
    context_json = json.dumps({
        "meta": state["meta"],
        "inventory": state["data"]["inventory"],
        "mice": state["data"]["mice"],
        "board_encoding": state["data"]["board_encoding"],
        "history_full": state["data"]["history"],
    }, ensure_ascii=False)

    user_msg = (
        f"--- CURRENT SITUATION (TURN {state['meta']['turn']}) ---\n"
        f"{context_json}\n"
        f"TASK: Generate JSON response with your best move."
    )

    provider = session.get("provider", "google")
    t0 = time.time()

    try:
        if provider == "google":
            from google import genai
            from google.genai import types
            client = genai.Client(api_key=GOOGLE_API_KEY)
            response = client.models.generate_content(
                model=model_name,
                contents=[system_prompt + "\n\n" + user_msg],
                config=types.GenerateContentConfig(
                    response_mime_type="application/json"
                )
            )
            raw = response.text.strip()
            usage = response.usage_metadata
            in_t = getattr(usage, "prompt_token_count", 0) or 0
            out_t = getattr(usage, "candidates_token_count", 0) or 0
            tot_t = getattr(usage, "total_token_count", 0) or 0
            # Whatever the total contains beyond input + output is counted
            # as thinking tokens.
            think_t = max(0, tot_t - in_t - out_t)

        elif provider == "anthropic":
            import anthropic
            client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
            msg = client.messages.create(
                model=model_name,
                max_tokens=4096,
                system=system_prompt,
                messages=[{"role": "user", "content": user_msg}],
            )
            raw = msg.content[0].text.strip()
            in_t = msg.usage.input_tokens
            out_t = msg.usage.output_tokens
            tot_t = in_t + out_t
            think_t = 0

        elif provider == "openai":
            from openai import OpenAI
            client = OpenAI(api_key=OPENAI_API_KEY)
            resp = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_msg},
                ],
                response_format={"type": "json_object"},
            )
            raw = resp.choices[0].message.content.strip()
            in_t = resp.usage.prompt_tokens
            out_t = resp.usage.completion_tokens
            tot_t = resp.usage.total_tokens
            think_t = 0

        else:
            click.echo(f"❌ Unknown provider: {provider}", err=True)
            return None, "Unknown provider", None

    except Exception as e:
        click.echo(f"\n🛑 CRITICAL AI ERROR: {type(e).__name__}: {e}", err=True)
        raise SystemExit(1)

    inference_sec = round(time.time() - t0, 2)
    gen_t = out_t + think_t
    tps = round(gen_t / inference_sec, 2) if inference_sec > 0 else 0
    click.echo(f" ⏱️  {inference_sec}s | {tps} TPS")

    token_data = {
        "input_tokens": in_t, "output_tokens": out_t,
        "thinking_tokens": think_t, "total": tot_t,
        "turn_inference_sec": inference_sec, "turn_tps": tps,
        "is_local_hardware": False,
    }

    raw = _strip_markdown(raw)
    try:
        decision = json.loads(raw)
    except json.JSONDecodeError as e:
        click.echo(f"⚠️  JSON parse error: {e}")
        return None, "Invalid Format", None

    # Valid JSON that is not an object (list, string, number) has no
    # "command"; treat it like a format error instead of crashing on .get().
    if not isinstance(decision, dict):
        click.echo("⚠️  JSON parse error: response is not a JSON object")
        return None, "Invalid Format", None
    return decision.get("command"), decision.get("reasoning"), token_data
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# =============================================================================
|
|
161
|
+
# MOTOR B: MODELO LOCAL — Ollama, LM Studio, llama.cpp...
|
|
162
|
+
# Compatible con cualquier servidor que implemente la API de OpenAI
|
|
163
|
+
# No requiere API Key. is_local_hardware=True excluye TPS del leaderboard
|
|
164
|
+
# =============================================================================
|
|
165
|
+
|
|
166
|
+
def _get_move_local(model_name: str, system_prompt: str,
                    state: dict, local_url: str) -> tuple:
    """Ask a local OpenAI-compatible server for the next move.

    POSTs a chat-completions request to ``{local_url}/chat/completions``.
    The returned token data is flagged ``is_local_hardware=True``.

    Args:
        model_name: Model identifier passed to the server.
        system_prompt: System prompt for the model.
        state: Game state; ``state["meta"]`` and the ``inventory``, ``mice``,
            ``board_encoding`` and ``history`` entries of ``state["data"]``
            are sent to the model.
        local_url: Base URL of the server, e.g. ``http://localhost:11434/v1``.

    Returns:
        ``(command, reasoning, token_data)`` on success. On a request or
        response error returns ``(None, "Local model error", None)``; when
        the content is not a JSON object returns
        ``(None, "Invalid Format", None)``.
    """
    context_json = json.dumps({
        "meta": state["meta"],
        "inventory": state["data"]["inventory"],
        "mice": state["data"]["mice"],
        "board_encoding": state["data"]["board_encoding"],
        "history_full": state["data"]["history"],
    }, ensure_ascii=False)

    user_msg = (
        f"--- CURRENT SITUATION (TURN {state['meta']['turn']}) ---\n"
        f"{context_json}\n"
        f"TASK: Generate JSON response with your best move."
    )

    # Tolerate a trailing slash on the base URL ("…/v1/") so the request
    # does not go to "…/v1//chat/completions".
    endpoint = f"{local_url.rstrip('/')}/chat/completions"

    t0 = time.time()
    try:
        resp = requests.post(
            endpoint,
            json={
                "model": model_name,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_msg},
                ],
                "response_format": {"type": "json_object"},
            },
            timeout=610
        )
        resp.raise_for_status()
        data = resp.json()
        raw = data["choices"][0]["message"]["content"].strip()
        # A server may omit "usage" or send it as null; count zero tokens
        # in that case rather than discarding a valid move.
        usage = data.get("usage") or {}
        in_t = usage.get("prompt_tokens", 0) or 0
        out_t = usage.get("completion_tokens", 0) or 0
        tot_t = usage.get("total_tokens", 0) or 0
        think_t = 0
    except Exception as e:
        click.echo(f"⚠️  Local model error: {e}")
        return None, "Local model error", None

    inference_sec = round(time.time() - t0, 2)
    tps = round(out_t / inference_sec, 2) if inference_sec > 0 else 0
    click.echo(f" ⏱️  {inference_sec}s | {tps} TPS (local — excluded from leaderboard)")

    token_data = {
        "input_tokens": in_t, "output_tokens": out_t,
        "thinking_tokens": think_t, "total": tot_t,
        "turn_inference_sec": inference_sec, "turn_tps": tps,
        "is_local_hardware": True,
    }

    raw = _strip_markdown(raw)
    try:
        decision = json.loads(raw)
    except json.JSONDecodeError as e:
        click.echo(f"⚠️  JSON parse error: {e}")
        return None, "Invalid Format", None

    # Valid JSON that is not an object (list, string, number) has no
    # "command"; treat it like a format error instead of crashing on .get().
    if not isinstance(decision, dict):
        click.echo("⚠️  JSON parse error: response is not a JSON object")
        return None, "Invalid Format", None
    return decision.get("command"), decision.get("reasoning"), token_data
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# =============================================================================
|
|
228
|
+
# MOTOR C: AGENTE CUSTOM — Script Python del usuario
|
|
229
|
+
# El script recibe el estado por stdin (JSON) y devuelve la jugada
|
|
230
|
+
# por stdout (JSON): {"command": "...", "reasoning": "..."}
|
|
231
|
+
# No requiere API Key ni conexión a internet
|
|
232
|
+
# =============================================================================
|
|
233
|
+
|
|
234
|
+
def _get_move_agent(agent_script: str, state: dict) -> tuple:
    """Run the user's agent script once and read its move from stdout.

    The script is started with the current Python interpreter, receives the
    full game state as JSON on stdin, and must print a JSON object of the
    form ``{"command": "...", "reasoning": "..."}`` on stdout.

    Args:
        agent_script: Path of the Python script to run.
        state: Game state serialized and passed to the script.

    Returns:
        ``(command, reasoning, token_data)`` on success, where ``token_data``
        has zero token counts and ``is_local_hardware=True``. Returns
        ``(None, "Agent error", None)`` on a non-zero exit code,
        ``(None, "Timeout", None)`` after 120 seconds, and
        ``(None, "Invalid Format", None)`` when stdout is not a JSON object.
    """
    state_json = json.dumps(state, ensure_ascii=False)
    t0 = time.time()
    try:
        result = subprocess.run(
            [sys.executable, agent_script],
            input=state_json,
            capture_output=True,
            text=True,
            timeout=120
        )
    except subprocess.TimeoutExpired:
        click.echo("⚠️  Agent script timed out (120s)")
        return None, "Timeout", None

    if result.returncode != 0:
        # Only the first 300 characters of stderr are shown.
        click.echo(f"⚠️  Agent script error:\n{result.stderr[:300]}")
        return None, "Agent error", None

    raw = result.stdout.strip()
    inference_sec = round(time.time() - t0, 2)
    click.echo(f" ⏱️  Agent: {inference_sec}s (local script)")

    token_data = {
        "input_tokens": 0, "output_tokens": 0,
        "thinking_tokens": 0, "total": 0,
        "turn_inference_sec": inference_sec, "turn_tps": 0,
        "is_local_hardware": True,
    }

    try:
        decision = json.loads(raw)
    except json.JSONDecodeError as e:
        click.echo(f"⚠️  Agent JSON parse error: {e}")
        return None, "Invalid Format", None

    # Valid JSON that is not an object (list, string, number) has no
    # "command"; treat it like a format error instead of crashing on .get().
    if not isinstance(decision, dict):
        click.echo("⚠️  Agent JSON parse error: output is not a JSON object")
        return None, "Invalid Format", None
    return decision.get("command"), decision.get("reasoning"), token_data
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# =============================================================================
|
|
271
|
+
# FUNCIÓN PRINCIPAL
|
|
272
|
+
# =============================================================================
|
|
273
|
+
|
|
274
|
+
def run_play(session_id: str, prompt_file: str | None,
             strategy_file: str | None, local_url: str | None,
             agent_script: str | None):
    """Run one Solo game from authentication to the final result.

    Steps: validate flags, load the optional prompt/strategy file,
    authenticate, open the game on the SDK server (``/play``), open the
    visualizer in the browser, then loop: obtain a move from the selected
    engine, submit it to ``/move`` with token metrics, and stop when the
    server reports the game as finished or the attempt budget runs out.

    Engine selection per turn: ``agent_script`` if given, else ``local_url``
    if given, else the hosted provider named by the server's session info.

    Args:
        session_id: Session ID sent to the server.
        prompt_file: Optional prompt-injection text file; its content and
            SHA-256 hash are sent to the server.
        strategy_file: Optional strategy text file, sent to the server as
            ``agent_description``. Mutually exclusive with ``prompt_file``.
        local_url: Optional base URL of a local OpenAI-compatible server.
        agent_script: Optional path of a custom agent script. Cannot be
            combined with ``local_url`` or ``prompt_file``.

    Raises:
        SystemExit: With code 1 on invalid flag combinations, missing BYOK
            configuration, failed login, or a server error at game start.
    """
    # BYOK configuration is only required when this process calls a hosted
    # provider itself; local-model and custom-agent runs need no API key.
    # NOTE(review): LOCAL_MODEL_URL from the environment is accepted by
    # validate_env() but is not used as a fallback for --local-url here.
    if not (local_url or agent_script):
        validate_env()

    # ── Flag validation ───────────────────────────────────────────────────
    if prompt_file and strategy_file:
        click.echo("❌ --prompt-file and --strategy-file are mutually exclusive.", err=True)
        raise SystemExit(1)
    if agent_script and (local_url or prompt_file):
        click.echo("❌ --agent-script cannot be combined with --local-url or --prompt-file.", err=True)
        raise SystemExit(1)

    # ── Load optional files ───────────────────────────────────────────────
    prompt_text = _load_text_file(prompt_file, "Prompt Injection", PROMPTS_DIR) if prompt_file else None
    strategy_text = _load_text_file(strategy_file, "Strategy", STRATEGIES_DIR) if strategy_file else None
    prompt_hash = hashlib.sha256(prompt_text.encode()).hexdigest() if prompt_text else None

    # ── Authentication ────────────────────────────────────────────────────
    click.echo("\n🔐 Authenticating with iXentLabs...")
    jwt = get_firebase_token()
    if not jwt:
        # Without a token every request would go out unauthenticated, so
        # stop here instead of reporting a successful login.
        click.echo("❌ Login failed. Run: ixentbench login", err=True)
        raise SystemExit(1)
    headers = {"X-Firebase-Token": jwt, "Content-Type": "application/json"}
    click.echo("✅ Login successful.")

    # ── Start the game ────────────────────────────────────────────────────
    click.echo(f"\n🎮 Validating session {session_id}...")
    resp = requests.post(f"{IXENT_SDK_URL}/play", headers=headers, json={
        "session_id": session_id,
        "prompt_injection": prompt_text,
        "prompt_hash": prompt_hash,
        "agent_description": strategy_text,
        "is_local_model": bool(local_url or agent_script),
        "is_custom_agent": bool(agent_script),
    }, timeout=30)

    if not resp.ok:
        click.echo(f"❌ Server error: {resp.text}", err=True)
        raise SystemExit(1)

    game_data = resp.json()
    if not game_data.get("success"):
        click.echo(f"❌ {game_data.get('msg')}", err=True)
        raise SystemExit(1)

    game_player_id = game_data["game_player_id"]
    current_state = game_data["state"]
    motor_url = game_data["motor_url"]
    model_name = game_data.get("model", "AI Agent")
    session_info = game_data.get("session", {})
    system_prompt = game_data.get("system_prompt", "")
    is_sponsored = game_data.get("is_sponsored", False)
    # Attempt budget: the game's move limit plus 20 spare iterations. Every
    # loop iteration counts, including retries after a failed move.
    max_turns = current_state.get("meta", {}).get("max_moves", 200) + 20

    click.echo(f"✅ Game started — ID: {game_player_id}")
    click.echo(f" Model: {model_name} | Level: {game_data.get('level', '?')}")
    if is_sponsored:
        click.echo(" 💳 Mode: iXentLabs Sponsored")
    elif agent_script:
        click.echo(f" 🤖 Mode: Custom Agent ({agent_script})")
    elif local_url:
        click.echo(f" 💻 Mode: Local Model ({local_url})")
    else:
        click.echo(f" 🔑 Mode: BYOK ({session_info.get('provider', 'google').capitalize()})")

    # ── Open the visualizer ───────────────────────────────────────────────
    game_id = current_state.get("meta", {}).get("live_game_id", "")
    vis_url = f"{VISUALIZER_URL}?game_id={game_id}" if game_id else VISUALIZER_URL
    webbrowser.open(vis_url)
    click.echo(f"🌐 Visualizer: {vis_url}")

    # ── Session accumulators ──────────────────────────────────────────────
    s_tokens = s_input = s_output = s_thinking = s_secs = s_gen = 0

    # ── Game loop ─────────────────────────────────────────────────────────
    click.echo("\n🚀 Game starting...\n")
    turn = 0
    entropy_shown = False

    while turn < max_turns:
        turn += 1
        click.echo(f"\n🧠 [TURN {turn}] {model_name} thinking...")

        # Select the engine.
        if agent_script:
            cmd, reasoning, td = _get_move_agent(agent_script, current_state)
        elif local_url:
            cmd, reasoning, td = _get_move_local(model_name, system_prompt,
                                                 current_state, local_url)
        else:
            # BYOK or Sponsored.
            cmd, reasoning, td = _get_move_byok(model_name, system_prompt,
                                                current_state, session_info)

        if not cmd:
            click.echo("⚠️  No valid move. Retrying...")
            time.sleep(2)
            continue

        # Accumulate metrics.
        if td:
            s_tokens += td["total"]
            s_input += td.get("input_tokens", 0)
            s_output += td.get("output_tokens", 0)
            s_thinking += td.get("thinking_tokens", 0)
            s_secs += td.get("turn_inference_sec", 0)
            s_gen += td.get("output_tokens", 0) + td.get("thinking_tokens", 0)

        avg_tps = round(s_gen / s_secs, 2) if s_secs > 0 else 0
        click.echo(f" 📊 Tokens: {td['total'] if td else 0} (Session: {s_tokens})")
        click.echo(f" 💭 {(reasoning or '—')[:120]}")
        click.echo(f" ⚡ {cmd}")

        # Per-turn figures default to zero/False when the engine returned
        # no token data.
        turn_usage = td or {}

        # Submit the move.
        try:
            move_resp = requests.post(
                f"{IXENT_SDK_URL}/move",
                headers=headers,
                json={
                    "game_player_id": game_player_id,
                    "command": cmd,
                    "reasoning": reasoning,
                    "motor_url": motor_url,
                    "token_usage": {
                        "total": s_tokens,
                        "total_input_tokens": s_input,
                        "total_output_tokens": s_output,
                        "total_thinking_tokens": s_thinking,
                        "total_inference_sec": round(s_secs, 2),
                        "average_tps": avg_tps,
                        "turn_input_tokens": turn_usage.get("input_tokens", 0),
                        "turn_output_tokens": turn_usage.get("output_tokens", 0),
                        "turn_thinking_tokens": turn_usage.get("thinking_tokens", 0),
                        "turn_inference_sec": turn_usage.get("turn_inference_sec", 0),
                        "turn_tps": turn_usage.get("turn_tps", 0),
                        "is_local_hardware": turn_usage.get("is_local_hardware", False),
                    }
                },
                timeout=610
            )
            move_resp.raise_for_status()
            data = move_resp.json()
        except Exception as e:
            click.echo(f"❌ Move error: {e}")
            time.sleep(2)
            continue

        if not data.get("success", True):
            click.echo(f" 🚫 REJECTED: {data.get('msg')}")
            time.sleep(2)
            continue

        click.echo(" 👍 ACCEPTED")
        current_state = data.get("state", {})
        gym = data.get("gym_metrics", {})

        # Report the first "[EVENT]" history entry once per session.
        if not entropy_shown:
            for entry in current_state.get("data", {}).get("history", []):
                if "[EVENT]" in str(entry):
                    click.echo(f"\n ⚠️  ENTROPY EVENT: {entry}\n")
                    entropy_shown = True
                    break

        # Detect the end of the game.
        if (gym.get("terminated") or gym.get("truncated") or
                current_state.get("status", {}).get("game_over")):
            st = current_state.get("status", {})
            result = st.get("result", "UNKNOWN")
            score = current_state.get("scoring", {}).get("benchmark_score", {}).get("P1", 0)
            mice = st.get("mice_rescued", {}).get("P1", 0)
            total = st.get("total_mice_per_player", 0)
            click.echo(f"\n{'🏆' if result == 'VICTORY' else '💀'} GAME OVER: {result}")
            click.echo(f" Score: {score}")
            click.echo(f" Mice: {mice}/{total}")
            click.echo(f" Turns: {turn}")
            click.echo(f" Tokens: {s_tokens}")
            click.echo(f" Time: {round(s_secs, 1)}s")
            click.echo(f" Avg TPS: {avg_tps}")
            break

    click.echo("\n✅ Session complete.")
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ixentbench
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: The AI benchmark beyond memorization — Causal spatial reasoning at 4×10⁸⁵ scale
|
|
5
|
+
Author-email: iXentLabs <contact@ixentlabs.com>
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Project-URL: Homepage, https://ixentlabs.com
|
|
8
|
+
Project-URL: Repository, https://github.com/ixentlabs/ixentbench
|
|
9
|
+
Project-URL: Documentation, https://github.com/ixentlabs/ixentbench#readme
|
|
10
|
+
Keywords: ai,benchmark,reasoning,llm,evaluation
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: click>=8.1
|
|
22
|
+
Requires-Dist: requests>=2.31
|
|
23
|
+
Requires-Dist: python-dotenv>=1.0
|
|
24
|
+
Requires-Dist: google-genai>=1.0
|
|
25
|
+
Requires-Dist: google-auth>=2.28
|
|
26
|
+
Requires-Dist: google-auth-oauthlib>=1.2
|
|
27
|
+
Provides-Extra: anthropic
|
|
28
|
+
Requires-Dist: anthropic>=0.25; extra == "anthropic"
|
|
29
|
+
Provides-Extra: openai
|
|
30
|
+
Requires-Dist: openai>=1.30; extra == "openai"
|
|
31
|
+
|
|
32
|
+
# iXentBench — The AI Benchmark Beyond Memorization
|
|
33
|
+
|
|
34
|
+
Causal spatial reasoning at 4×10⁸⁵ scale, powered by **Caps i Caps** — the game that makes memorization impossible.
|
|
35
|
+
Developed by [iXentLabs](https://ixentlabs.com) (iXent Games S.L.)
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install ixentbench
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
**1. Configure your session** at [ixentlabs.com](https://ixentlabs.com)
|
|
50
|
+
Log in with Google, then select the benchmark type, level and AI model.
|
|
51
|
+
Download your `.env` file and add your API Key.
|
|
52
|
+
|
|
53
|
+
**2. Run your session:**
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
ixentbench play --session YOUR_SESSION_ID
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
The visualizer opens automatically in your browser.
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Play Modes
|
|
64
|
+
|
|
65
|
+
There are four ways to play iXentBench, designed for every type of participant — from AI engineers to pure mathematicians and AI enthusiasts.
|
|
66
|
+
|
|
67
|
+
| Feature | A — BYOK | B — Local Model | C — Custom Agent | D — Sponsored |
|
|
68
|
+
|---|---|---|---|---|
|
|
69
|
+
| Uses LLM | ✅ Cloud API | ✅ Local | ❌ Pure code | ✅ iXentLabs |
|
|
70
|
+
| API Key needed | ✅ Yours | ❌ No | ❌ No | ❌ No |
|
|
71
|
+
| `--prompt-file` (optional) | ✅ Yes | ✅ Yes | ❌ No | ✅ Yes |
|
|
72
|
+
| `--strategy-file` (optional)| ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
### A — BYOK (Bring Your Own Key)
|
|
77
|
+
Use your own API Key from a cloud provider such as Gemini, Claude or GPT.
|
|
78
|
+
Your key **never leaves your machine** — it is read locally and never sent to our servers.
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
ixentbench play --session IQWUWP
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
`.env` file:
|
|
85
|
+
```
|
|
86
|
+
GOOGLE_API_KEY=your_key_here
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
### B — Local Open Source Model
|
|
92
|
+
Run any open-source model (Llama, Mistral, Qwen, …) locally using Ollama, LM Studio or llama.cpp.
|
|
93
|
+
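No API Key required. Inference runs entirely on your machine — nothing is sent to a third-party LLM provider. Game data (moves, reasoning, metrics) is still submitted to iXentLabs (see Security below).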
|
|
94
|
+
⚠️ Inference speed metrics are excluded from global leaderboards (hardware-dependent).
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# First start your local model, then:
|
|
98
|
+
ixentbench play --session IQWUWP --local-url http://localhost:11434/v1
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
### C — Custom Agent Script
|
|
104
|
+
For engineers and researchers who want to solve iXentBench using **pure code** — no LLM required.
|
|
105
|
+
Write your own Python script using any algorithm you choose: Minimax, MCTS, A*, heuristics, neural networks trained from scratch — anything goes.
|
|
106
|
+
|
|
107
|
+
Your script receives the full board state via `stdin` (JSON) and must return a move via `stdout` (JSON). It runs entirely on your machine.
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
ixentbench play --session IQWUWP --agent-script ./my_agent.py
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Your script must output:
|
|
114
|
+
```json
|
|
115
|
+
{"command": "G4@P21(b=0)+90", "reasoning": "Explanation of your decision"}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
> 💡 **Tip:** Even if your agent uses pure code, we strongly recommend adding a `--strategy-file` to explain your approach. Top results are presented to AI companies for talent opportunities (see Talent Hub below).
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
### D — iXentLabs Sponsored
|
|
123
|
+
Select "Use iXentLabs credits" on the website — we provide the API Key.
|
|
124
|
+
No `.env` file needed. The cloud handles everything.
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
ixentbench play --session IQWUWP
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Arena Mode (Multiplayer)
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
# Create a room (you become the host)
|
|
136
|
+
ixentbench arena create --mode 4v4 --level 2
|
|
137
|
+
|
|
138
|
+
# Join an existing room
|
|
139
|
+
ixentbench arena join --room IXENT-4X7K
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Supported modes: `1v1`, `2v2`, `4v4`, `human` (Human vs AI).
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## Prompt Injection & Strategy Files
|
|
147
|
+
|
|
148
|
+
### Prompt Injection (`--prompt-file`) — Optional
|
|
149
|
+
Inject your own strategic prompt directly into the AI's System Prompt before the game starts.
|
|
150
|
+
Applies to modes **A, B and D** (LLM-based). Not applicable to mode C.
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
ixentbench play --session IQWUWP --prompt-file my_prompt.txt
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
> 💡 A well-crafted Prompt Injection can dramatically improve your AI's performance. Experiment freely — your injection is recorded alongside your results.
|
|
157
|
+
|
|
158
|
+
### Strategy File (`--strategy-file`) — Optional
|
|
159
|
+
The Strategy File is a written description of your approach — how you designed your agent, what techniques you used, and what insights guided your decisions.
|
|
160
|
+
**It is never sent to the AI.** It is stored securely and linked to your results for the Talent Hub.
|
|
161
|
+
|
|
162
|
+
Applies to **all modes (A, B, C and D)**.
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
ixentbench play --session IQWUWP --strategy-file my_strategy.txt
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
> 💡 We strongly recommend always adding a Strategy File, especially for Custom Agents (mode C). It is your opportunity to showcase your thinking to the world's leading AI companies.
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## 🏆 iXentLabs Talent Hub
|
|
173
|
+
|
|
174
|
+
iXentBench is more than a benchmark — it is a **talent discovery platform**.
|
|
175
|
+
|
|
176
|
+
With your explicit prior consent, iXentLabs will present the top results of each benchmark category to leading AI companies and research labs. This includes your performance metrics, your Prompt Injection (if any), and your Strategy File — giving organizations a rare window into the reasoning and engineering skills behind the results.
|
|
177
|
+
|
|
178
|
+
> Your personal data (name, email, payment details) is always protected and never shared.
|
|
179
|
+
> Only anonymized benchmark data is presented, identified solely by your chosen nickname and avatar, unless you opt in to full visibility.
|
|
180
|
+
|
|
181
|
+
If you achieve an exceptional result and want your work to be seen by the teams building the future of AI — **iXentBench is your stage**.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Utility Commands
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
ixentbench login # Force Google re-authentication
|
|
189
|
+
ixentbench status # Show credentials and session status
|
|
190
|
+
ixentbench prompts list # List your locally saved prompt files
|
|
191
|
+
ixentbench --version # Show version
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Security
|
|
197
|
+
|
|
198
|
+
- API Keys are **always read from your local `.env`** — never passed as arguments
|
|
199
|
+
- Google authentication uses **OAuth 2.0** — credentials saved locally in `~/ixentbench/`
|
|
200
|
+
- Game data (moves, reasoning, metrics) is stored by iXentLabs per our [Terms of Service](https://ixentlabs.com/terms)
|
|
201
|
+
- Personal data (name, email) is protected per [Privacy Policy](https://ixentlabs.com/privacy)
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
Proprietary — © 2026 iXentLabs (iXent Games S.L.)
|
|
208
|
+
Contact: contact@ixentlabs.com
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
ixentbench/__init__.py,sha256=V60_0fzMxT0e7CYtHjLHQfc7Weg-ngTf1WdlGqB6xJY,132
|
|
2
|
+
ixentbench/arena.py,sha256=S58sCdwjZFza-VkAI4Cp6GqLD27s9ZzrB_o0tU7pTZc,2208
|
|
3
|
+
ixentbench/auth.py,sha256=NWp3izjHASQCzzspnK7ZLBWsn2eBJOuO0_6qbCF2YEw,2954
|
|
4
|
+
ixentbench/cli.py,sha256=geG1Zz41ZGIgycPhzEfdtEX5wzhIahhdQh3QunR5Gcs,3262
|
|
5
|
+
ixentbench/config.py,sha256=ObuWFqk9U2PNimr_C3TPqufgp7i8mAEkYeyuK775HNo,3812
|
|
6
|
+
ixentbench/play.py,sha256=AeFZGbJrcEkkNeX3YHQTM28wt0BhYnA3JfXbA41gbSQ,19382
|
|
7
|
+
ixentbench-1.0.0.dist-info/METADATA,sha256=uToz1CxWyyEgfZnXtxxw14Rvapl_0OYNDoLrIQNAuLI,7202
|
|
8
|
+
ixentbench-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
ixentbench-1.0.0.dist-info/entry_points.txt,sha256=ItlV5TLM2OAxxpQMxUj8RCY6Tmw7ragofeul8nx3R0Y,51
|
|
10
|
+
ixentbench-1.0.0.dist-info/top_level.txt,sha256=dPivHNeijwMN1FSGBgfUMCwYfJhG4YPV949EK4MjX_I,11
|
|
11
|
+
ixentbench-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ixentbench
|