cortex-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ """
2
+ Cortex-Engine — AI Operating System for Coding.
3
+
4
+ Routes queries across 30–50+ language models, manages GPU scheduling,
5
+ LRU caching, output evaluation, and continuous feedback-driven routing.
6
+ """
7
+
8
+ __version__ = "0.1.0"
9
+ __author__ = "Cortex-Engine Contributors"
10
+ __license__ = "MIT"
11
+
12
+ from cortex_engine.main import app # noqa: F401 — expose ASGI app at top level
cortex_engine/cli.py ADDED
@@ -0,0 +1,51 @@
1
+ """
2
+ Cortex-Engine — CLI entry point.
3
+
4
+ Usage:
5
+ cortex-engine # start the API server (default)
6
+ cortex-engine serve # same as above
7
+ cortex-engine --help
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import sys
14
+
15
+
16
def main(argv: list[str] | None = None) -> None:
    """CLI entry point: parse arguments and run the requested command.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when
            ``None``. Accepting it as a parameter keeps the CLI testable
            and is backward compatible with the zero-argument call.

    Raises:
        SystemExit: from argparse on ``--help`` or invalid arguments, or
            with status 1 when uvicorn is not installed.
    """
    parser = argparse.ArgumentParser(
        prog="cortex-engine",
        description="Cortex-Engine — AI Operating System for Coding.",
    )
    parser.add_argument(
        "command",
        nargs="?",
        default="serve",
        choices=["serve"],
        help="Command to run (default: serve)",
    )
    parser.add_argument("--host", default="0.0.0.0", help="Bind host (default: 0.0.0.0)")
    parser.add_argument("--port", type=int, default=8000, help="Bind port (default: 8000)")
    parser.add_argument("--workers", type=int, default=1, help="Worker count (default: 1)")
    parser.add_argument("--reload", action="store_true", help="Enable auto-reload (dev mode)")
    args = parser.parse_args(argv)

    if args.command == "serve":
        # uvicorn is an optional runtime dependency; import lazily so
        # `--help` and argument errors work without it installed.
        try:
            import uvicorn
        except ImportError:
            print("ERROR: uvicorn is required. Run: uv add uvicorn", file=sys.stderr)
            sys.exit(1)

        # uvicorn ignores --workers when --reload is enabled; warn up front
        # instead of silently running a single worker.
        if args.reload and args.workers > 1:
            print("WARNING: --workers is ignored when --reload is enabled.", file=sys.stderr)

        uvicorn.run(
            "cortex_engine.main:app",
            host=args.host,
            port=args.port,
            workers=args.workers,
            reload=args.reload,
        )


if __name__ == "__main__":
    main()
@@ -0,0 +1,35 @@
1
+ """
2
+ Cortex-Engine — Configuration
3
+ Reads from environment variables with sensible defaults.
4
+ """
5
+
6
+ from __future__ import annotations
7
+ from typing import List
8
+ from pydantic_settings import BaseSettings, SettingsConfigDict
9
+
10
+
11
class Settings(BaseSettings):
    """Application configuration, loaded from the environment / ``.env``.

    Field names map to environment variables case-insensitively
    (pydantic-settings default), e.g. ``REDIS_URL`` overrides ``redis_url``.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # Redis connection
    redis_url: str = "redis://localhost:6379/0"
    redis_max_connections: int = 50

    # HTTP server bind options
    host: str = "0.0.0.0"
    port: int = 8000
    workers: int = 1
    reload: bool = False

    # CORS — "*" allows every origin; tighten for production deployments.
    allowed_origins: List[str] = ["*"]

    # Response-cache TTL in seconds
    cache_ttl_seconds: int = 3600

    # Feature flags
    enable_evaluation: bool = True
    enable_feedback: bool = True


# Singleton settings instance, constructed once at import time.
settings = Settings()
@@ -0,0 +1,41 @@
1
+ """
2
+ Cortex-Engine — Dependency Injection
3
+ All FastAPI Depends() providers live here.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from functools import lru_cache
9
+ from typing import AsyncGenerator
10
+
11
+ import redis.asyncio as redis
12
+ from fastapi import Request
13
+
14
+ from cortex_engine.services.cache_manager import CacheManager
15
+ from cortex_engine.services.evaluator import EvaluationEngine
16
+ from cortex_engine.services.feedback import FeedbackSystem
17
+ from cortex_engine.services.orchestrator import ModelOrchestrator
18
+ from cortex_engine.services.registry import ModelRegistry
19
+ from cortex_engine.services.router import RouterEngine
20
+ from cortex_engine.services.scheduler import Scheduler
21
+
22
+
23
# ── Low-level services (pulled from app.state) ────────────────────────────────
# Each provider returns the singleton that main.py's lifespan stored on
# app.state, so endpoints can declare them with Depends(...).

def get_redis(request: Request) -> redis.Redis:
    """Shared async Redis client."""
    return request.app.state.redis


def get_registry(request: Request) -> ModelRegistry:
    """Model registry service."""
    return request.app.state.registry


def get_cache(request: Request) -> CacheManager:
    """Response / LRU cache manager."""
    return request.app.state.cache


def get_scheduler(request: Request) -> Scheduler:
    """GPU / job scheduler."""
    return request.app.state.scheduler


def get_router_engine(request: Request) -> RouterEngine:
    """Query-routing engine (stored as app.state.router by the lifespan)."""
    return request.app.state.router


def get_evaluator(request: Request) -> EvaluationEngine:
    """Output evaluation engine (stored as app.state.evaluator)."""
    return request.app.state.evaluator


def get_feedback(request: Request) -> FeedbackSystem:
    """Feedback / routing-accuracy system."""
    return request.app.state.feedback


def get_orchestrator(request: Request) -> ModelOrchestrator:
    """Top-level orchestrator that ties all services together."""
    return request.app.state.orchestrator
cortex_engine/main.py ADDED
@@ -0,0 +1,180 @@
1
+ """
2
+ Cortex-Engine — Main Application
3
+ FastAPI entry point. Bootstraps all services on startup.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import time
10
+ from contextlib import asynccontextmanager
11
+
12
+ import redis.asyncio as redis_asyncio
13
+ from fastapi import FastAPI
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from fastapi.middleware.gzip import GZipMiddleware
16
+
17
+ from cortex_engine.config import settings
18
+ from cortex_engine.routers.inference import router as inference_router
19
+ from cortex_engine.routers.api import registry_router, admin_router
20
+ from cortex_engine.services.cache_manager import CacheManager
21
+ from cortex_engine.services.evaluator import EvaluationEngine
22
+ from cortex_engine.services.feedback import FeedbackSystem
23
+ from cortex_engine.services.orchestrator import ModelOrchestrator
24
+ from cortex_engine.services.registry import ModelRegistry
25
+ from cortex_engine.services.router import RouterEngine
26
+ from cortex_engine.services.scheduler import Scheduler
27
+
28
# Configure root logging once, at module import time.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)-8s | %(name)s — %(message)s",
)
log = logging.getLogger("cortex_engine")

# Wall-clock time at import; used by /health and /metrics for uptime.
_BOOT_TIME = time.time()
35
+
36
+
37
+ # ── Lifespan (startup / shutdown) ─────────────────────────────────────────────
38
+
39
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Bring up every service before serving requests; tear down after."""
    log.info("🚀 Cortex-Engine booting …")

    # Redis connection pool + client (string-decoded responses).
    redis_pool = redis_asyncio.ConnectionPool.from_url(
        settings.redis_url,
        max_connections=settings.redis_max_connections,
        decode_responses=True,
    )
    redis_client = redis_asyncio.Redis(connection_pool=redis_pool)
    await redis_client.ping()  # fail fast if Redis is unreachable
    log.info("✅ Redis connected: %s", settings.redis_url)

    # Core services, constructed in dependency order.
    registry = ModelRegistry(redis_client)
    cache = CacheManager(redis_client)
    scheduler = Scheduler(redis_client, registry)
    router = RouterEngine(registry=registry)
    evaluator = EvaluationEngine()
    feedback = FeedbackSystem(redis_client)

    # Populate the registry with its initial model set before wiring the
    # orchestrator on top of it.
    await registry.seed()

    orchestrator = ModelOrchestrator(
        registry=registry,
        router=router,
        scheduler=scheduler,
        cache=cache,
        evaluator=evaluator,
        feedback=feedback,
    )

    # Expose everything on app.state for the Depends() providers.
    app.state.redis = redis_client
    app.state.registry = registry
    app.state.cache = cache
    app.state.scheduler = scheduler
    app.state.router = router
    app.state.evaluator = evaluator
    app.state.feedback = feedback
    app.state.orchestrator = orchestrator

    log.info("✅ All services initialised.")
    yield

    # Shutdown: close the client first, then the pool backing it.
    log.info("🛑 Cortex-Engine shutting down …")
    await redis_client.aclose()
    await redis_pool.aclose()
89
+
90
+
91
# ── App ───────────────────────────────────────────────────────────────────────

# Public FastAPI application object: re-exported by cortex_engine/__init__.py
# and targeted by uvicorn as "cortex_engine.main:app".
app = FastAPI(
    title="Cortex-Engine",
    description=(
        "Distributed AI system: routes queries to 30–50+ models, "
        "manages GPU scheduling, caching, and continuous feedback."
    ),
    version="0.1.0",
    lifespan=lifespan,
    docs_url="/docs",
    redoc_url="/redoc",
)

# Compress large responses; allow the configured cross-origin callers.
app.add_middleware(GZipMiddleware, minimum_size=1000)
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.allowed_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
112
+
113
+
114
# ── Latency middleware ─────────────────────────────────────────────────────────

# Request/Response are also used by the health endpoints below.
from fastapi import Request, Response


@app.middleware("http")
async def latency_header(request: Request, call_next):
    """Attach an X-Process-Time-Ms header with time spent handling the request.

    Uses the module-level ``time`` import; the previous ``import time as
    _time`` alias was redundant and has been dropped.
    """
    t0 = time.perf_counter()
    response: Response = await call_next(request)
    response.headers["X-Process-Time-Ms"] = str(
        round((time.perf_counter() - t0) * 1000, 2)
    )
    return response
127
+
128
+
129
# ── Routers ───────────────────────────────────────────────────────────────────

# Mount the API route groups (order preserved: inference, registry, admin).
for _api_router in (inference_router, registry_router, admin_router):
    app.include_router(_api_router)
134
+
135
+
136
# ── Health endpoints ───────────────────────────────────────────────────────────

@app.get("/", tags=["Health"])
async def root():
    """Lightweight liveness probe / service banner."""
    return {
        "service": "Cortex-Engine",
        "version": "0.1.0",
        "status": "ok",
    }
141
+
142
+
143
@app.get("/health", tags=["Health"])
async def health(request: Request):
    """Aggregate health report: Redis reachability plus model/cache stats."""
    redis_client: redis_asyncio.Redis = request.app.state.redis
    try:
        await redis_client.ping()
    except Exception:
        # Broad catch is deliberate: any failure means Redis is unusable.
        redis_ok = False
    else:
        redis_ok = True

    reg_stats = await request.app.state.registry.stats()
    cache_info = await request.app.state.cache.stats()

    overall = "ok" if redis_ok else "degraded"
    return {
        "status": overall,
        "uptime_seconds": round(time.time() - _BOOT_TIME, 1),
        "redis": "up" if redis_ok else "down",
        "models": reg_stats,
        "cache": cache_info,
    }
162
+
163
+
164
@app.get("/metrics", tags=["Health"])
async def metrics(request: Request):
    """Operational metrics: routing accuracy, cache hit rate, queue/GPU load."""
    state = request.app.state
    fb_stats = await state.feedback.accuracy_stats()
    cache_stats = await state.cache.stats()
    reg_stats = await state.registry.stats()
    queue_depth = await state.scheduler.queue_depth()
    gpu_loads = await state.scheduler.gpu_loads()

    return {
        "uptime_seconds": round(time.time() - _BOOT_TIME, 1),
        "routing_accuracy": fb_stats["accuracy"],
        "cache_hit_rate": cache_stats["hit_rate"],
        "models": reg_stats,
        "queue_depth": queue_depth,
        "gpu_loads": gpu_loads,
        "feedback": fb_stats,
    }
@@ -0,0 +1 @@
1
+ # models sub-package
@@ -0,0 +1,158 @@
1
+ """
2
+ Cortex-Engine — Pydantic Schemas
3
+ Defines all request/response models used across the system.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from enum import Enum
9
+ from typing import Any, Dict, List, Optional
10
+ from pydantic import BaseModel, ConfigDict, Field
11
+
12
+
13
+ # ─────────────────────────────────────────────
14
+ # Enums
15
+ # ─────────────────────────────────────────────
16
+
17
class ModelType(str, Enum):
    """Task category a model specialises in; used by the routing engine."""

    CODING = "coding"
    DEBUGGING = "debugging"
    EXPLANATION = "explanation"
    TESTING = "testing"
    REFACTORING = "refactoring"
    DOCS = "docs"
    GENERAL = "general"
25
+
26
+
27
class ModelStatus(str, Enum):
    """Lifecycle / availability state of a registered model."""

    AVAILABLE = "available"
    LOADING = "loading"
    BUSY = "busy"
    OFFLINE = "offline"
32
+
33
+
34
class EvalMethod(str, Enum):
    """Technique used by the evaluation engine to score an output."""

    UNIT_TEST = "unit_test"
    STATIC_ANALYSIS = "static_analysis"
    LLM_GRADING = "llm_grading"
    EXECUTION = "execution"
39
+
40
+
41
+ # ─────────────────────────────────────────────
42
+ # Model Registry Schemas
43
+ # ─────────────────────────────────────────────
44
+
45
class ModelMetadata(BaseModel):
    """Registry entry describing one deployed model."""

    model_name: str
    type: ModelType
    cluster: str                         # e.g. "python", "typescript", "rust"
    size: str                            # e.g. "7B", "14B", "70B"
    latency_ms: int = Field(ge=0)        # expected p50 latency in ms
    cost_per_tok: float = Field(ge=0.0)  # USD per 1K tokens
    gpu_location: str                    # e.g. "gpu-0", "gpu-3"
    status: ModelStatus = ModelStatus.AVAILABLE
    priority: int = 5                    # 1 (lowest) – 10 (highest)
    tags: List[str] = []

    # protected_namespaces=() silences pydantic v2's warning about the
    # "model_name" field clashing with the reserved "model_" namespace.
    model_config = ConfigDict(use_enum_values=True, protected_namespaces=())
58
+
59
+
60
class ModelRegistryResponse(BaseModel):
    """Listing of all registered models plus summary counts."""

    models: List[ModelMetadata]
    total: int       # total registered models
    available: int   # subset currently in AVAILABLE status
64
+
65
+
66
+ # ─────────────────────────────────────────────
67
+ # Inference Schemas
68
+ # ─────────────────────────────────────────────
69
+
70
class InferenceRequest(BaseModel):
    """A single inference job submitted by a client.

    Mutable defaults ([] / {}) are safe on pydantic models: each instance
    receives a deep copy of the default.
    """

    query: str = Field(..., min_length=1, max_length=32_000)
    preferred_type: Optional[ModelType] = None
    preferred_model: Optional[str] = None  # override routing
    max_tokens: int = Field(default=2048, ge=64, le=16384)
    temperature: float = Field(default=0.2, ge=0.0, le=2.0)
    evaluate: bool = True                  # run the evaluation engine on the output
    user_id: Optional[str] = None
    session_id: Optional[str] = None
    metadata: Dict[str, Any] = {}          # free-form client metadata
80
+
81
+
82
class RouteDecision(BaseModel):
    """Outcome of routing a query to a model cluster."""

    cluster: str
    selected_model: str
    confidence: float = Field(ge=0.0, le=1.0)
    fallback_models: List[str] = []   # tried in order if the primary fails
    routing_latency_ms: float = 0.0   # time spent inside the router itself
88
+
89
+
90
class EvaluationResult(BaseModel):
    """Score assigned to a model output by the evaluation engine."""

    success: bool
    score: float = Field(ge=0.0, le=1.0)
    method: EvalMethod   # which evaluation technique produced the score
    details: str = ""
95
+
96
+
97
class InferenceResponse(BaseModel):
    """Result of one inference call, including routing and evaluation info."""

    request_id: str
    output: str
    model_used: str
    route: RouteDecision
    evaluation: Optional[EvaluationResult] = None
    total_latency_ms: float
    cached: bool = False      # True when served from the response cache
    tokens_used: int = 0

    # "model_used" collides with pydantic v2's reserved "model_" namespace;
    # clearing protected_namespaces silences the warning without renaming.
    model_config = ConfigDict(protected_namespaces=())
106
+
107
+
108
+ # ─────────────────────────────────────────────
109
+ # Scheduler Schemas
110
+ # ─────────────────────────────────────────────
111
+
112
class ScheduleRequest(BaseModel):
    """Ask the scheduler to place a model, optionally on a preferred GPU."""

    model_name: str
    gpu_hint: Optional[str] = None  # prefer a specific GPU, e.g. "gpu-2"
    priority: int = 5

    # Allow the "model_name" field despite pydantic v2's "model_" namespace.
    model_config = ConfigDict(protected_namespaces=())
116
+
117
class ScheduleResult(BaseModel):
    """Scheduler placement decision for a model."""

    model_name: str
    gpu_location: str
    queue_position: int
    estimated_wait_ms: float

    # Allow the "model_name" field despite pydantic v2's "model_" namespace.
    model_config = ConfigDict(protected_namespaces=())
122
+
123
+
124
+ # ─────────────────────────────────────────────
125
+ # Feedback Schemas
126
+ # ─────────────────────────────────────────────
127
+
128
class FeedbackEntry(BaseModel):
    """One routing-feedback record used to tune future route decisions."""

    request_id: str
    query: str
    predicted_route: str
    actual_route: Optional[str] = None
    success: bool
    score: float
    user_correction: Optional[str] = None  # what the correct model should have been
136
+
137
+
138
+ # ─────────────────────────────────────────────
139
+ # Health / Metrics
140
+ # ─────────────────────────────────────────────
141
+
142
class GPUStats(BaseModel):
    """Utilisation snapshot for a single GPU."""

    gpu_id: str
    utilization: float   # 0.0 – 1.0
    memory_used: float   # GB
    memory_total: float  # GB
    models_loaded: List[str] = []
148
+
149
+
150
class KernelMetrics(BaseModel):
    """System-wide operational metrics exposed by the engine."""

    uptime_seconds: float
    total_requests: int
    requests_per_second: float
    cache_hit_rate: float     # 0.0 – 1.0
    avg_latency_ms: float
    routing_accuracy: float   # 0.0 – 1.0
    gpu_stats: List[GPUStats] = []
    models_available: int
cortex_engine/py.typed ADDED
@@ -0,0 +1 @@
1
+ # This file marks cortex_engine as a typed package (PEP 561).
@@ -0,0 +1 @@
1
+ # routers sub-package
@@ -0,0 +1,86 @@
1
+ """
2
+ Cortex-Engine — /registry and /admin routes
3
+ """
4
+
5
+ from fastapi import APIRouter, Depends, HTTPException, status
6
+ from cortex_engine.models.schemas import ModelMetadata, ModelRegistryResponse, ModelStatus
7
+ from cortex_engine.services.registry import ModelRegistry
8
+ from cortex_engine.services.scheduler import Scheduler
9
+ from cortex_engine.services.cache_manager import CacheManager
10
+ from cortex_engine.services.feedback import FeedbackSystem
11
+ from cortex_engine.dependencies import get_registry, get_scheduler, get_cache, get_feedback
12
+
13
# Route groups: /registry for model CRUD, /admin for operational introspection.
registry_router = APIRouter(prefix="/registry", tags=["Registry"])
admin_router = APIRouter(prefix="/admin", tags=["Admin"])
15
+
16
+
17
# ── Registry ──────────────────────────────────────────────────────────────────

@registry_router.get("/", response_model=ModelRegistryResponse)
async def list_models(reg: ModelRegistry = Depends(get_registry)):
    """List every registered model together with registry totals."""
    all_models = await reg.list_all()
    reg_stats = await reg.stats()
    return ModelRegistryResponse(
        models=all_models,
        total=reg_stats["total"],
        available=reg_stats["available"],
    )
24
+
25
+
26
@registry_router.post("/", status_code=status.HTTP_201_CREATED)
async def register_model(model: ModelMetadata, reg: ModelRegistry = Depends(get_registry)):
    """Register (or overwrite) a model entry in the registry; returns its name."""
    name = model.model_name
    await reg.register(model)
    return {"registered": name}
30
+
31
+
32
@registry_router.get("/{model_name}")
async def get_model(model_name: str, reg: ModelRegistry = Depends(get_registry)):
    """Fetch one model's metadata; 404 when the name is unknown."""
    found = await reg.get(model_name)
    if found:
        return found
    raise HTTPException(404, f"Model '{model_name}' not found")
38
+
39
+
40
@registry_router.patch("/{model_name}/status")
async def set_model_status(
    model_name: str, status: ModelStatus,
    reg: ModelRegistry = Depends(get_registry),
):
    """Set a model's availability status (``status`` query parameter).

    NOTE(review): the ``status`` parameter shadows the imported
    ``fastapi.status`` module inside this function. Harmless here (the
    module is only used in decorators evaluated at import time), but a
    rename with a query alias would be cleaner.
    """
    ok = await reg.update_status(model_name, status)
    if not ok:
        raise HTTPException(404, f"Model '{model_name}' not found")
    return {"updated": model_name, "status": status}
49
+
50
+
51
@registry_router.delete("/{model_name}", status_code=204)
async def deregister_model(model_name: str, reg: ModelRegistry = Depends(get_registry)):
    """Remove a model from the registry; 204 on success, 404 when absent."""
    removed = await reg.deregister(model_name)
    if not removed:
        raise HTTPException(404, f"Model '{model_name}' not found")
56
+
57
+
58
# ── Admin ─────────────────────────────────────────────────────────────────────

@admin_router.get("/queue")
async def queue_status(sched: Scheduler = Depends(get_scheduler)):
    """Snapshot of scheduler queue depth, running jobs and per-GPU load."""
    depth = await sched.queue_depth()
    running = await sched.running_jobs()
    loads = await sched.gpu_loads()
    return {
        "queue_depth": depth,
        "running_jobs": running,
        "gpu_loads": loads,
    }
67
+
68
+
69
@admin_router.get("/cache")
async def cache_status(cache: CacheManager = Depends(get_cache)):
    """Cache statistics, warm pool contents and the last 20 evictions."""
    cache_stats = await cache.stats()
    warm = await cache.warm_pool()
    evictions = await cache.eviction_log(20)
    return {
        "stats": cache_stats,
        "warm_pool": warm,
        "eviction_log": evictions,
    }
76
+
77
+
78
@admin_router.get("/feedback")
async def feedback_status(
    limit: int = 50,
    fb: FeedbackSystem = Depends(get_feedback),
):
    """Routing-accuracy statistics plus the most recent feedback entries."""
    accuracy = await fb.accuracy_stats()
    recent_entries = await fb.recent(limit)
    return {
        "accuracy_stats": accuracy,
        "recent": recent_entries,
    }
@@ -0,0 +1,38 @@
1
+ """
2
+ Cortex-Engine — /inference routes
3
+ """
4
+
5
+ from fastapi import APIRouter, Depends, HTTPException, status
6
+ from cortex_engine.models.schemas import InferenceRequest, InferenceResponse
7
+ from cortex_engine.services.orchestrator import ModelOrchestrator
8
+ from cortex_engine.dependencies import get_orchestrator
9
+
10
+ router = APIRouter(prefix="/inference", tags=["Inference"])
11
+
12
+
13
@router.post(
    "/",
    response_model=InferenceResponse,
    status_code=status.HTTP_200_OK,
    summary="Submit a query for AI inference",
)
async def infer(
    req: InferenceRequest,
    orchestrator: ModelOrchestrator = Depends(get_orchestrator),
) -> InferenceResponse:
    """Route *req* to the best model and return its (optionally evaluated) output.

    Raises:
        HTTPException: 503 when the orchestrator reports a runtime failure.
    """
    try:
        return await orchestrator.infer(req)
    except RuntimeError as exc:
        # Chain the cause so tracebacks keep the original failure context.
        raise HTTPException(status_code=503, detail=str(exc)) from exc
+ raise HTTPException(status_code=503, detail=str(exc))
27
+
28
+
29
@router.get(
    "/route-preview",
    summary="Preview which model would be selected without running inference",
)
async def route_preview(
    query: str,
    orchestrator: ModelOrchestrator = Depends(get_orchestrator),
):
    """Dry-run the router for *query* and return the routing decision.

    NOTE(review): this reaches into the orchestrator's private ``_router``
    attribute — consider exposing a public preview method on
    ModelOrchestrator instead.
    """
    route = await orchestrator._router.route(query=query)
    return route
@@ -0,0 +1 @@
1
+ # services sub-package