cluxion-agentplugin-preprocessing 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cluxion_agentplugin_adapters/claude/.claude-plugin/plugin.json +8 -0
- cluxion_agentplugin_adapters/claude/skills/preprocess/SKILL.md +33 -0
- cluxion_agentplugin_adapters/codex/config-snippet.toml +5 -0
- cluxion_agentplugin_docs/cluxion-Docs/README.md +22 -0
- cluxion_agentplugin_docs/cluxion-Docs/architecture.md +36 -0
- cluxion_agentplugin_docs/cluxion-Docs/harness-logic.md +51 -0
- cluxion_agentplugin_docs/cluxion-Docs/honesty-preprocessing.md +40 -0
- cluxion_agentplugin_docs/cluxion-Docs/install-and-operations.md +36 -0
- cluxion_agentplugin_docs/cluxion-Docs/security.md +27 -0
- cluxion_agentplugin_docs/github-profile/README.md +67 -0
- cluxion_agentplugin_preprocessing/__init__.py +7 -0
- cluxion_agentplugin_preprocessing/cli.py +124 -0
- cluxion_agentplugin_preprocessing/hermes_config.py +163 -0
- cluxion_agentplugin_preprocessing/plugin.py +135 -0
- cluxion_agentplugin_preprocessing/plugin.yaml +13 -0
- cluxion_agentplugin_preprocessing/runner.py +241 -0
- cluxion_agentplugin_preprocessing/schemas.py +148 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/METADATA +115 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/RECORD +48 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/WHEEL +4 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/entry_points.txt +8 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/licenses/LICENSE +197 -0
- cluxion_runtime/__init__.py +16 -0
- cluxion_runtime/__main__.py +5 -0
- cluxion_runtime/adapters/__init__.py +25 -0
- cluxion_runtime/adapters/contract.py +82 -0
- cluxion_runtime/adapters/grok_build.py +35 -0
- cluxion_runtime/adapters/hermes.py +161 -0
- cluxion_runtime/adapters/spec.py +35 -0
- cluxion_runtime/bootstrap.py +270 -0
- cluxion_runtime/cli.py +282 -0
- cluxion_runtime/core/__init__.py +36 -0
- cluxion_runtime/core/clarification.py +192 -0
- cluxion_runtime/core/dispatch_store.py +270 -0
- cluxion_runtime/core/harness.py +320 -0
- cluxion_runtime/core/intent.py +55 -0
- cluxion_runtime/core/ledger.py +189 -0
- cluxion_runtime/core/ledger_codec.py +38 -0
- cluxion_runtime/core/plan_codec.py +121 -0
- cluxion_runtime/core/preprocess.py +497 -0
- cluxion_runtime/core/types.py +220 -0
- cluxion_runtime/core/work_queue.py +73 -0
- cluxion_runtime/models/__init__.py +15 -0
- cluxion_runtime/models/supervisor.py +156 -0
- cluxion_runtime/models/vllm_mlx.py +87 -0
- cluxion_runtime/resources/__init__.py +7 -0
- cluxion_runtime/resources/queue_bridge.py +128 -0
- cluxion_runtime/resources/rust_bridge.py +82 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""새 Cluxion 런타임의 작은 데이터 계약."""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from enum import IntEnum, StrEnum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AgentSurface(StrEnum):
|
|
10
|
+
"""Cluxion 앞단에 붙는 에이전트 표면."""
|
|
11
|
+
|
|
12
|
+
HERMES = "hermes"
|
|
13
|
+
CODEX = "codex"
|
|
14
|
+
CLAUDE = "claude"
|
|
15
|
+
GROK_BUILD = "grok_build"
|
|
16
|
+
LOCAL = "local"
|
|
17
|
+
API = "api"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RuntimeKind(StrEnum):
|
|
21
|
+
"""실제 모델 실행 백엔드."""
|
|
22
|
+
|
|
23
|
+
HOST_MANAGED = "host_managed"
|
|
24
|
+
VLLM_MLX = "vllm_mlx"
|
|
25
|
+
MLX_LM = "mlx_lm"
|
|
26
|
+
OLLAMA = "ollama"
|
|
27
|
+
OPENAI_COMPAT = "openai_compat"
|
|
28
|
+
GENERIC = "generic"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class WorkPriority(IntEnum):
    """Work-queue priority; the smaller the number, the earlier it runs."""

    CRITICAL = 0
    HIGH = 1
    NORMAL = 2
    LOW = 3
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class WorkItem:
    """A single unit of work that entered Cluxion from an external agent.

    Instances are immutable (``frozen=True``). Only ``work_id`` and ``prompt``
    are required; every other field has a default.
    """

    # Required fields.
    work_id: str
    prompt: str

    # Optional fields with defaults.
    surface: AgentSurface = AgentSurface.API
    priority: WorkPriority = WorkPriority.NORMAL
    model_route: str = "host/default"
    expected_ram_mb: int = 0
    context_tokens: int = 0

    # default_factory gives each instance its own dict instead of a shared one.
    metadata: dict[str, str] = field(default_factory=dict)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
class QueueSegment:
    """One segment produced by splitting a long job into executable units.

    Instances are immutable (``frozen=True``) and every field is required.
    """

    segment_id: str
    # Character offsets of the segment.
    char_start: int
    char_end: int
    token_estimate: int
    checksum: str
    preview: str
    content: str
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(frozen=True)
class AnswerPolicy:
    """Response-safety contract the host agent must honor before calling the model.

    Instances are immutable (``frozen=True``). Every field has a default, so
    ``AnswerPolicy()`` yields the baseline policy.
    """

    # Policy identifiers.
    unknown_behavior: str = "say_unknown_if_insufficient_context"
    source_policy: str = "do_not_invent_sources_or_facts"
    scope: str = "answer_only_from_available_context_and_verified_runtime_state"
    response_contract: str = "direct_answer_with_uncertainty_boundary"

    # Switches and levels; the baseline requires neither verification nor citation.
    verification_required: bool = False
    citation_required: bool = False
    uncertainty_level: str = "low"
    required_checks: tuple[str, ...] = ()

    # Kinds of material an answer may be grounded in.
    grounding: tuple[str, ...] = (
        "verified_facts",
        "explicit_user_context",
        "tool_results",
        "clearly_labeled_inferences",
    )

    # Natural-language rules that accompany the policy.
    rules: tuple[str, ...] = (
        "If the available context is insufficient, say that clearly before proceeding.",
        "Do not fabricate file state, external facts, tool results, or model availability.",
        "For current or environment-specific claims, verify through tools before presenting them as facts.",
        "Separate verified facts from inferences and unknowns when accuracy matters.",
        "If a check was not run, say it was not run; do not imply that it passed.",
    )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass(frozen=True)
class PreprocessResult:
    """Result of the deterministic preprocessing done before a model call.

    Instances are immutable (``frozen=True``). The first six fields are
    required; the rest have defaults.
    """

    # Required fields.
    normalized_prompt: str
    segments: tuple[QueueSegment, ...]
    token_estimate: int
    split_required: bool
    effort: str
    evidence: tuple[str, ...]

    # Optional fields with defaults.
    mode: str = "standard"
    preprocess_required: bool = True
    reason_codes: tuple[str, ...] = ()

    # A default AnswerPolicy is constructed per instance when none is given.
    answer_policy: AnswerPolicy = field(default_factory=AnswerPolicy)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dataclass(frozen=True)
class WorkIntent:
    """Deterministic user intent and routing direction.

    Instances are immutable (``frozen=True``). All fields except ``signals``
    are required.
    """

    category: str
    operation: str
    local_model_requested: bool
    direction: str
    confidence: float
    # Empty tuple when no signals are supplied.
    signals: tuple[str, ...] = ()
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass(frozen=True)
class ResourceSnapshot:
    """Resource snapshot obtained through Rust or the Python fallback.

    Instances are immutable (``frozen=True``).
    """

    total_ram_mb: int
    available_ram_mb: int
    swap_used_mb: int
    cpu_percent: float
    # Empty tuple when no zombie PIDs are reported.
    zombie_pids: tuple[int, ...] = ()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass(frozen=True)
class ResourceDecision:
    """Outcome of the execution admission decision.

    Instances are immutable (``frozen=True``). All fields except
    ``reason_codes`` are required.
    """

    allowed: bool
    mode: str
    reason: str
    recommended_parallel: int
    work_kind: str
    dispatch_memory_budget_mb: int
    # Empty tuple when no reason codes are supplied.
    reason_codes: tuple[str, ...] = ()
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass(frozen=True)
class ModelRuntimeProfile:
    """Launch profile for a local model server.

    Instances are immutable (``frozen=True``).
    """

    kind: RuntimeKind
    model: str
    base_url: str
    # Command as an argument tuple.
    command: tuple[str, ...]
    # Path used for health checks unless overridden.
    health_path: str = "/v1/models"
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@dataclass(frozen=True)
class HostExecutionStep:
    """Host-owned execution step that Hermes' current model must carry out.

    Instances are immutable (``frozen=True``). ``step_id``, ``kind`` and
    ``prompt`` are required; the remaining fields default to empty values.
    """

    # Required fields.
    step_id: str
    kind: str
    prompt: str

    # Optional fields; empty string / 0 / empty tuple when not supplied.
    segment_id: str = ""
    checksum: str = ""
    token_estimate: int = 0
    depends_on: tuple[str, ...] = ()
    required_checks: tuple[str, ...] = ()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@dataclass(frozen=True)
class HostExecutionPlan:
    """Contract in which Cluxion only specifies how the host model is used.

    Per the original description, Cluxion does not call the model itself under
    this contract. Instances are immutable (``frozen=True``).
    """

    # Required fields.
    model_owner: str
    provider_policy: str
    strategy: str
    queue_required: bool
    synthesis_required: bool
    preflight_required: bool
    max_extra_model_calls: int
    steps: tuple[HostExecutionStep, ...]

    # Optional tool names; empty string when not supplied.
    next_tool: str = ""
    record_tool: str = ""
    brief_tool: str = ""
    performance_notes: tuple[str, ...] = ()
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@dataclass(frozen=True)
class HarnessPlan:
    """Execution plan for work that an agent surface handed to Cluxion.

    Instances are immutable (``frozen=True``). The first six fields are
    required; the rest have defaults.
    """

    # Required fields.
    item: WorkItem
    intent: WorkIntent
    preprocessing: PreprocessResult
    resource: ResourceDecision
    runtime: ModelRuntimeProfile
    execution: HostExecutionPlan

    # Optional fields with defaults.
    queue_position: int = 0
    clarification_required: bool = False
    clarification_questions: tuple[str, ...] = ()
    queue_backend: str = "python"
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
__all__ = [
|
|
206
|
+
"AgentSurface",
|
|
207
|
+
"AnswerPolicy",
|
|
208
|
+
"HarnessPlan",
|
|
209
|
+
"HostExecutionPlan",
|
|
210
|
+
"HostExecutionStep",
|
|
211
|
+
"ModelRuntimeProfile",
|
|
212
|
+
"PreprocessResult",
|
|
213
|
+
"QueueSegment",
|
|
214
|
+
"ResourceDecision",
|
|
215
|
+
"ResourceSnapshot",
|
|
216
|
+
"RuntimeKind",
|
|
217
|
+
"WorkIntent",
|
|
218
|
+
"WorkItem",
|
|
219
|
+
"WorkPriority",
|
|
220
|
+
]
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""우선순위 기반 Agent 작업큐."""
|
|
4
|
+
|
|
5
|
+
import heapq
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from cluxion_runtime.core.types import WorkItem
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class QueueAdmission:
    """Result of trying to insert an item into the work queue.

    Instances are immutable (``frozen=True``).
    """

    accepted: bool
    reason: str
    # Empty string when nothing was evicted.
    evicted_work_id: str = ""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(order=True)
|
|
23
|
+
class _QueueEntry:
|
|
24
|
+
priority: int
|
|
25
|
+
sequence: int
|
|
26
|
+
item: WorkItem = field(compare=False)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AgentWorkQueue:
    """Small bounded queue that honors work priority first and FIFO order second."""

    def __init__(self, max_size: int = 256) -> None:
        """Create an empty queue holding at most ``max_size`` items.

        Raises:
            ValueError: If ``max_size`` is smaller than 1.
        """
        if max_size < 1:
            raise ValueError("max_size는 1 이상이어야 한다.")
        self._max_size = max_size
        self._sequence = 0
        self._heap: list[_QueueEntry] = []

    def enqueue(self, item: WorkItem) -> QueueAdmission:
        """Insert ``item``; when full, only evict strictly lower-priority work."""
        candidate = _QueueEntry(int(item.priority), self._sequence, item)
        # The sequence number is consumed even if the item ends up rejected.
        self._sequence += 1

        has_room = len(self._heap) < self._max_size
        if has_room:
            heapq.heappush(self._heap, candidate)
            return QueueAdmission(True, "queued")

        victim_index = self._worst_index()
        victim = self._heap[victim_index]
        if not candidate.priority < victim.priority:
            return QueueAdmission(False, "queue_full_lower_or_equal_priority")

        evicted_id = victim.item.work_id
        # Overwriting an arbitrary slot breaks the heap invariant; rebuild it.
        self._heap[victim_index] = candidate
        heapq.heapify(self._heap)
        return QueueAdmission(True, "queued_after_eviction", evicted_id)

    def dequeue(self) -> WorkItem | None:
        """Pop the highest-priority item, or return ``None`` when empty."""
        if self._heap:
            return heapq.heappop(self._heap).item
        return None

    def peek_order(self) -> tuple[str, ...]:
        """Return the planned execution order as work IDs, without removing anything."""
        ordered = sorted(self._heap)
        return tuple(entry.item.work_id for entry in ordered)

    def size(self) -> int:
        """Return the number of items currently waiting."""
        return len(self._heap)

    def _worst_index(self) -> int:
        """Return the heap index of the entry with the largest ``(priority, sequence)``."""

        def rank(index: int) -> tuple[int, int]:
            entry = self._heap[index]
            return (entry.priority, entry.sequence)

        return max(range(len(self._heap)), key=rank)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
__all__ = ["AgentWorkQueue", "QueueAdmission"]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""로컬 모델 런타임 프로필."""
|
|
4
|
+
|
|
5
|
+
from cluxion_runtime.models.supervisor import LocalModelSupervisor, ModelServerHealth, SupervisorStartResult
|
|
6
|
+
from cluxion_runtime.models.vllm_mlx import VllmMlxProfile, build_vllm_mlx_profile, select_mac_local_profile
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"LocalModelSupervisor",
|
|
10
|
+
"ModelServerHealth",
|
|
11
|
+
"SupervisorStartResult",
|
|
12
|
+
"VllmMlxProfile",
|
|
13
|
+
"build_vllm_mlx_profile",
|
|
14
|
+
"select_mac_local_profile",
|
|
15
|
+
]
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""로컬 모델 서버 프로세스와 health check를 관리하는 supervisor."""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import subprocess
|
|
8
|
+
from collections.abc import Callable, Mapping
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TYPE_CHECKING, Protocol
|
|
12
|
+
from urllib.request import urlopen
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from cluxion_runtime.core.types import ModelRuntimeProfile
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ManagedProcess(Protocol):
    """Minimal process contract that ``subprocess.Popen`` is expected to satisfy."""

    pid: int

    def poll(self) -> int | None:
        """Return the exit code, or ``None`` if there is none yet."""
        ...

    def terminate(self) -> None:
        """Ask the process to stop."""
        ...

    def wait(self, timeout: float | None = None) -> int:
        """Wait for the process to exit and return its exit code."""
        ...

    def kill(self) -> None:
        """Forcefully stop the process."""
        ...
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
ProcessFactory = Callable[[tuple[str, ...], Path | None, Mapping[str, str] | None], ManagedProcess]
|
|
33
|
+
HealthGetter = Callable[[str, float], tuple[int, str]]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True)
class SupervisorStartResult:
    """Result of asking the supervisor to start the model server.

    Instances are immutable (``frozen=True``).
    """

    started: bool
    pid: int
    reason: str
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class ModelServerHealth:
    """Health result for an OpenAI-compatible model server.

    Instances are immutable (``frozen=True``).
    """

    reachable: bool
    status_code: int
    reason: str
    # Empty tuple when no model IDs were reported.
    models: tuple[str, ...] = ()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class LocalModelSupervisor:
    """Manage the lifecycle of a local model server such as vLLM-MLX."""

    def __init__(
        self,
        profile: ModelRuntimeProfile,
        *,
        cwd: Path | None = None,
        env: Mapping[str, str] | None = None,
        process_factory: ProcessFactory | None = None,
        health_getter: HealthGetter | None = None,
    ) -> None:
        """Store the profile and the (injectable) process and health helpers.

        Args:
            profile: Runtime profile describing the server to supervise.
            cwd: Working directory handed to the process factory.
            env: Environment mapping handed to the process factory.
            process_factory: Callable used to spawn the server; the module
                default is used when omitted.
            health_getter: Callable used to fetch the health URL; the module
                default is used when omitted.
        """
        self._profile = profile
        self._cwd = cwd
        self._env = env
        self._process_factory = process_factory if process_factory is not None else _spawn_process
        self._health_getter = health_getter if health_getter is not None else _default_health_get
        self._process: ManagedProcess | None = None

    @property
    def profile(self) -> ModelRuntimeProfile:
        """Return the runtime profile being supervised."""
        return self._profile

    def start(self) -> SupervisorStartResult:
        """Start the model server unless a process is already running."""
        if self.is_running():
            live_pid = self._process.pid if self._process is not None else 0
            return SupervisorStartResult(False, live_pid, "already_running")

        command = self._profile.command
        if not command:
            return SupervisorStartResult(False, 0, "empty_command")

        self._process = self._process_factory(command, self._cwd, self._env)
        return SupervisorStartResult(True, self._process.pid, "started")

    def is_running(self) -> bool:
        """Return True while a tracked process exists and has not exited."""
        if self._process is None:
            return False
        return self._process.poll() is None

    def stop(self, *, timeout_sec: float = 5.0) -> bool:
        """Try a graceful shutdown, then force-kill if it times out.

        Returns:
            False when no process was tracked, True otherwise.
        """
        process = self._process
        if process is None:
            return False

        still_alive = process.poll() is None
        if still_alive:
            process.terminate()
            try:
                process.wait(timeout=timeout_sec)
            except subprocess.TimeoutExpired:
                # Graceful shutdown took too long; escalate to kill.
                process.kill()
                process.wait(timeout=timeout_sec)

        self._process = None
        return True

    def health_check(self, *, timeout_sec: float = 1.0) -> ModelServerHealth:
        """Probe the profile's health URL and summarise the response.

        Any status below 500 counts as reachable. Transport errors and
        unparsable bodies are reported as ``health_check_failed:<error>``.
        """
        endpoint = _health_url(self._profile)
        try:
            status, body = self._health_getter(endpoint, timeout_sec)
            models = _parse_models(body)
        except (OSError, TimeoutError, ValueError, json.JSONDecodeError) as exc:
            return ModelServerHealth(False, 0, f"health_check_failed:{exc}")

        healthy = status < 500
        reason = "ok" if healthy else "server_error"
        return ModelServerHealth(healthy, status, reason, models)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _spawn_process(command: tuple[str, ...], cwd: Path | None, env: Mapping[str, str] | None) -> ManagedProcess:
    """Launch ``command`` as a detached child with all standard streams discarded.

    When ``env`` is given it is layered over the current ``os.environ``;
    otherwise ``env=None`` is passed to ``Popen`` unchanged.
    """
    if env is None:
        merged_env = None
    else:
        merged_env = dict(os.environ)
        merged_env.update(dict(env))

    popen_options = {
        "cwd": cwd,
        "env": merged_env,
        "stdin": subprocess.DEVNULL,
        "stdout": subprocess.DEVNULL,
        "stderr": subprocess.DEVNULL,
        # Run the child in its own session.
        "start_new_session": True,
    }
    return subprocess.Popen(command, **popen_options)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _default_health_get(url: str, timeout_sec: float) -> tuple[int, str]:
    """Fetch ``url`` and return ``(status_code, body_text)``.

    ``urlopen`` raises ``HTTPError`` for error statuses instead of returning a
    response, which would hide the status code from the caller. This function
    converts those errors back into a normal ``(status, body)`` result, so a
    caller can tell "server answered with 4xx/5xx" apart from "server could
    not be contacted".

    Args:
        url: Absolute URL to request.
        timeout_sec: Timeout in seconds passed to ``urlopen``.

    Returns:
        The HTTP status code and the response body decoded as UTF-8.

    Raises:
        OSError: For transport-level failures (``URLError``, timeouts, ...).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Imported locally so the function stays self-contained.
    from urllib.error import HTTPError

    try:
        with urlopen(url, timeout=timeout_sec) as response:
            return int(response.status), response.read().decode("utf-8")
    except HTTPError as exc:
        # HTTPError doubles as the response object: it carries the status code
        # and a readable body, and must be closed like any other response.
        try:
            return int(exc.code), exc.read().decode("utf-8")
        finally:
            exc.close()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _health_url(profile: ModelRuntimeProfile) -> str:
|
|
138
|
+
base = profile.base_url.rstrip("/")
|
|
139
|
+
path = profile.health_path
|
|
140
|
+
if base.endswith("/v1") and path.startswith("/v1/"):
|
|
141
|
+
path = path.removeprefix("/v1")
|
|
142
|
+
return base + path
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _parse_models(body: str) -> tuple[str, ...]:
|
|
146
|
+
payload = json.loads(body)
|
|
147
|
+
if not isinstance(payload, dict):
|
|
148
|
+
return ()
|
|
149
|
+
data = payload.get("data")
|
|
150
|
+
if not isinstance(data, list):
|
|
151
|
+
return ()
|
|
152
|
+
ids = [item.get("id") for item in data if isinstance(item, dict)]
|
|
153
|
+
return tuple(str(model_id) for model_id in ids if model_id)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
__all__ = ["LocalModelSupervisor", "ManagedProcess", "ModelServerHealth", "SupervisorStartResult"]
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Apple Silicon용 vLLM-MLX 실행 프로필."""
|
|
4
|
+
|
|
5
|
+
import platform
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from cluxion_runtime.core.types import ModelRuntimeProfile, RuntimeKind
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class VllmMlxProfile:
    """Launch options for the vLLM-MLX OpenAI-compatible server.

    Instances are immutable (``frozen=True``). Only ``model`` is required.
    """

    model: str
    host: str = "127.0.0.1"
    port: int = 8000
    max_tokens: int = 128_000
    continuous_batching: bool = True
    prefix_cache: bool = True
    executable: str = "vllm-mlx"

    @property
    def base_url(self) -> str:
        """OpenAI-compatible base URL built from ``host`` and ``port``."""
        return f"http://{self.host}:{self.port}/v1"

    def command(self) -> tuple[str, ...]:
        """Build the argument tuple that starts the vLLM-MLX server."""
        argv = [
            self.executable,
            "serve",
            self.model,
            "--host",
            self.host,
            "--port",
            str(self.port),
            "--max-tokens",
            str(self.max_tokens),
        ]
        # Boolean switches are appended in a fixed order, only when enabled.
        switches = (
            (self.continuous_batching, "--continuous-batching"),
            (self.prefix_cache, "--enable-prefix-cache"),
        )
        argv.extend(flag for enabled, flag in switches if enabled)
        return tuple(argv)

    def runtime_profile(self) -> ModelRuntimeProfile:
        """Convert to the model runtime profile used by the common harness."""
        return ModelRuntimeProfile(
            kind=RuntimeKind.VLLM_MLX,
            model=self.model,
            base_url=self.base_url,
            command=self.command(),
        )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def build_vllm_mlx_profile(
    model: str,
    *,
    host: str = "127.0.0.1",
    port: int = 8000,
    max_tokens: int = 128_000,
    executable: str = "vllm-mlx",
) -> ModelRuntimeProfile:
    """Build a vLLM-MLX runtime profile for an explicitly named model.

    The batching and prefix-cache switches are left at the ``VllmMlxProfile``
    defaults; only the arguments listed here are forwarded.
    """
    options = VllmMlxProfile(
        model=model,
        host=host,
        port=port,
        max_tokens=max_tokens,
        executable=executable,
    )
    return options.runtime_profile()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def select_mac_local_profile(model: str, *, port: int = 8000) -> ModelRuntimeProfile:
    """Prefer vLLM-MLX on Apple Silicon Macs; otherwise use a generic OpenAI endpoint.

    The platform test is ``platform.system() == "Darwin"`` combined with an
    ``arm64``/``aarch64`` machine string (case-insensitive).
    """
    is_darwin = platform.system() == "Darwin"
    if is_darwin and platform.machine().lower() in {"arm64", "aarch64"}:
        return build_vllm_mlx_profile(model, port=port)

    # Fallback: plain `vllm serve` bound to loopback on the requested port.
    fallback_command = ("vllm", "serve", model, "--host", "127.0.0.1", "--port", str(port))
    return ModelRuntimeProfile(
        kind=RuntimeKind.OPENAI_COMPAT,
        model=model,
        base_url=f"http://127.0.0.1:{port}/v1",
        command=fallback_command,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
__all__ = ["VllmMlxProfile", "build_vllm_mlx_profile", "select_mac_local_profile"]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Rust-backed durable queue with Python fallback."""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Mapping
|
|
14
|
+
|
|
15
|
+
QUEUE_BIN_ENV = "CLUXION_QUEUE_BIN"
|
|
16
|
+
QUEUE_STORE_ENV = "CLUXION_QUEUE_STORE_DIR"
|
|
17
|
+
_DEFAULT_STORE = Path.home() / ".local" / "share" / "cluxion-agentplugin-preprocessing" / "queue"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def queue_available() -> bool:
    """Return True when the Rust queue binary can be located by ``shutil.which``."""
    resolved = shutil.which(_queue_binary())
    return resolved is not None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def default_store_dir() -> Path:
    """Return the queue store directory.

    The ``QUEUE_STORE_ENV`` environment variable wins when set; otherwise the
    module default is used. A leading ``~`` is expanded.
    """
    configured = os.environ.get(QUEUE_STORE_ENV, str(_DEFAULT_STORE))
    return Path(configured).expanduser()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def enqueue_work(payload: Mapping[str, object], *, store_dir: Path | None = None) -> dict[str, object]:
    """Enqueue work through the Rust queue binary.

    Errors from the underlying invocation propagate, so the caller can fall
    back to the Python queue.
    """
    response = _invoke("enqueue", payload, store_dir=store_dir)
    return response
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def dequeue_work(*, store_dir: Path | None = None) -> dict[str, object]:
    """Run the queue binary's ``dequeue`` command with an empty payload."""
    no_arguments: dict[str, object] = {}
    return _invoke("dequeue", no_arguments, store_dir=store_dir)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def peek_order(*, store_dir: Path | None = None, limit: int = 16) -> dict[str, object]:
    """Run the queue binary's ``peek`` command, passing ``limit`` in the payload."""
    request = {"limit": limit}
    return _invoke("peek", request, store_dir=store_dir)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def persist_dispatch_bundle(work_id: str, bundle: Mapping[str, object], *, store_dir: Path | None = None) -> dict[str, object]:
    """Run the queue binary's ``persist`` command for ``work_id``.

    ``bundle`` is copied into a plain dict before it is sent.
    """
    request = {"work_id": work_id, "bundle": dict(bundle)}
    return _invoke("persist", request, store_dir=store_dir)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def next_dispatch_step(work_id: str, *, store_dir: Path | None = None) -> dict[str, object]:
    """Run the queue binary's ``next`` command for ``work_id``."""
    request = {"work_id": work_id}
    return _invoke("next", request, store_dir=store_dir)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def record_dispatch_step(
    work_id: str,
    step_id: str,
    *,
    result: str = "",
    error: str = "",
    failed: bool = False,
    store_dir: Path | None = None,
) -> dict[str, object]:
    """Run the queue binary's ``record`` command for one step of ``work_id``.

    ``result``, ``error`` and ``failed`` are forwarded as given.
    """
    request = {
        "work_id": work_id,
        "step_id": step_id,
        "result": result,
        "error": error,
        "failed": failed,
    }
    return _invoke("record", request, store_dir=store_dir)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def build_briefing(work_id: str, *, store_dir: Path | None = None) -> dict[str, object]:
    """Run the queue binary's ``brief`` command for ``work_id``."""
    request = {"work_id": work_id}
    return _invoke("brief", request, store_dir=store_dir)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def queue_status(*, store_dir: Path | None = None) -> dict[str, object]:
    """Run the queue binary's ``status`` command with an empty payload."""
    no_arguments: dict[str, object] = {}
    return _invoke("status", no_arguments, store_dir=store_dir)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _invoke(command: str, payload: Mapping[str, object], *, store_dir: Path | None) -> dict[str, object]:
    """Run one queue-binary subcommand, sending JSON on stdin and parsing JSON from stdout.

    The request is ``payload`` plus a ``store_dir`` entry: the explicit
    ``store_dir`` when given, otherwise ``default_store_dir()``.

    The pipes are pinned to UTF-8. The request is serialised with
    ``ensure_ascii=False``, so non-ASCII text is written as-is; leaving the
    pipe on the locale encoding would corrupt or reject such payloads on
    systems whose locale is not UTF-8.

    Args:
        command: Subcommand name passed as the binary's only argument.
        payload: JSON-serialisable request fields.
        store_dir: Store directory override, or ``None`` for the default.

    Returns:
        The JSON object printed by the binary.

    Raises:
        RuntimeError: If the binary cannot be found, exits non-zero, or prints
            JSON that is not an object.
        json.JSONDecodeError: If stdout is not valid JSON.
    """
    binary = _queue_binary()
    if shutil.which(binary) is None:
        raise RuntimeError("cluxion-queue binary not found")
    body = dict(payload)
    body["store_dir"] = str(default_store_dir() if store_dir is None else store_dir)
    completed = subprocess.run(
        [binary, command],
        input=json.dumps(body, ensure_ascii=False),
        text=True,
        encoding="utf-8",  # must match the non-ASCII JSON produced above
        capture_output=True,
        check=False,
    )
    if completed.returncode != 0:
        # Prefer the binary's own stderr message; fall back to a generic one.
        raise RuntimeError(completed.stderr.strip() or f"cluxion-queue {command} failed")
    parsed = json.loads(completed.stdout)
    if not isinstance(parsed, dict):
        raise RuntimeError(f"cluxion-queue {command} returned non-object JSON")
    return parsed
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _queue_binary() -> str:
    """Resolve which queue binary to run.

    Resolution order:
      1. The ``QUEUE_BIN_ENV`` environment variable, when non-blank.
      2. A release build at ``rust/cluxion_queue/target/release/cluxion-queue``
         under the directory three levels above this file's directory, if it
         exists.
      3. The bare name ``cluxion-queue``.
    """
    override = os.environ.get(QUEUE_BIN_ENV, "").strip()
    if override:
        return override

    base_dir = Path(__file__).resolve().parents[3]
    bundled = base_dir.joinpath("rust", "cluxion_queue", "target", "release", "cluxion-queue")
    return str(bundled) if bundled.exists() else "cluxion-queue"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
__all__ = [
|
|
116
|
+
"QUEUE_BIN_ENV",
|
|
117
|
+
"QUEUE_STORE_ENV",
|
|
118
|
+
"build_briefing",
|
|
119
|
+
"default_store_dir",
|
|
120
|
+
"dequeue_work",
|
|
121
|
+
"enqueue_work",
|
|
122
|
+
"next_dispatch_step",
|
|
123
|
+
"peek_order",
|
|
124
|
+
"persist_dispatch_bundle",
|
|
125
|
+
"queue_available",
|
|
126
|
+
"queue_status",
|
|
127
|
+
"record_dispatch_step",
|
|
128
|
+
]
|