cluxion-agentplugin-preprocessing 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cluxion_agentplugin_adapters/claude/.claude-plugin/plugin.json +8 -0
- cluxion_agentplugin_adapters/claude/skills/preprocess/SKILL.md +33 -0
- cluxion_agentplugin_adapters/codex/config-snippet.toml +5 -0
- cluxion_agentplugin_docs/cluxion-Docs/README.md +22 -0
- cluxion_agentplugin_docs/cluxion-Docs/architecture.md +36 -0
- cluxion_agentplugin_docs/cluxion-Docs/harness-logic.md +51 -0
- cluxion_agentplugin_docs/cluxion-Docs/honesty-preprocessing.md +40 -0
- cluxion_agentplugin_docs/cluxion-Docs/install-and-operations.md +36 -0
- cluxion_agentplugin_docs/cluxion-Docs/security.md +27 -0
- cluxion_agentplugin_docs/github-profile/README.md +67 -0
- cluxion_agentplugin_preprocessing/__init__.py +7 -0
- cluxion_agentplugin_preprocessing/cli.py +124 -0
- cluxion_agentplugin_preprocessing/hermes_config.py +163 -0
- cluxion_agentplugin_preprocessing/plugin.py +135 -0
- cluxion_agentplugin_preprocessing/plugin.yaml +13 -0
- cluxion_agentplugin_preprocessing/runner.py +241 -0
- cluxion_agentplugin_preprocessing/schemas.py +148 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/METADATA +115 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/RECORD +48 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/WHEEL +4 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/entry_points.txt +8 -0
- cluxion_agentplugin_preprocessing-0.2.0.dist-info/licenses/LICENSE +197 -0
- cluxion_runtime/__init__.py +16 -0
- cluxion_runtime/__main__.py +5 -0
- cluxion_runtime/adapters/__init__.py +25 -0
- cluxion_runtime/adapters/contract.py +82 -0
- cluxion_runtime/adapters/grok_build.py +35 -0
- cluxion_runtime/adapters/hermes.py +161 -0
- cluxion_runtime/adapters/spec.py +35 -0
- cluxion_runtime/bootstrap.py +270 -0
- cluxion_runtime/cli.py +282 -0
- cluxion_runtime/core/__init__.py +36 -0
- cluxion_runtime/core/clarification.py +192 -0
- cluxion_runtime/core/dispatch_store.py +270 -0
- cluxion_runtime/core/harness.py +320 -0
- cluxion_runtime/core/intent.py +55 -0
- cluxion_runtime/core/ledger.py +189 -0
- cluxion_runtime/core/ledger_codec.py +38 -0
- cluxion_runtime/core/plan_codec.py +121 -0
- cluxion_runtime/core/preprocess.py +497 -0
- cluxion_runtime/core/types.py +220 -0
- cluxion_runtime/core/work_queue.py +73 -0
- cluxion_runtime/models/__init__.py +15 -0
- cluxion_runtime/models/supervisor.py +156 -0
- cluxion_runtime/models/vllm_mlx.py +87 -0
- cluxion_runtime/resources/__init__.py +7 -0
- cluxion_runtime/resources/queue_bridge.py +128 -0
- cluxion_runtime/resources/rust_bridge.py +82 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""전처리, 작업큐, Rust admission, 로컬 모델 프로필을 묶는 하네스."""
|
|
4
|
+
|
|
5
|
+
from cluxion_runtime.core.clarification import assess_clarification
|
|
6
|
+
from cluxion_runtime.core.intent import classify_intent
|
|
7
|
+
from cluxion_runtime.core.preprocess import preprocess_work
|
|
8
|
+
from cluxion_runtime.core.types import (
|
|
9
|
+
AgentSurface,
|
|
10
|
+
HarnessPlan,
|
|
11
|
+
HostExecutionPlan,
|
|
12
|
+
HostExecutionStep,
|
|
13
|
+
ModelRuntimeProfile,
|
|
14
|
+
PreprocessResult,
|
|
15
|
+
ResourceDecision,
|
|
16
|
+
ResourceSnapshot,
|
|
17
|
+
RuntimeKind,
|
|
18
|
+
WorkItem,
|
|
19
|
+
)
|
|
20
|
+
from cluxion_runtime.models.vllm_mlx import select_mac_local_profile
|
|
21
|
+
from cluxion_runtime.resources.queue_bridge import queue_available
|
|
22
|
+
from cluxion_runtime.resources.rust_bridge import capacity_decision, collect_resource_snapshot
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_harness_plan(
    item: WorkItem,
    *,
    snapshot: ResourceSnapshot | None = None,
    queue_position: int = 0,
) -> HarnessPlan:
    """Convert an external agent request into a Cluxion execution plan.

    The request is classified, then checked for missing direction. When
    clarification is required the plan only asks the user questions and uses
    the fast-path resource decision. Otherwise the request is preprocessed and
    either the fast-path decision or a capacity decision is attached.

    Args:
        item: The work request to plan.
        snapshot: Optional resource snapshot forwarded to the capacity
            decision; when ``None`` one is collected only if needed.
        queue_position: Position value copied onto the returned plan.

    Returns:
        The assembled ``HarnessPlan``.
    """
    intent = classify_intent(item)
    clarification = assess_clarification(item, intent)

    if clarification.required:
        forced_mode = "needs_clarification"
        preprocessing = preprocess_work(
            item,
            intent_category=intent.category,
            local_model_requested=intent.local_model_requested,
            force_mode=forced_mode,
        )
        profile = _runtime_profile_for(item)
        decision = _fast_answer_decision(forced_mode)
        execution = _clarification_execution_plan(item, clarification)
        prompts = tuple(question.prompt for question in clarification.questions)
        backend = "rust" if queue_available() else "python"
        return HarnessPlan(
            item=item,
            intent=intent,
            preprocessing=preprocessing,
            resource=decision,
            runtime=profile,
            execution=execution,
            queue_position=queue_position,
            clarification_required=True,
            clarification_questions=prompts,
            queue_backend=backend,
        )

    work_kind = _work_kind_for(item, intent_category=intent.category)
    preprocessing = preprocess_work(
        item,
        intent_category=intent.category,
        local_model_requested=intent.local_model_requested,
    )
    profile = _runtime_profile_for(item)
    # Direct-answer modes on a host-managed runtime skip the capacity check.
    if _uses_fast_answer_resource_path(preprocessing.mode, profile.kind):
        decision = _fast_answer_decision(preprocessing.mode)
    else:
        decision = _capacity_decision_for(
            item,
            preprocessing.token_estimate,
            work_kind=work_kind,
            snapshot=snapshot,
        )
    execution = _host_execution_plan_for(item, preprocessing, profile)
    backend = "rust" if queue_available() else "python"
    return HarnessPlan(
        item=item,
        intent=intent,
        preprocessing=preprocessing,
        resource=decision,
        runtime=profile,
        execution=execution,
        queue_position=queue_position,
        clarification_required=False,
        queue_backend=backend,
    )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _clarification_execution_plan(item: WorkItem, clarification: object) -> HostExecutionPlan:
    """Build the plan that makes the host ask the user before anything is queued.

    Args:
        item: The work request (not read by this function).
        clarification: Object whose optional ``questions`` attribute yields
            entries exposing ``prompt``; a missing attribute means no questions.

    Returns:
        A single-step ``HostExecutionPlan`` with no queue, synthesis or
        preflight and zero extra model calls.
    """
    bullets = [f"- {question.prompt}" for question in getattr(clarification, "questions", ())]
    instructions = (
        "[cluxion_needs_clarification]\n"
        "Do not guess or start work yet. Ask the user these questions and wait for answers.\n"
        "If you still do not know after available context, say you do not know.\n"
    )
    prompt = instructions + "\n".join(bullets)
    ask_step = HostExecutionStep(
        "clarify",
        "ask_user_questions",
        prompt,
        required_checks=("say_unknown_if_insufficient_context", "do_not_start_work_without_user_direction"),
    )
    return HostExecutionPlan(
        model_owner="host_current_model",
        provider_policy="ask_user_before_queue; do_not_enqueue_until_direction_is_clear",
        strategy="ask_user_before_queue",
        queue_required=False,
        synthesis_required=False,
        preflight_required=False,
        max_extra_model_calls=0,
        steps=(ask_step,),
        performance_notes=("clarification_blocks_queue_until_user_answers",),
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _host_execution_plan_for(
    item: WorkItem,
    preprocessed: PreprocessResult,
    runtime: ModelRuntimeProfile,
) -> HostExecutionPlan:
    """Describe how the host agent should execute a preprocessed request.

    The preprocessing mode selects the strategy:

    * ``simple_answer`` - one direct-answer step on the original prompt.
    * ``verification_answer`` - one verify-then-answer step on the original prompt.
    * ``queued`` - one step per segment plus a final briefing step that
      depends on all of them.
    * anything else - one task step on the normalized prompt.

    Args:
        item: The work request; supplies the surface and the original prompt.
        preprocessed: Preprocessing output (mode, answer policy, segments,
            token estimate, normalized prompt).
        runtime: Runtime profile; its ``kind`` selects the provider policy.

    Returns:
        The ``HostExecutionPlan`` for the selected strategy.
    """
    mode = preprocessed.mode
    policy = preprocessed.answer_policy
    on_hermes = item.surface == AgentSurface.HERMES
    host_managed = runtime.kind == RuntimeKind.HOST_MANAGED
    owner = "hermes_current_model" if on_hermes else "host_current_model"

    if host_managed and on_hermes:
        provider_policy = "use_current_hermes_model_and_oauth; cluxion_must_not_call_or_configure_cloud_model_provider"
    elif host_managed:
        provider_policy = "use_current_host_agent_model; cluxion_only_returns_execution_contract"
    else:
        provider_policy = (
            "start_or_verify_cluxion_local_endpoint_then_switch_hermes_to_custom_provider; "
            "all_completion_calls_still_belong_to_hermes"
        )

    shared_notes = (
        "simple_and_verification_modes_skip_resource_snapshot",
        "do_not_add_extra_model_call_unless_queue_requires_segment_processing",
    )

    def one_step_plan(strategy: str, step_id: str, action: str, prompt: str) -> HostExecutionPlan:
        # All non-queued strategies share this shape: a single step, no queue,
        # no synthesis, no extra model calls.
        only_step = HostExecutionStep(
            step_id,
            action,
            prompt,
            required_checks=policy.required_checks,
            token_estimate=preprocessed.token_estimate,
        )
        return HostExecutionPlan(
            model_owner=owner,
            provider_policy=provider_policy,
            strategy=strategy,
            queue_required=False,
            synthesis_required=False,
            preflight_required=False,
            max_extra_model_calls=0,
            steps=(only_step,),
            performance_notes=shared_notes,
        )

    if mode == "simple_answer":
        return one_step_plan("current_turn_direct_answer", "answer", "direct_answer", item.prompt)
    if mode == "verification_answer":
        return one_step_plan("current_turn_verify_then_answer", "verify", "verify_then_answer", item.prompt)
    if mode != "queued":
        return one_step_plan("single_host_task", "execute", "execute_task", preprocessed.normalized_prompt)

    # Queued mode: the plan carries segment metadata only; the instruction text
    # tells the host to fetch each segment through the queue tools.
    segment_instruction = (
        "Fetch this step through cluxion_queue_next, process it with the current Hermes model, "
        "then store the result with cluxion_queue_record."
    )
    segment_steps = tuple(
        [
            HostExecutionStep(
                f"exec_{segment.segment_id}",
                "execute_segment",
                segment_instruction,
                segment_id=segment.segment_id,
                checksum=segment.checksum,
                token_estimate=segment.token_estimate,
                required_checks=policy.required_checks,
            )
            for segment in preprocessed.segments
        ]
    )
    briefing_step = HostExecutionStep(
        "brief",
        "synthesize_briefing",
        "After all segment steps are recorded, call cluxion_queue_brief and answer from its briefing_prompt.",
        depends_on=tuple(step.step_id for step in segment_steps),
        required_checks=policy.required_checks,
        token_estimate=preprocessed.token_estimate,
    )
    return HostExecutionPlan(
        model_owner=owner,
        provider_policy=provider_policy,
        strategy="durable_segment_queue",
        queue_required=True,
        synthesis_required=True,
        preflight_required=False,
        # One call per segment plus the final briefing.
        max_extra_model_calls=len(segment_steps) + 1,
        steps=segment_steps + (briefing_step,),
        next_tool="cluxion_queue_next",
        record_tool="cluxion_queue_record",
        brief_tool="cluxion_queue_brief",
        performance_notes=shared_notes
        + (
            "queued_plan_stores_segment_content_out_of_band",
            "initial_plan_returns_metadata_not_full_segment_payload",
        ),
    )
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _capacity_decision_for(
    item: WorkItem,
    token_estimate: int,
    *,
    work_kind: str,
    snapshot: ResourceSnapshot | None,
) -> ResourceDecision:
    """Ask the capacity bridge whether this work can be admitted.

    Args:
        item: The work request; used to estimate expected RAM.
        token_estimate: Token estimate forwarded to the RAM estimate.
        work_kind: Work-kind label passed to ``capacity_decision``.
        snapshot: Resource snapshot to evaluate; collected on demand when ``None``.

    Returns:
        The ``ResourceDecision`` produced by ``capacity_decision``.
    """
    if snapshot is None:
        snapshot = collect_resource_snapshot()
    ram_mb = _expected_ram(item, token_estimate)
    return capacity_decision(work_kind, snapshot, expected_ram_mb=ram_mb)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _uses_fast_answer_resource_path(mode: str, runtime_kind: RuntimeKind) -> bool:
|
|
247
|
+
return mode in {"simple_answer", "verification_answer"} and runtime_kind == RuntimeKind.HOST_MANAGED
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _fast_answer_decision(mode: str) -> ResourceDecision:
    """Build the fixed resource decision used when no snapshot is taken.

    The reason is ``"verification_required"`` for ``verification_answer`` and
    ``"preprocess_not_required"`` for every other mode. ``mode`` itself is
    passed in two of the positional slots.
    NOTE(review): the meaning of each positional slot is defined by
    ``ResourceDecision``, which is not visible here - confirm against its fields.
    """
    if mode == "verification_answer":
        reason = "verification_required"
    else:
        reason = "preprocess_not_required"
    markers = ("fast_path", "resource_snapshot_skipped")
    return ResourceDecision(True, mode, reason, 1, mode, 0, markers)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _work_kind_for(item: WorkItem, *, intent_category: str) -> str:
    """Pick the work-kind label used for the capacity decision.

    Precedence: an explicit local route gives ``"qwen"``, then a security
    intent gives ``"security"``, then the agent surface is mapped; anything
    else is ``"generic"``.
    """
    if _explicit_local_route(item.model_route.lower()):
        return "qwen"
    if intent_category == "security":
        return "security"
    surface_kinds = (
        (AgentSurface.CODEX, "codex"),
        (AgentSurface.GROK_BUILD, "grok"),
        (AgentSurface.CLAUDE, "claude"),
    )
    for surface, kind in surface_kinds:
        if item.surface == surface:
            return kind
    return "generic"
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _expected_ram(item: WorkItem, token_estimate: int) -> int:
|
|
279
|
+
if item.expected_ram_mb > 0:
|
|
280
|
+
return item.expected_ram_mb
|
|
281
|
+
if not _explicit_local_route(item.model_route.lower()):
|
|
282
|
+
return 512
|
|
283
|
+
route = item.model_route.lower()
|
|
284
|
+
if "35b" in route or "32b" in route:
|
|
285
|
+
return 24_000
|
|
286
|
+
if "14b" in route or "13b" in route:
|
|
287
|
+
return 12_000
|
|
288
|
+
if "7b" in route or "8b" in route:
|
|
289
|
+
return 8_000
|
|
290
|
+
return 2_000 + min(8_000, token_estimate // 2)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _model_name(model_route: str) -> str:
|
|
294
|
+
for prefix in ("local/", "mlx/", "vllm-mlx/", "vllm_mlx/"):
|
|
295
|
+
if model_route.startswith(prefix):
|
|
296
|
+
return model_route.removeprefix(prefix)
|
|
297
|
+
return model_route
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _runtime_profile_for(item: WorkItem) -> ModelRuntimeProfile:
    """Choose the runtime profile for a request.

    Explicit local routes get the profile selected by
    ``select_mac_local_profile``; every other route gets a host-managed profile.
    """
    route = item.model_route
    if _explicit_local_route(route.lower()):
        return select_mac_local_profile(_model_name(route))
    return _host_managed_profile(item)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _host_managed_profile(item: WorkItem) -> ModelRuntimeProfile:
    """Return a host-managed profile carrying only the route as model name.

    The base URL, launch command and health path are all left empty.
    """
    blank = ""
    return ModelRuntimeProfile(
        kind=RuntimeKind.HOST_MANAGED,
        model=item.model_route,
        base_url=blank,
        command=(),
        health_path=blank,
    )
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _explicit_local_route(route: str) -> bool:
|
|
317
|
+
return route.startswith(("local/", "mlx/", "vllm-mlx/", "vllm_mlx/")) and route != "local/default"
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
__all__ = ["build_harness_plan"]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Deterministic intent and direction classification for agent work."""
|
|
4
|
+
|
|
5
|
+
from cluxion_runtime.core.types import AgentSurface, WorkIntent, WorkItem
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def classify_intent(item: WorkItem) -> WorkIntent:
    """Classify a request by keyword before the host model spends context.

    The lowercased prompt and model route are searched together. A local-model
    match (keyword or local route prefix) wins outright; otherwise the first
    matching rule among security, test, coding and documentation decides, and
    a request matching nothing is classified as a general planning task.
    """
    haystack = f"{item.prompt}\n{item.model_route}".lower()
    signals: list[str] = []

    wants_local = _has_any(haystack, ("vllm-mlx", "mlx", "local model", "로컬모델", "로컬 모델", "serve-local"))
    if item.model_route.lower().startswith(("local/", "mlx/", "vllm-mlx/", "vllm_mlx/")):
        wants_local = True
        signals.append("explicit_local_route")
    if wants_local:
        signals.append("local_model")
        return WorkIntent("local_model", "serve_endpoint", True, "local_runtime", 0.92, tuple(signals))

    # (needles, signal, category, action, fixed direction or None, confidence).
    # A ``None`` direction means "derive it from the agent surface".
    keyword_rules = (
        (("security", "audit", "vulnerability", "보안", "취약점", "시크릿"), "security", "security", "review_risk", "host_managed", 0.86),
        (("test", "pytest", "unit", "테스트", "검증"), "test", "engineering", "verify_or_fix", None, 0.82),
        (("code", "implement", "fix", "refactor", "패치", "수정", "구현", "리팩터"), "coding", "engineering", "code_change", None, 0.80),
        (("docs", "readme", "문서", "가이드", "사용법"), "documentation", "documentation", "write_or_update_docs", "host_managed", 0.78),
    )
    for needles, signal, category, action, fixed_direction, confidence in keyword_rules:
        if not _has_any(haystack, needles):
            continue
        signals.append(signal)
        direction = _direction_for_surface(item.surface) if fixed_direction is None else fixed_direction
        return WorkIntent(category, action, False, direction, confidence, tuple(signals))

    return WorkIntent("general", "plan_task", False, _direction_for_surface(item.surface), 0.55, tuple(signals))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _direction_for_surface(surface: AgentSurface) -> str:
    """Map an agent surface to its harness direction label.

    Surfaces without a dedicated harness fall back to ``"host_managed"``.
    """
    harness_by_surface = (
        (AgentSurface.HERMES, "hermes_harness"),
        (AgentSurface.CODEX, "codex_harness"),
        (AgentSurface.CLAUDE, "claude_harness"),
        (AgentSurface.GROK_BUILD, "grok_build_harness"),
    )
    for known, direction in harness_by_surface:
        if surface == known:
            return direction
    return "host_managed"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _has_any(text: str, needles: tuple[str, ...]) -> bool:
|
|
52
|
+
return any(needle in text for needle in needles)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
__all__ = ["classify_intent"]
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""작업큐 상태를 JSONL 이벤트로 남기는 durable ledger."""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from enum import StrEnum
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from cluxion_runtime.core.ledger_codec import item_from_dict, item_to_dict
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from cluxion_runtime.core.types import WorkItem
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class WorkStatus(StrEnum):
|
|
21
|
+
"""작업 ledger의 닫힌 상태 집합."""
|
|
22
|
+
|
|
23
|
+
QUEUED = "queued"
|
|
24
|
+
RUNNING = "running"
|
|
25
|
+
RETRY_WAIT = "retry_wait"
|
|
26
|
+
SUCCEEDED = "succeeded"
|
|
27
|
+
FAILED = "failed"
|
|
28
|
+
DEAD = "dead"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True)
class RetryDecision:
    """Outcome of recording a failure: whether and when the work may be retried."""

    # True when another attempt is allowed.
    retryable: bool
    # Epoch seconds before which the retry should not run; 0.0 when not retryable.
    next_after_epoch: float
    # Attempt counter at the time of the failure.
    attempt: int
    # Failure reason supplied by the caller.
    reason: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class LedgerEntry:
    """Latest ledger state for one ``work_id``."""

    # Identifier of the work item this entry describes.
    work_id: str
    # Current state of the work item.
    status: WorkStatus
    # Number of attempts started so far.
    attempt: int
    # Attempt ceiling recorded for this work item.
    max_attempts: int
    # Epoch seconds before which a retry should not run.
    next_after_epoch: float
    # Reason text recorded with the latest event.
    reason: str
    # The work item restored from the event payload.
    item: WorkItem
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class LedgerCorruptionError(RuntimeError):
    """Raised when a ledger JSONL line is invalid, so corruption is never skipped silently."""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DurableWorkLedger:
    """Append-only JSONL ledger that restores work state after a process restart.

    Every state change is appended as one JSON object per line. The current
    state of a work item is the last event recorded for its ``work_id``.
    """

    def __init__(self, path: Path, *, sync_on_write: bool = True) -> None:
        """Bind the ledger to *path*.

        Args:
            path: Location of the JSONL file; created on first append.
            sync_on_write: When true, each append is flushed and fsynced.
        """
        self._path = path
        self._sync_on_write = sync_on_write

    def record_enqueued(self, item: WorkItem, *, max_attempts: int = 3) -> LedgerEntry:
        """Register a new work item as queued with attempt 0."""
        event = self._event(item, WorkStatus.QUEUED, attempt=0, max_attempts=max_attempts, reason="queued")
        self._append(event)
        return self._entry_from_event(event)

    def record_started(self, work_id: str, *, now: float | None = None) -> LedgerEntry:
        """Record that execution started, incrementing the attempt counter.

        Raises:
            KeyError: If *work_id* has no ledger entry.
        """
        entry = self.latest()[work_id]
        event = self._event(
            entry.item,
            WorkStatus.RUNNING,
            attempt=entry.attempt + 1,
            max_attempts=entry.max_attempts,
            reason="started",
            now=now,
        )
        self._append(event)
        return self._entry_from_event(event)

    def record_finished(self, work_id: str, *, reason: str = "succeeded") -> LedgerEntry:
        """Record successful completion, keeping the current attempt counter.

        Raises:
            KeyError: If *work_id* has no ledger entry.
        """
        entry = self.latest()[work_id]
        event = self._event(
            entry.item,
            WorkStatus.SUCCEEDED,
            attempt=entry.attempt,
            max_attempts=entry.max_attempts,
            reason=reason,
        )
        self._append(event)
        return self._entry_from_event(event)

    def record_failed(
        self,
        work_id: str,
        *,
        reason: str,
        retryable: bool = True,
        backoff_base_sec: float = 1.0,
        now: float | None = None,
    ) -> RetryDecision:
        """Record a failure as either a retry wait or a dead work item.

        A retry is allowed only when *retryable* is true and the attempt
        counter is still below ``max_attempts``. The retry delay doubles with
        each attempt, starting from *backoff_base_sec*.

        Args:
            work_id: Identifier of the failed work item.
            reason: Failure reason stored with the event.
            retryable: Whether the caller considers the failure retryable.
            backoff_base_sec: Delay used after the first attempt.
            now: Epoch seconds to use instead of the current time.

        Returns:
            The retry decision that was recorded.

        Raises:
            KeyError: If *work_id* has no ledger entry.
        """
        current = time.time() if now is None else now
        entry = self.latest()[work_id]
        can_retry = retryable and entry.attempt < entry.max_attempts
        # Exponential backoff: base * 2**(attempt - 1), never a negative exponent.
        delay = backoff_base_sec * (2 ** max(0, entry.attempt - 1))
        status = WorkStatus.RETRY_WAIT if can_retry else WorkStatus.DEAD
        next_after = current + delay if can_retry else 0.0
        event = self._event(
            entry.item,
            status,
            attempt=entry.attempt,
            max_attempts=entry.max_attempts,
            next_after_epoch=next_after,
            reason=reason,
            now=current,
        )
        self._append(event)
        return RetryDecision(can_retry, next_after, entry.attempt, reason)

    def latest(self) -> dict[str, LedgerEntry]:
        """Fold the whole JSONL file into the latest entry per ``work_id``.

        Returns:
            Mapping from ``work_id`` to its most recent entry; empty when the
            ledger file does not exist.

        Raises:
            LedgerCorruptionError: If a non-blank line is not a valid event.
        """
        states: dict[str, LedgerEntry] = {}
        try:
            handle = self._path.open("r", encoding="utf-8")
        except FileNotFoundError:
            return states
        with handle:
            # Iterate the file rather than using str.splitlines(): events are
            # written with ensure_ascii=False, so characters such as U+2028,
            # U+2029 or U+0085 can appear unescaped inside one record, and
            # splitlines() would cut that record into invalid fragments.
            for number, line in enumerate(handle, start=1):
                if not line.strip():
                    continue
                try:
                    event = json.loads(line)
                    entry = self._entry_from_event(event)
                except (json.JSONDecodeError, KeyError, TypeError, ValueError) as exc:
                    raise LedgerCorruptionError(f"ledger line {number} is invalid: {exc}") from exc
                states[entry.work_id] = entry
        return states

    def ready_to_retry(self, *, now: float | None = None) -> tuple[LedgerEntry, ...]:
        """Return retry-wait entries whose retry time has arrived.

        Entries are sorted by ``int(item.priority)`` and then ``work_id``.
        """
        current = time.time() if now is None else now
        ready = [
            entry
            for entry in self.latest().values()
            if entry.status == WorkStatus.RETRY_WAIT and entry.next_after_epoch <= current
        ]
        return tuple(sorted(ready, key=lambda entry: (int(entry.item.priority), entry.work_id)))

    def _append(self, event: dict[str, object]) -> None:
        """Append one event as a JSON line, creating parent directories as needed."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(event, ensure_ascii=False, sort_keys=True)
        with self._path.open("a", encoding="utf-8") as handle:
            handle.write(payload + "\n")
            if self._sync_on_write:
                handle.flush()
                os.fsync(handle.fileno())

    def _event(
        self,
        item: WorkItem,
        status: WorkStatus,
        *,
        attempt: int,
        max_attempts: int,
        reason: str,
        next_after_epoch: float = 0.0,
        now: float | None = None,
    ) -> dict[str, object]:
        """Build the JSON-safe event dictionary for one state change.

        Numeric fields are clamped: ``attempt`` and ``next_after_epoch`` to at
        least 0, ``max_attempts`` to at least 1.
        """
        return {
            "created_at": time.time() if now is None else now,
            "work_id": item.work_id,
            "status": status.value,
            "attempt": max(0, attempt),
            "max_attempts": max(1, max_attempts),
            "next_after_epoch": max(0.0, next_after_epoch),
            "reason": reason,
            "item": item_to_dict(item),
        }

    def _entry_from_event(self, event: dict[str, object]) -> LedgerEntry:
        """Rebuild a ``LedgerEntry`` from an event dictionary.

        Missing keys or unconvertible values raise ``KeyError``, ``TypeError``
        or ``ValueError``.
        """
        return LedgerEntry(
            work_id=str(event["work_id"]),
            status=WorkStatus(str(event["status"])),
            attempt=int(event["attempt"]),
            max_attempts=int(event["max_attempts"]),
            next_after_epoch=float(event["next_after_epoch"]),
            reason=str(event["reason"]),
            item=item_from_dict(event["item"]),
        )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
__all__ = ["DurableWorkLedger", "LedgerCorruptionError", "LedgerEntry", "RetryDecision", "WorkStatus"]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""durable ledger가 쓰는 WorkItem 직렬화 코덱."""
|
|
4
|
+
|
|
5
|
+
from cluxion_runtime.core.types import AgentSurface, WorkItem, WorkPriority
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def item_to_dict(item: WorkItem) -> dict[str, object]:
    """Convert a ``WorkItem`` into a JSON-safe dictionary.

    The surface is stored by its ``value``, the priority as an ``int``, and the
    metadata as a shallow copy.
    """
    encoded: dict[str, object] = {}
    encoded["work_id"] = item.work_id
    encoded["prompt"] = item.prompt
    encoded["surface"] = item.surface.value
    encoded["priority"] = int(item.priority)
    encoded["model_route"] = item.model_route
    encoded["expected_ram_mb"] = item.expected_ram_mb
    encoded["context_tokens"] = item.context_tokens
    encoded["metadata"] = dict(item.metadata)
    return encoded
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def item_from_dict(payload: object) -> WorkItem:
    """Restore a ``WorkItem`` from a decoded JSON object.

    Raises:
        TypeError: If *payload* is not a dictionary.
        KeyError: If a required field is missing.
        ValueError: If a field cannot be converted to its target type.
    """
    if not isinstance(payload, dict):
        raise TypeError("item payload must be an object")
    # Fields are read in declaration order so the first missing key is reported.
    work_id = str(payload["work_id"])
    prompt = str(payload["prompt"])
    surface = AgentSurface(str(payload["surface"]))
    priority = WorkPriority(int(payload["priority"]))
    model_route = str(payload["model_route"])
    expected_ram_mb = int(payload["expected_ram_mb"])
    context_tokens = int(payload["context_tokens"])
    # Metadata keys and values are both coerced to strings.
    metadata = {str(key): str(value) for key, value in dict(payload["metadata"]).items()}
    return WorkItem(
        work_id=work_id,
        prompt=prompt,
        surface=surface,
        priority=priority,
        model_route=model_route,
        expected_ram_mb=expected_ram_mb,
        context_tokens=context_tokens,
        metadata=metadata,
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
__all__ = ["item_from_dict", "item_to_dict"]
|