loopllm 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loopllm/__init__.py +69 -0
- loopllm/__main__.py +5 -0
- loopllm/adaptive_exit.py +78 -0
- loopllm/agent_loop.py +299 -0
- loopllm/cli.py +521 -0
- loopllm/elicitation.py +519 -0
- loopllm/engine.py +376 -0
- loopllm/evaluator_factory.py +72 -0
- loopllm/evaluators.py +419 -0
- loopllm/guards.py +254 -0
- loopllm/local_loop.py +273 -0
- loopllm/mcp_server.py +2657 -0
- loopllm/plan_registry.py +412 -0
- loopllm/priors.py +604 -0
- loopllm/provider.py +51 -0
- loopllm/providers/__init__.py +15 -0
- loopllm/providers/agent.py +64 -0
- loopllm/providers/mock.py +64 -0
- loopllm/providers/ollama.py +95 -0
- loopllm/providers/openrouter.py +101 -0
- loopllm/serve.py +297 -0
- loopllm/step_scorer.py +190 -0
- loopllm/store.py +1126 -0
- loopllm/tasks.py +599 -0
- loopllm-0.7.0.dist-info/METADATA +454 -0
- loopllm-0.7.0.dist-info/RECORD +29 -0
- loopllm-0.7.0.dist-info/WHEEL +4 -0
- loopllm-0.7.0.dist-info/entry_points.txt +3 -0
- loopllm-0.7.0.dist-info/licenses/LICENSE +21 -0
loopllm/elicitation.py
ADDED
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
"""Bayesian intent elicitation layer.
|
|
2
|
+
|
|
3
|
+
Decomposes vague user prompts into structured specs through
|
|
4
|
+
information-gain-ranked clarifying questions. Learns which
|
|
5
|
+
questions are most valuable per task type over time.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import uuid
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import structlog
|
|
15
|
+
|
|
16
|
+
from loopllm.priors import AdaptivePriors, BetaPrior
|
|
17
|
+
from loopllm.provider import LLMProvider, LLMResponse
|
|
18
|
+
|
|
19
|
+
# Module-level structured logger, named after this module.
logger = structlog.get_logger(__name__)

# ---------------------------------------------------------------------------
# Question taxonomy — each type carries its own prior
# ---------------------------------------------------------------------------

# Closed set of clarifying-question categories. IntentRefiner.analyze joins
# this list (in this order) into the analysis prompt, and _parse_questions
# remaps any category outside this list to "scope".
QUESTION_TYPES: list[str] = [
    "scope",        # What exactly should the output cover?
    "format",       # Desired output format / structure
    "constraints",  # Hard requirements, rules, boundaries
    "examples",     # Could you give an example of what you want?
    "edge_cases",   # How should corner cases be handled?
    "audience",     # Who is the target audience / consumer?
    "priority",     # What matters most if trade-offs are needed?
]

# Default priors for question effectiveness (positive-skew: asking is
# usually somewhat helpful, but we're uncertain).
# One entry per QUESTION_TYPES member. IntentRefiner.__init__ copies the
# alpha/beta values into fresh BetaPrior objects, so per-instance learning
# does not mutate this table.
# NOTE(review): "edge_cases" and "audience" have alpha < beta, so not every
# entry is skewed positive — confirm this is intended.
_DEFAULT_QUESTION_PRIORS: dict[str, BetaPrior] = {
    "scope": BetaPrior(alpha=3.0, beta=1.5),
    "format": BetaPrior(alpha=2.5, beta=1.5),
    "constraints": BetaPrior(alpha=2.0, beta=1.5),
    "examples": BetaPrior(alpha=2.5, beta=2.0),
    "edge_cases": BetaPrior(alpha=1.5, beta=2.0),
    "audience": BetaPrior(alpha=2.0, beta=2.5),
    "priority": BetaPrior(alpha=2.0, beta=2.0),
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Data classes
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class ClarifyingQuestion:
    """One clarifying question that can be put to the user.

    Instances are created by the question parser with only ``text``,
    ``question_type`` and ``options`` set; ``information_gain`` and
    ``prior`` are filled in afterwards when the questions are scored.

    Attributes:
        text: Wording of the question shown to the user.
        question_type: Category name, one of :data:`QUESTION_TYPES`.
        options: Suggested multiple-choice answers, or ``None`` when the
            question is free-form.
        information_gain: Ranking score; questions with a larger value
            are asked earlier.
        prior: Beta prior that tracks how effective this question type
            has been.
    """

    # Required fields.
    text: str
    question_type: str

    # Optional presentation data.
    options: list[str] | None = None

    # Scoring data, populated after construction.
    information_gain: float = 0.0
    prior: BetaPrior = field(default_factory=BetaPrior)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class IntentSpec:
    """Structured result of an elicitation run.

    Holds the clarified description of the user's request that later
    stages (the refinement loop and the task orchestrator) consume.
    Every field has a default, so ``IntentSpec()`` is a valid empty spec.

    Attributes:
        task_type: Task category label such as ``"code_generation"``;
            defaults to ``"general"``.
        original_prompt: Prompt exactly as the user supplied it.
        refined_prompt: Expanded, disambiguated version of the prompt.
        constraints: Key/value constraints gathered from the answers.
        quality_criteria: Criteria for judging the produced output.
        decomposition_hints: Proposed subtask descriptions; empty when
            the task is not broken down.
        model_preference: Preferred model, or ``None`` for no preference.
        estimated_complexity: Difficulty estimate between 0.0 and 1.0;
            defaults to 0.5.
        context: Extra context collected during elicitation.
    """

    # Classification and prompt text.
    task_type: str = "general"
    original_prompt: str = ""
    refined_prompt: str = ""

    # Structured details; each instance gets its own fresh container.
    constraints: dict[str, Any] = field(default_factory=dict)
    quality_criteria: list[str] = field(default_factory=list)
    decomposition_hints: list[str] = field(default_factory=list)

    # Execution hints.
    model_preference: str | None = None
    estimated_complexity: float = 0.5
    context: dict[str, Any] = field(default_factory=dict)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _new_session_id() -> str:
    """Return a fresh session id: the first 12 hex characters of a UUID4."""
    return uuid.uuid4().hex[:12]


@dataclass
class ElicitationSession:
    """State carried through one elicitation conversation.

    Attributes:
        session_id: Identifier for the session; generated automatically
            as a 12-character hex string when not supplied.
        original_prompt: The user's input, unmodified.
        questions_asked: Questions posed so far, in the order asked.
        answers: User answers keyed by question type.
        refined_spec: Final specification, or ``None`` until one has
            been produced.
        task_type: Task category detected for the prompt; defaults to
            ``"general"``.
        model_id: Identifier of the model in use.
    """

    session_id: str = field(default_factory=_new_session_id)
    original_prompt: str = ""

    # Conversation history; each session owns its own containers.
    questions_asked: list[ClarifyingQuestion] = field(default_factory=list)
    answers: dict[str, str] = field(default_factory=dict)

    # Outcome and metadata.
    refined_spec: IntentSpec | None = None
    task_type: str = "general"
    model_id: str = ""
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
# Prompt templates
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
_ANALYZE_PROMPT = """\
|
|
130
|
+
You are an intent-analysis assistant. Given the user's prompt below,
|
|
131
|
+
identify what is AMBIGUOUS, MISSING, or ASSUMED.
|
|
132
|
+
|
|
133
|
+
For each gap you find, produce a JSON object with:
|
|
134
|
+
- "question_type": one of {question_types}
|
|
135
|
+
- "question": a concise clarifying question to ask the user
|
|
136
|
+
- "options": optional list of 2-4 suggested answers (or null)
|
|
137
|
+
|
|
138
|
+
Return a JSON array of these objects. Return at most 5 questions.
|
|
139
|
+
Order them by importance (most important first).
|
|
140
|
+
|
|
141
|
+
User prompt:
|
|
142
|
+
\"\"\"
|
|
143
|
+
{prompt}
|
|
144
|
+
\"\"\"
|
|
145
|
+
|
|
146
|
+
Respond ONLY with the JSON array, no other text.
|
|
147
|
+
"""
|
|
148
|
+
|
|
149
|
+
_REFINE_PROMPT = """\
|
|
150
|
+
You are a prompt-engineering assistant. Given the original user prompt
|
|
151
|
+
and their answers to clarifying questions, produce a structured
|
|
152
|
+
specification.
|
|
153
|
+
|
|
154
|
+
Original prompt:
|
|
155
|
+
\"\"\"
|
|
156
|
+
{prompt}
|
|
157
|
+
\"\"\"
|
|
158
|
+
|
|
159
|
+
Clarifying Q&A:
|
|
160
|
+
{qa_text}
|
|
161
|
+
|
|
162
|
+
Produce a JSON object with these fields:
|
|
163
|
+
- "task_type": a short category label (e.g. "code_generation", "summarization", "data_extraction")
|
|
164
|
+
- "refined_prompt": an improved, unambiguous version of the user's prompt that incorporates all their answers
|
|
165
|
+
- "constraints": an object of key-value constraints extracted from answers
|
|
166
|
+
- "quality_criteria": a list of 2-5 criteria for judging output quality
|
|
167
|
+
- "decomposition_hints": a list of subtask descriptions if the task should be broken down (empty list if atomic)
|
|
168
|
+
- "estimated_complexity": a float 0.0-1.0 (0=trivial, 1=very complex)
|
|
169
|
+
|
|
170
|
+
Respond ONLY with the JSON object, no other text.
|
|
171
|
+
"""
|
|
172
|
+
|
|
173
|
+
_CLASSIFY_PROMPT = """\
|
|
174
|
+
Classify the following user prompt into exactly one task type.
|
|
175
|
+
Choose from: code_generation, summarization, data_extraction, question_answering,
|
|
176
|
+
creative_writing, analysis, transformation, general.
|
|
177
|
+
|
|
178
|
+
User prompt: "{prompt}"
|
|
179
|
+
|
|
180
|
+
Respond with ONLY the task type label, nothing else.
|
|
181
|
+
"""
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
# IntentRefiner
|
|
186
|
+
# ---------------------------------------------------------------------------
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class IntentRefiner:
    """Bayesian elicitation engine that turns vague prompts into structured specs.

    Uses an LLM to generate clarifying questions, ranks them by an
    information-gain score derived from per-question-type Beta priors, and
    stops asking once the best remaining question scores below
    ``min_info_gain`` or ``max_questions`` have been asked. The gathered
    answers are then synthesised into an :class:`IntentSpec`.

    Args:
        provider: LLM provider used for all meta-prompts.
        priors: Adaptive priors object; a new ``AdaptivePriors()`` is
            created only when ``None`` is passed.
        model: Model name passed to the provider for every meta-prompt.
        min_info_gain: Stop asking when the best remaining question has
            information gain below this threshold.
        max_questions: Hard cap on questions per session.
    """

    # Labels accepted from the classifier; mirrors the list in the
    # classification prompt.
    _KNOWN_TASK_TYPES = frozenset(
        {
            "code_generation",
            "summarization",
            "data_extraction",
            "question_answering",
            "creative_writing",
            "analysis",
            "transformation",
            "general",
        }
    )
    # Label used whenever the classifier reply cannot be recognised.
    _DEFAULT_TASK_TYPE = "general"

    def __init__(
        self,
        provider: LLMProvider,
        priors: AdaptivePriors | None = None,
        model: str = "gpt-4o-mini",
        min_info_gain: float = 0.15,
        max_questions: int = 5,
    ) -> None:
        self.provider = provider
        # Explicit None check: a caller-supplied priors object must be kept
        # even if it evaluates as falsy (e.g. it is empty).
        self.priors = priors if priors is not None else AdaptivePriors()
        self.model = model
        self.min_info_gain = min_info_gain
        self.max_questions = max_questions
        # Copy the defaults so learning in this instance never mutates the
        # shared module-level table.
        self._question_priors: dict[str, BetaPrior] = {
            qt: BetaPrior(alpha=p.alpha, beta=p.beta)
            for qt, p in _DEFAULT_QUESTION_PRIORS.items()
        }

    # -- question prior management -------------------------------------------

    def _get_question_prior(self, question_type: str) -> BetaPrior:
        """Return the effectiveness prior for *question_type*.

        Unknown types get a new ``BetaPrior(alpha=1.5, beta=1.5)`` that is
        stored for subsequent calls.
        """
        if question_type not in self._question_priors:
            self._question_priors[question_type] = BetaPrior(alpha=1.5, beta=1.5)
        return self._question_priors[question_type]

    def _compute_info_gain(self, question_type: str) -> float:
        """Compute the ranking score for a question type.

        The score is ``prior.mean * (1 - prior.confidence)``: the prior's
        mean scaled down as its confidence grows, so for a given mean the
        less certain prior ranks higher.

        Args:
            question_type: Category whose prior should be scored.

        Returns:
            The information-gain score.
        """
        prior = self._get_question_prior(question_type)
        return prior.mean * (1.0 - prior.confidence)

    # -- core API ------------------------------------------------------------

    def classify_task(self, prompt: str) -> str:
        """Classify a prompt into a task type using the LLM.

        Args:
            prompt: The user's original prompt.

        Returns:
            One of the known task-type labels; ``"general"`` when the
            model's reply cannot be matched to a known label.
        """
        classify_prompt = _CLASSIFY_PROMPT.format(prompt=prompt)
        response: LLMResponse = self.provider.complete(classify_prompt, self.model)
        return self._normalize_task_type(response.content)

    @classmethod
    def _normalize_task_type(cls, label: str) -> str:
        """Map a raw classifier reply onto a known task-type label.

        Exact matches (after trimming, lower-casing and removing quotes)
        are returned as-is. Otherwise surrounding punctuation/markdown is
        stripped and spaces or hyphens become underscores, so replies such
        as ``"Code Generation."`` still resolve. As a last resort, a reply
        that mentions exactly one known label resolves to that label.

        Args:
            label: Raw text returned by the model.

        Returns:
            A known task-type label, or ``"general"`` when none matches.
        """
        cleaned = label.strip().lower().replace('"', "").replace("'", "")
        if cleaned in cls._KNOWN_TASK_TYPES:
            return cleaned

        trimmed = cleaned.strip(" \t\r\n.`*:;,!")
        slug = "_".join(trimmed.replace("-", " ").split())
        if slug in cls._KNOWN_TASK_TYPES:
            return slug

        # Only accept an embedded label when it is unambiguous.
        mentioned = [known for known in cls._KNOWN_TASK_TYPES if known in slug]
        if len(mentioned) == 1:
            return mentioned[0]
        return cls._DEFAULT_TASK_TYPE

    def analyze(self, prompt: str) -> list[ClarifyingQuestion]:
        """Generate clarifying questions ranked by expected information gain.

        Sends the prompt to the LLM with a meta-prompt asking it to
        identify ambiguities, then scores the parsed questions using the
        per-type priors.

        Args:
            prompt: The user's original prompt.

        Returns:
            List of :class:`ClarifyingQuestion` objects, sorted by
            information gain (highest first). Empty when the reply could
            not be parsed.
        """
        analyze_prompt = _ANALYZE_PROMPT.format(
            prompt=prompt, question_types=", ".join(QUESTION_TYPES)
        )

        response: LLMResponse = self.provider.complete(analyze_prompt, self.model)
        questions = self._parse_questions(response.content.strip())

        # Attach the prior and its score to every question before ranking.
        for q in questions:
            q.prior = self._get_question_prior(q.question_type)
            q.information_gain = self._compute_info_gain(q.question_type)

        questions.sort(key=lambda q: q.information_gain, reverse=True)
        return questions

    def ask(self, session: ElicitationSession) -> ClarifyingQuestion | None:
        """Return the next best question to ask, or ``None`` if enough info gathered.

        Stops when :attr:`max_questions` is reached, when no candidate of
        a not-yet-asked type remains, or when the best candidate's
        information gain is below :attr:`min_info_gain`. Every call that
        gets past the hard cap issues one LLM request via :meth:`analyze`.

        Args:
            session: The current elicitation session.

        Returns:
            The next question to ask, or ``None`` to stop.
        """
        # Check hard cap
        if len(session.questions_asked) >= self.max_questions:
            return None

        if not session.questions_asked:
            # First call: analyse the bare prompt.
            candidates = self.analyze(session.original_prompt)
        else:
            # Re-analyze with context of previous answers
            qa_context = "\n".join(
                f"Q ({q.question_type}): {q.text}\n"
                f"A: {session.answers.get(q.question_type, '(unanswered)')}"
                for q in session.questions_asked
            )
            enriched = (
                f"{session.original_prompt}\n\n"
                f"Already answered:\n{qa_context}\n\n"
                f"What else is still unclear or ambiguous?"
            )
            candidates = self.analyze(enriched)

        # Each question type is asked at most once per session.
        asked_types = {q.question_type for q in session.questions_asked}
        candidates = [q for q in candidates if q.question_type not in asked_types]

        if not candidates:
            return None

        # Candidates are already sorted, so the first one is the best.
        best = candidates[0]
        if best.information_gain < self.min_info_gain:
            return None

        return best

    def refine(
        self, prompt: str, answers: dict[str, str]
    ) -> IntentSpec:
        """Synthesize a structured :class:`IntentSpec` from prompt + answers.

        Uses the LLM to combine the original prompt with all gathered
        answers into a specification.

        Args:
            prompt: The user's original prompt.
            answers: Dict mapping question_type → user answer.

        Returns:
            A structured :class:`IntentSpec`. When the reply is not usable
            JSON, a spec whose ``refined_prompt`` is the reply text (or the
            original prompt if the reply is empty).
        """
        # NOTE: only the question type and the answer are available here;
        # the question wording itself is not part of *answers*.
        qa_text = "\n".join(
            f"Q ({qt}): {ans}" for qt, ans in answers.items()
        )
        refine_prompt = _REFINE_PROMPT.format(prompt=prompt, qa_text=qa_text)

        response: LLMResponse = self.provider.complete(refine_prompt, self.model)
        return self._parse_spec(response.content.strip(), prompt)

    def run_session(
        self,
        prompt: str,
        answer_func: Any | None = None,
    ) -> ElicitationSession:
        """Run a complete elicitation session programmatically.

        If *answer_func* is ``None``, gathers questions without answers
        (useful for getting the question list). Otherwise, calls
        ``answer_func(question)`` for each question to get the answer.

        Args:
            prompt: The user's original prompt.
            answer_func: Optional callable ``(ClarifyingQuestion) -> str``.

        Returns:
            The completed :class:`ElicitationSession`, with
            ``refined_spec`` always populated.
        """
        session = ElicitationSession(original_prompt=prompt)
        session.task_type = self.classify_task(prompt)

        while True:
            question = self.ask(session)
            if question is None:
                break

            session.questions_asked.append(question)

            if answer_func is not None:
                answer = answer_func(question)
                session.answers[question.question_type] = answer

        if session.answers:
            session.refined_spec = self.refine(prompt, session.answers)
        else:
            # No questions asked or no answers — create a minimal spec
            session.refined_spec = IntentSpec(
                task_type=session.task_type,
                original_prompt=prompt,
                refined_prompt=prompt,
            )

        return session

    def observe_outcome(
        self,
        session: ElicitationSession,
        final_score: float,
    ) -> None:
        """Update question priors based on the final outcome score.

        Every question asked in the session receives the same update:
        positive when ``final_score`` is at or above 0.7, negative
        otherwise.

        Args:
            session: The completed elicitation session.
            final_score: Final quality score of the task output.
        """
        success = final_score >= 0.7
        for q in session.questions_asked:
            prior = self._get_question_prior(q.question_type)
            prior.update(success)
            logger.debug(
                "question_prior_updated",
                question_type=q.question_type,
                success=success,
                new_mean=prior.mean,
            )

    # -- parsing helpers -----------------------------------------------------

    @staticmethod
    def _extract_json(raw: str, opener: str, closer: str) -> str | None:
        """Return the span from the first *opener* to the last *closer*.

        This drops any chatter or markdown fencing the model put around
        the JSON payload. Returns ``None`` when no such span exists.
        """
        start = raw.find(opener)
        end = raw.rfind(closer)
        if start < 0 or end <= start:
            return None
        return raw[start : end + 1]

    @staticmethod
    def _coerce_complexity(value: Any, default: float = 0.5) -> float:
        """Convert a model-supplied complexity into a float within [0.0, 1.0].

        Values that cannot be converted (``None``, non-numeric strings,
        containers) and NaN yield *default*; everything else is clamped.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        if number != number:  # NaN never equals itself
            return default
        return max(0.0, min(1.0, number))

    @staticmethod
    def _as_str_list(value: Any) -> list[str]:
        """Normalise a model-supplied value into a list of strings.

        A single non-blank string becomes a one-element list; lists and
        tuples have their items stringified; anything else yields ``[]``.
        """
        if isinstance(value, str):
            return [value] if value.strip() else []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return []

    def _parse_questions(self, raw: str) -> list[ClarifyingQuestion]:
        """Parse an LLM reply into :class:`ClarifyingQuestion` objects.

        Text before the first ``[`` and after the JSON array is ignored.
        Entries that are not objects, or whose ``"question"`` is missing,
        blank or not a string, are skipped. Unknown question types are
        remapped to ``"scope"``.

        Args:
            raw: Raw reply text from the model.

        Returns:
            Parsed questions in reply order; empty when no JSON array
            could be decoded.
        """
        candidate = self._extract_json(raw.strip(), "[", "]")
        if candidate is None:
            logger.warning("failed_to_parse_questions", raw=raw[:200])
            return []

        try:
            # raw_decode stops at the end of the first JSON value, so
            # trailing text after the array does not break parsing.
            items, _ = json.JSONDecoder().raw_decode(candidate)
        except json.JSONDecodeError:
            logger.warning("json_parse_failed", raw=candidate[:200])
            return []

        questions: list[ClarifyingQuestion] = []
        for item in items:
            if not isinstance(item, dict):
                continue

            text = item.get("question")
            if not isinstance(text, str) or not text.strip():
                continue  # nothing meaningful to ask

            qt = item.get("question_type", "general")
            if qt not in QUESTION_TYPES:
                qt = "scope"  # Default to scope for unknown types

            options = item.get("options")
            if isinstance(options, list):
                options = [str(option) for option in options]
            else:
                options = None

            questions.append(
                ClarifyingQuestion(
                    text=text.strip(),
                    question_type=qt,
                    options=options,
                )
            )
        return questions

    def _parse_spec(self, raw: str, original_prompt: str) -> IntentSpec:
        """Parse an LLM reply into an :class:`IntentSpec`.

        Text around the JSON object is ignored. Missing, ``null`` or
        wrongly-typed fields fall back to the :class:`IntentSpec`
        defaults (and to *original_prompt* for ``refined_prompt``).

        Args:
            raw: Raw reply text from the model.
            original_prompt: The user's original prompt.

        Returns:
            The parsed spec. When no JSON object can be decoded, a spec
            whose ``refined_prompt`` is the reply text, or
            *original_prompt* if the reply is empty.
        """
        text = raw.strip()
        candidate = self._extract_json(text, "{", "}")
        if candidate is None:
            # Fallback: treat entire response as refined prompt
            return IntentSpec(
                original_prompt=original_prompt,
                refined_prompt=text or original_prompt,
            )

        try:
            data, _ = json.JSONDecoder().raw_decode(candidate)
        except json.JSONDecodeError:
            logger.warning("spec_json_parse_failed", raw=candidate[:200])
            return IntentSpec(
                original_prompt=original_prompt,
                refined_prompt=candidate or original_prompt,
            )

        task_type = data.get("task_type")
        if not isinstance(task_type, str) or not task_type.strip():
            task_type = "general"

        refined_prompt = data.get("refined_prompt")
        if not isinstance(refined_prompt, str) or not refined_prompt.strip():
            refined_prompt = original_prompt

        constraints = data.get("constraints")
        if not isinstance(constraints, dict):
            constraints = {}

        return IntentSpec(
            task_type=task_type,
            original_prompt=original_prompt,
            refined_prompt=refined_prompt,
            constraints=constraints,
            quality_criteria=self._as_str_list(data.get("quality_criteria")),
            decomposition_hints=self._as_str_list(data.get("decomposition_hints")),
            estimated_complexity=self._coerce_complexity(
                data.get("estimated_complexity", 0.5)
            ),
        )
|