loopllm 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loopllm/tasks.py ADDED
@@ -0,0 +1,599 @@
1
+ """Task model and orchestrator for multi-step workflows.
2
+
3
+ Decomposes an :class:`IntentSpec` into a dependency-ordered graph
4
+ of subtasks, executes each through :class:`LoopedLLM`, and assembles
5
+ the final result.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import uuid
11
+ from dataclasses import dataclass, field
12
+ from enum import Enum
13
+ from typing import Any
14
+
15
+ import structlog
16
+
17
+ from loopllm.elicitation import IntentRefiner, IntentSpec
18
+ from loopllm.engine import (
19
+ EvaluationResult,
20
+ LoopConfig,
21
+ LoopedLLM,
22
+ RefinementResult,
23
+ )
24
+ from loopllm.evaluators import LengthEvaluator
25
+ from loopllm.priors import AdaptivePriors, CallObservation
26
+ from loopllm.provider import LLMProvider
27
+ from loopllm.store import LoopStore
28
+
29
+ logger = structlog.get_logger(__name__)
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Task state machine
34
+ # ---------------------------------------------------------------------------
35
+
36
+
37
class TaskState(str, Enum):
    """Lifecycle state of a task.

    The ``str`` mix-in makes every member compare equal to its plain string
    value; the orchestrator hands ``state.value`` to the store when it
    persists or updates a task.
    """

    # Default state of a freshly constructed ``Task``.
    PENDING = "pending"
    # Set by ``TaskOrchestrator.execute`` right before a task is refined.
    IN_PROGRESS = "in_progress"
    # Set by ``TaskOrchestrator.execute`` once refinement returned a result.
    COMPLETED = "completed"
    # NOTE(review): presumably set after a successful ``verify`` pass — confirm
    # against callers outside this module.
    VERIFIED = "verified"
    # Set by ``TaskOrchestrator.execute`` when refinement raised.
    FAILED = "failed"
    # NOTE(review): presumably for tasks that cannot run because a
    # prerequisite did not finish — confirm intended usage.
    BLOCKED = "blocked"
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Task data model
50
+ # ---------------------------------------------------------------------------
51
+
52
+
53
@dataclass
class Task:
    """A single unit of work in a task plan.

    Attributes:
        id: Unique identifier; defaults to the first 12 hex characters of a
            freshly generated UUID4.
        parent_id: ID of the parent task (``None`` for root tasks).
        title: Short description.
        description: Full description / instructions. The orchestrator uses
            this text as the prompt when it executes the task.
        state: Current lifecycle state; starts as ``TaskState.PENDING``.
        dependencies: IDs of tasks that must complete before this one.
        intent_spec: Optional structured spec for this subtask.
        result: Refinement result once executed.
        metadata: Arbitrary extra data (e.g. the orchestrator stores an
            ``"error"`` string here when execution fails).
    """

    # A default factory (not a plain default) so every instance gets its own ID.
    id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])
    parent_id: str | None = None
    title: str = ""
    description: str = ""
    state: TaskState = TaskState.PENDING
    # Mutable defaults go through ``default_factory`` so instances never share them.
    dependencies: list[str] = field(default_factory=list)
    intent_spec: IntentSpec | None = None
    result: RefinementResult | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
78
+
79
+
80
@dataclass
class TaskPlan:
    """An ordered collection of tasks with dependency information.

    Attributes:
        tasks: All tasks in the plan.
        dependency_graph: Mapping of task ID → list of dependency IDs. When a
            task has no entry here, its own ``dependencies`` list is used.
        estimated_total_cost: Rough cost estimate (token-based).
        session_id: ID of the elicitation session that produced this plan.
    """

    tasks: list[Task] = field(default_factory=list)
    dependency_graph: dict[str, list[str]] = field(default_factory=dict)
    estimated_total_cost: float = 0.0
    session_id: str = ""

    def execution_order(self) -> list[Task]:
        """Return tasks in topological order respecting dependencies.

        Uses Kahn's algorithm with a FIFO queue, so tasks that become ready
        at the same time keep the order in which they appear in ``tasks``.
        Dependency IDs that do not belong to any task in the plan are
        ignored.

        Returns:
            Tasks sorted so that each task appears after all its dependencies.

        Raises:
            ValueError: If the dependency graph contains a cycle (a task
                that depends on itself counts as a cycle).
        """
        # Local import keeps this block self-contained; ``deque.popleft`` is
        # O(1) whereas ``list.pop(0)`` shifts the whole list on every pop.
        from collections import deque

        task_map = {t.id: t for t in self.tasks}
        in_degree: dict[str, int] = dict.fromkeys(task_map, 0)
        dependents: dict[str, list[str]] = {tid: [] for tid in task_map}

        for task in self.tasks:
            # An explicit graph entry overrides the task's own dependency list.
            deps = self.dependency_graph.get(task.id, task.dependencies)
            for dep_id in deps:
                if dep_id in dependents:
                    dependents[dep_id].append(task.id)
                    in_degree[task.id] += 1

        ready = deque(tid for tid, degree in in_degree.items() if degree == 0)
        ordered: list[Task] = []

        while ready:
            tid = ready.popleft()
            ordered.append(task_map[tid])
            for next_id in dependents[tid]:
                in_degree[next_id] -= 1
                if in_degree[next_id] == 0:
                    ready.append(next_id)

        # Anything never released from the queue is stuck behind a cycle.
        if len(ordered) != len(self.tasks):
            msg = "Dependency graph contains a cycle"
            raise ValueError(msg)

        return ordered
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # Prompt templates for task decomposition
136
+ # ---------------------------------------------------------------------------
137
+
138
# Template for the decomposition request sent by ``TaskOrchestrator.plan``.
# It is filled with ``str.format``, so any literal brace added to the text
# must be doubled. ``plan`` passes ``constraints``, ``quality_criteria`` and
# ``decomposition_hints`` through ``json.dumps`` before substitution. The
# reply is expected to be a JSON array whose "dependencies" entries refer to
# sibling subtasks by title.
_DECOMPOSE_PROMPT = """\
You are a task decomposition assistant. Given the following structured
specification, break it into discrete subtasks that can be executed
independently (with explicit dependencies where needed).

Specification:
- Task type: {task_type}
- Prompt: {refined_prompt}
- Constraints: {constraints}
- Quality criteria: {quality_criteria}
- Decomposition hints: {decomposition_hints}
- Estimated complexity: {estimated_complexity}

Produce a JSON array of subtask objects. Each subtask has:
- "title": short label (3-8 words)
- "description": detailed instructions for the subtask
- "dependencies": list of titles of subtasks this one depends on (empty for independent)
- "estimated_complexity": float 0.0-1.0

If the task is simple enough to do in one step, return a single-element array.
Return ONLY the JSON array.
"""
160
+
161
# Template used by ``TaskOrchestrator._assemble`` to merge subtask outputs.
# Filled with ``str.format``: ``original_prompt`` comes from the spec and
# ``subtask_results`` is the concatenation of one titled section per
# completed subtask.
_ASSEMBLE_PROMPT = """\
You are a result assembler. Given the following subtask results,
combine them into a single coherent output that addresses the original
prompt.

Original prompt: {original_prompt}

Subtask results:
{subtask_results}

Produce a single, cohesive output that integrates all subtask results.
Do not repeat the prompt or explain what you did — just produce the final output.
"""
174
+
175
# Template used by ``TaskOrchestrator.verify``. Filled with ``str.format``;
# ``quality_criteria`` is substituted as a JSON string. The escaped triple
# quotes fence the output under review inside the prompt. The reply is
# parsed by ``TaskOrchestrator._parse_verification``, which reads the
# "overall_score", "criteria_scores" and "issues" keys requested below.
_VERIFY_PROMPT = """\
You are a quality verifier. Check whether the following output
addresses all the requirements from the original specification.

Specification:
- Prompt: {refined_prompt}
- Quality criteria: {quality_criteria}

Output to verify:
\"\"\"
{output}
\"\"\"

For each quality criterion, rate it 0.0-1.0.
Return a JSON object with:
- "overall_score": float 0.0-1.0
- "criteria_scores": object mapping criterion → score
- "issues": list of strings describing problems (empty if none)

Return ONLY the JSON object.
"""
196
+
197
+
198
+ # ---------------------------------------------------------------------------
199
+ # TaskOrchestrator
200
+ # ---------------------------------------------------------------------------
201
+
202
+
203
+ class TaskOrchestrator:
204
+ """Decompose, execute, and verify multi-step LLM tasks.
205
+
206
+ Integrates :class:`IntentRefiner` for elicitation, :class:`LoopedLLM`
207
+ for per-subtask refinement, and :class:`AdaptivePriors` for learning
208
+ optimal decomposition strategies.
209
+
210
+ Args:
211
+ provider: LLM provider for all calls.
212
+ priors: Adaptive priors for learning.
213
+ store: Optional persistent store.
214
+ refiner: Optional intent refiner (created automatically if not given).
215
+ model: Default model to use.
216
+ """
217
+
218
+ def __init__(
219
+ self,
220
+ provider: LLMProvider,
221
+ priors: AdaptivePriors | None = None,
222
+ store: LoopStore | None = None,
223
+ refiner: IntentRefiner | None = None,
224
+ model: str = "gpt-4o-mini",
225
+ ) -> None:
226
+ self.provider = provider
227
+ self.priors = priors or AdaptivePriors()
228
+ self.store = store
229
+ self.refiner = refiner or IntentRefiner(
230
+ provider=provider, priors=self.priors, model=model
231
+ )
232
+ self.model = model
233
+
234
+ # -- decomposition -------------------------------------------------------
235
+
236
+ def plan(self, spec: IntentSpec) -> TaskPlan:
237
+ """Decompose an :class:`IntentSpec` into a :class:`TaskPlan`.
238
+
239
+ Uses the LLM to generate subtasks with dependency ordering.
240
+ Simple tasks (complexity < 0.3) are kept as a single task.
241
+
242
+ Args:
243
+ spec: The structured specification.
244
+
245
+ Returns:
246
+ A :class:`TaskPlan` with ordered subtasks.
247
+ """
248
+ # Simple tasks don't need decomposition
249
+ if spec.estimated_complexity < 0.3 and not spec.decomposition_hints:
250
+ task = Task(
251
+ title="Execute task",
252
+ description=spec.refined_prompt,
253
+ intent_spec=spec,
254
+ )
255
+ return TaskPlan(
256
+ tasks=[task],
257
+ dependency_graph={task.id: []},
258
+ )
259
+
260
+ # Use the LLM to decompose
261
+ decompose_prompt = _DECOMPOSE_PROMPT.format(
262
+ task_type=spec.task_type,
263
+ refined_prompt=spec.refined_prompt,
264
+ constraints=json.dumps(spec.constraints),
265
+ quality_criteria=json.dumps(spec.quality_criteria),
266
+ decomposition_hints=json.dumps(spec.decomposition_hints),
267
+ estimated_complexity=spec.estimated_complexity,
268
+ )
269
+
270
+ response = self.provider.complete(decompose_prompt, self.model)
271
+ raw = response.content.strip()
272
+
273
+ tasks = self._parse_tasks(raw, spec)
274
+ dep_graph = self._build_dependency_graph(tasks)
275
+
276
+ plan = TaskPlan(
277
+ tasks=tasks,
278
+ dependency_graph=dep_graph,
279
+ )
280
+
281
+ # Persist tasks
282
+ if self.store:
283
+ for task in tasks:
284
+ self.store.save_task({
285
+ "id": task.id,
286
+ "parent_id": task.parent_id,
287
+ "title": task.title,
288
+ "description": task.description,
289
+ "state": task.state.value,
290
+ "dependencies": task.dependencies,
291
+ "spec": {
292
+ "task_type": spec.task_type,
293
+ "refined_prompt": task.description,
294
+ } if task.intent_spec else None,
295
+ })
296
+
297
+ logger.info("task_plan_created", task_count=len(tasks))
298
+ return plan
299
+
300
+ # -- execution -----------------------------------------------------------
301
+
302
+ def execute(
303
+ self, plan: TaskPlan, model: str | None = None
304
+ ) -> dict[str, RefinementResult]:
305
+ """Execute all tasks in a plan in dependency order.
306
+
307
+ Each task is refined using :class:`LoopedLLM` with adaptive
308
+ exit conditions. Prior task outputs are passed as context
309
+ to dependent tasks.
310
+
311
+ Args:
312
+ plan: The task plan to execute.
313
+ model: Model override (defaults to ``self.model``).
314
+
315
+ Returns:
316
+ Dict mapping task ID to :class:`RefinementResult`.
317
+ """
318
+ model = model or self.model
319
+ results: dict[str, RefinementResult] = {}
320
+
321
+ depth = self.priors.predict_optimal_depth(
322
+ "orchestrated_subtask", model
323
+ )
324
+ config = LoopConfig(
325
+ max_iterations=max(depth, 2),
326
+ quality_threshold=0.75,
327
+ )
328
+
329
+ for task in plan.execution_order():
330
+ task.state = TaskState.IN_PROGRESS
331
+ if self.store:
332
+ self.store.update_task_state(task.id, task.state.value)
333
+
334
+ logger.info("executing_task", task_id=task.id, title=task.title)
335
+
336
+ # Build context from dependency results
337
+ dep_context = ""
338
+ for dep_id in plan.dependency_graph.get(task.id, task.dependencies):
339
+ if dep_id in results:
340
+ dep_context += f"\n--- Result from '{dep_id}' ---\n"
341
+ dep_context += results[dep_id].output + "\n"
342
+
343
+ prompt = task.description
344
+ if dep_context:
345
+ prompt = (
346
+ f"{task.description}\n\n"
347
+ f"Context from previous steps:\n{dep_context}"
348
+ )
349
+
350
+ evaluator = LengthEvaluator(min_words=5, max_words=10_000)
351
+ loop = LoopedLLM(provider=self.provider, config=config)
352
+
353
+ try:
354
+ result = loop.refine(prompt, evaluator, model=model)
355
+ task.result = result
356
+ task.state = TaskState.COMPLETED
357
+ results[task.id] = result
358
+
359
+ # Learn from this subtask execution
360
+ obs = CallObservation(
361
+ task_type="orchestrated_subtask",
362
+ model_id=model,
363
+ scores=result.metrics.score_trajectory,
364
+ latencies_ms=[it.latency_ms for it in result.iterations],
365
+ converged=result.metrics.converged,
366
+ total_iterations=result.metrics.total_iterations,
367
+ max_iterations=config.max_iterations,
368
+ quality_threshold=config.quality_threshold,
369
+ )
370
+ self.priors.observe(obs)
371
+
372
+ except Exception as exc:
373
+ logger.error("task_failed", task_id=task.id, error=str(exc))
374
+ task.state = TaskState.FAILED
375
+ task.metadata["error"] = str(exc)
376
+
377
+ if self.store:
378
+ self.store.update_task_state(task.id, task.state.value)
379
+
380
+ return results
381
+
382
+ # -- verification --------------------------------------------------------
383
+
384
+ def verify(
385
+ self,
386
+ spec: IntentSpec,
387
+ output: str,
388
+ ) -> EvaluationResult:
389
+ """Verify a combined output against the original spec.
390
+
391
+ Uses the LLM to check quality criteria, then parses
392
+ the structured response into an :class:`EvaluationResult`.
393
+
394
+ Args:
395
+ spec: The original specification.
396
+ output: The assembled output to verify.
397
+
398
+ Returns:
399
+ An :class:`EvaluationResult` with per-criterion scores.
400
+ """
401
+ verify_prompt = _VERIFY_PROMPT.format(
402
+ refined_prompt=spec.refined_prompt,
403
+ quality_criteria=json.dumps(spec.quality_criteria),
404
+ output=output,
405
+ )
406
+
407
+ response = self.provider.complete(verify_prompt, self.model)
408
+ return self._parse_verification(response.content)
409
+
410
+ # -- full pipeline -------------------------------------------------------
411
+
412
+ def run(
413
+ self,
414
+ prompt: str,
415
+ model: str | None = None,
416
+ answer_func: Any | None = None,
417
+ ) -> RefinementResult:
418
+ """Run the full pipeline: elicit → plan → execute → assemble.
419
+
420
+ This is the main entry point for end-to-end task processing.
421
+
422
+ Args:
423
+ prompt: The user's original prompt.
424
+ model: Model override.
425
+ answer_func: Optional function for interactive elicitation.
426
+
427
+ Returns:
428
+ The final assembled :class:`RefinementResult`.
429
+ """
430
+ model = model or self.model
431
+
432
+ # Step 1: Elicit intent
433
+ logger.info("pipeline_elicit", prompt=prompt[:80])
434
+ session = self.refiner.run_session(prompt, answer_func=answer_func)
435
+ spec = session.refined_spec or IntentSpec(
436
+ original_prompt=prompt, refined_prompt=prompt
437
+ )
438
+
439
+ # Step 2: Plan
440
+ logger.info("pipeline_plan", task_type=spec.task_type)
441
+ plan = self.plan(spec)
442
+ plan.session_id = session.session_id
443
+
444
+ # Step 3: Execute
445
+ logger.info("pipeline_execute", task_count=len(plan.tasks))
446
+ results = self.execute(plan, model=model)
447
+
448
+ # Step 4: Assemble
449
+ if len(results) == 1:
450
+ # Single task — return directly
451
+ final_result = next(iter(results.values()))
452
+ else:
453
+ # Multiple tasks — assemble results
454
+ final_result = self._assemble(spec, plan, results, model)
455
+
456
+ # Step 5: Learn from outcome
457
+ self.refiner.observe_outcome(
458
+ session, final_score=final_result.metrics.best_score
459
+ )
460
+
461
+ logger.info(
462
+ "pipeline_complete",
463
+ tasks=len(plan.tasks),
464
+ best_score=final_result.metrics.best_score,
465
+ )
466
+ return final_result
467
+
468
+ # -- assembly ------------------------------------------------------------
469
+
470
+ def _assemble(
471
+ self,
472
+ spec: IntentSpec,
473
+ plan: TaskPlan,
474
+ results: dict[str, RefinementResult],
475
+ model: str,
476
+ ) -> RefinementResult:
477
+ """Assemble subtask results into a single output."""
478
+ subtask_text = ""
479
+ for task in plan.execution_order():
480
+ if task.id in results:
481
+ subtask_text += (
482
+ f"\n--- {task.title} ---\n"
483
+ f"{results[task.id].output}\n"
484
+ )
485
+
486
+ assemble_prompt = _ASSEMBLE_PROMPT.format(
487
+ original_prompt=spec.original_prompt,
488
+ subtask_results=subtask_text,
489
+ )
490
+
491
+ evaluator = LengthEvaluator(min_words=10, max_words=10_000)
492
+ config = LoopConfig(max_iterations=2, quality_threshold=0.8)
493
+ loop = LoopedLLM(provider=self.provider, config=config)
494
+
495
+ return loop.refine(assemble_prompt, evaluator, model=model)
496
+
497
+ # -- parsing helpers -----------------------------------------------------
498
+
499
+ def _parse_tasks(
500
+ self, raw: str, spec: IntentSpec
501
+ ) -> list[Task]:
502
+ """Parse LLM decomposition response into Task objects."""
503
+ raw = raw.strip()
504
+ if not raw.startswith("["):
505
+ start = raw.find("[")
506
+ end = raw.rfind("]")
507
+ if start >= 0 and end > start:
508
+ raw = raw[start : end + 1]
509
+ else:
510
+ # Can't parse — create a single task
511
+ return [
512
+ Task(
513
+ title="Execute task",
514
+ description=spec.refined_prompt,
515
+ intent_spec=spec,
516
+ )
517
+ ]
518
+
519
+ try:
520
+ items = json.loads(raw)
521
+ except json.JSONDecodeError:
522
+ return [
523
+ Task(
524
+ title="Execute task",
525
+ description=spec.refined_prompt,
526
+ intent_spec=spec,
527
+ )
528
+ ]
529
+
530
+ tasks: list[Task] = []
531
+ title_to_id: dict[str, str] = {}
532
+
533
+ for item in items:
534
+ if not isinstance(item, dict):
535
+ continue
536
+ task = Task(
537
+ title=item.get("title", "Subtask"),
538
+ description=item.get("description", ""),
539
+ intent_spec=spec,
540
+ metadata={"estimated_complexity": item.get("estimated_complexity", 0.5)},
541
+ )
542
+ title_to_id[task.title] = task.id
543
+ tasks.append(task)
544
+
545
+ # Resolve title-based dependencies to IDs
546
+ for item, task in zip(items, tasks):
547
+ if not isinstance(item, dict):
548
+ continue
549
+ dep_titles = item.get("dependencies", [])
550
+ for dt in dep_titles:
551
+ if dt in title_to_id:
552
+ task.dependencies.append(title_to_id[dt])
553
+
554
+ return tasks if tasks else [
555
+ Task(title="Execute task", description=spec.refined_prompt, intent_spec=spec)
556
+ ]
557
+
558
+ def _build_dependency_graph(
559
+ self, tasks: list[Task]
560
+ ) -> dict[str, list[str]]:
561
+ """Build a dependency graph from task objects."""
562
+ return {task.id: list(task.dependencies) for task in tasks}
563
+
564
+ def _parse_verification(self, raw: str) -> EvaluationResult:
565
+ """Parse LLM verification response into an EvaluationResult."""
566
+ raw = raw.strip()
567
+ if not raw.startswith("{"):
568
+ start = raw.find("{")
569
+ end = raw.rfind("}")
570
+ if start >= 0 and end > start:
571
+ raw = raw[start : end + 1]
572
+ else:
573
+ return EvaluationResult(
574
+ score=0.5, passed=False,
575
+ deficiencies=["Could not parse verification response"],
576
+ )
577
+
578
+ try:
579
+ data = json.loads(raw)
580
+ except json.JSONDecodeError:
581
+ return EvaluationResult(
582
+ score=0.5, passed=False,
583
+ deficiencies=["Invalid verification JSON"],
584
+ )
585
+
586
+ score = float(data.get("overall_score", 0.5))
587
+ score = max(0.0, min(1.0, score))
588
+ issues = data.get("issues", [])
589
+ sub_scores = {
590
+ str(k): float(v)
591
+ for k, v in data.get("criteria_scores", {}).items()
592
+ }
593
+
594
+ return EvaluationResult(
595
+ score=score,
596
+ passed=score >= 0.7 and not issues,
597
+ deficiencies=issues,
598
+ sub_scores=sub_scores,
599
+ )