deepwork-0.5.1-py3-none-any.whl → deepwork-0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. deepwork/__init__.py +1 -1
  2. deepwork/cli/hook.py +3 -4
  3. deepwork/cli/install.py +70 -117
  4. deepwork/cli/main.py +2 -2
  5. deepwork/cli/serve.py +133 -0
  6. deepwork/cli/sync.py +93 -58
  7. deepwork/core/adapters.py +91 -102
  8. deepwork/core/generator.py +19 -386
  9. deepwork/core/hooks_syncer.py +1 -1
  10. deepwork/core/parser.py +270 -1
  11. deepwork/hooks/README.md +0 -44
  12. deepwork/hooks/__init__.py +3 -6
  13. deepwork/hooks/check_version.sh +54 -21
  14. deepwork/mcp/__init__.py +23 -0
  15. deepwork/mcp/quality_gate.py +347 -0
  16. deepwork/mcp/schemas.py +263 -0
  17. deepwork/mcp/server.py +253 -0
  18. deepwork/mcp/state.py +422 -0
  19. deepwork/mcp/tools.py +394 -0
  20. deepwork/schemas/job.schema.json +347 -0
  21. deepwork/schemas/job_schema.py +27 -239
  22. deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md +9 -15
  23. deepwork/standard_jobs/deepwork_jobs/job.yml +146 -46
  24. deepwork/standard_jobs/deepwork_jobs/steps/define.md +100 -33
  25. deepwork/standard_jobs/deepwork_jobs/steps/errata.md +154 -0
  26. deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +207 -0
  27. deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md +177 -0
  28. deepwork/standard_jobs/deepwork_jobs/steps/implement.md +22 -138
  29. deepwork/standard_jobs/deepwork_jobs/steps/iterate.md +221 -0
  30. deepwork/standard_jobs/deepwork_jobs/steps/learn.md +2 -26
  31. deepwork/standard_jobs/deepwork_jobs/steps/test.md +154 -0
  32. deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template +2 -0
  33. deepwork/templates/claude/settings.json +16 -0
  34. deepwork/templates/claude/skill-deepwork.md.jinja +37 -0
  35. deepwork/templates/gemini/skill-deepwork.md.jinja +37 -0
  36. deepwork-0.7.0.dist-info/METADATA +317 -0
  37. deepwork-0.7.0.dist-info/RECORD +64 -0
  38. deepwork/cli/rules.py +0 -32
  39. deepwork/core/command_executor.py +0 -190
  40. deepwork/core/pattern_matcher.py +0 -271
  41. deepwork/core/rules_parser.py +0 -559
  42. deepwork/core/rules_queue.py +0 -321
  43. deepwork/hooks/rules_check.py +0 -759
  44. deepwork/schemas/rules_schema.py +0 -135
  45. deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md +0 -208
  46. deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.example +0 -86
  47. deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh +0 -38
  48. deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml +0 -8
  49. deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh +0 -16
  50. deepwork/standard_jobs/deepwork_rules/job.yml +0 -49
  51. deepwork/standard_jobs/deepwork_rules/rules/.gitkeep +0 -13
  52. deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example +0 -10
  53. deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example +0 -10
  54. deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example +0 -11
  55. deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md +0 -46
  56. deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example +0 -13
  57. deepwork/standard_jobs/deepwork_rules/steps/define.md +0 -249
  58. deepwork/templates/claude/skill-job-meta.md.jinja +0 -77
  59. deepwork/templates/claude/skill-job-step.md.jinja +0 -235
  60. deepwork/templates/gemini/skill-job-meta.toml.jinja +0 -76
  61. deepwork/templates/gemini/skill-job-step.toml.jinja +0 -162
  62. deepwork-0.5.1.dist-info/METADATA +0 -381
  63. deepwork-0.5.1.dist-info/RECORD +0 -72
  64. {deepwork-0.5.1.dist-info → deepwork-0.7.0.dist-info}/WHEEL +0 -0
  65. {deepwork-0.5.1.dist-info → deepwork-0.7.0.dist-info}/entry_points.txt +0 -0
  66. {deepwork-0.5.1.dist-info → deepwork-0.7.0.dist-info}/licenses/LICENSE.md +0 -0
deepwork/mcp/quality_gate.py
@@ -0,0 +1,347 @@
+"""Quality gate for evaluating step outputs.
+
+The quality gate invokes a review agent (via subprocess) to evaluate
+step outputs against quality criteria.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from pathlib import Path
+from typing import Any
+
+import aiofiles
+
+from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult
+
+# JSON Schema for quality gate response validation
+QUALITY_GATE_RESPONSE_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "required": ["passed", "feedback"],
+    "properties": {
+        "passed": {"type": "boolean"},
+        "feedback": {"type": "string"},
+        "criteria_results": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "required": ["criterion", "passed"],
+                "properties": {
+                    "criterion": {"type": "string"},
+                    "passed": {"type": "boolean"},
+                    "feedback": {"type": ["string", "null"]},
+                },
+            },
+        },
+    },
+}
+
+# File separator format: 20 dashes, filename, 20 dashes
+FILE_SEPARATOR = "-" * 20
+
+
+class QualityGateError(Exception):
+    """Exception raised for quality gate errors."""
+
+    pass
+
+
+class QualityGate:
+    """Evaluates step outputs against quality criteria.
+
+    Uses a subprocess to invoke a review agent (e.g., Claude CLI) that
+    evaluates outputs and returns structured feedback.
+
+    See doc/reference/calling_claude_in_print_mode.md for details on
+    proper CLI invocation with structured output.
+    """
+
+    def __init__(
+        self,
+        timeout: int = 120,
+        *,
+        _test_command: list[str] | None = None,
+    ):
+        """Initialize quality gate.
+
+        Args:
+            timeout: Timeout in seconds for review agent
+            _test_command: Internal testing only - override the subprocess command.
+                When set, skips adding --json-schema flag (test mock handles it).
+        """
+        self.timeout = timeout
+        self._test_command = _test_command
+
+    def _build_instructions(self, quality_criteria: list[str]) -> str:
+        """Build the system instructions for the review agent.
+
+        Args:
+            quality_criteria: List of quality criteria to evaluate
+
+        Returns:
+            System instructions string
+        """
+        criteria_list = "\n".join(f"- {c}" for c in quality_criteria)
+
+        return f"""You are a quality gate reviewer. Your job is to evaluate whether outputs meet the specified quality criteria.
+
+## Quality Criteria to Evaluate
+
+{criteria_list}
+
+## Response Format
+
+You must respond with JSON in this exact structure:
+```json
+{{
+  "passed": true/false,
+  "feedback": "Brief overall summary of evaluation",
+  "criteria_results": [
+    {{
+      "criterion": "The criterion text",
+      "passed": true/false,
+      "feedback": "Specific feedback for this criterion (null if passed)"
+    }}
+  ]
+}}
+```
+
+## Guidelines
+
+- Be strict but fair
+- Only mark a criterion as passed if it is clearly met
+- Provide specific, actionable feedback for failed criteria
+- The overall "passed" should be true only if ALL criteria pass"""
+
+    async def _build_payload(
+        self,
+        outputs: list[str],
+        project_root: Path,
+    ) -> str:
+        """Build the user prompt payload with file contents.
+
+        Args:
+            outputs: List of output file paths
+            project_root: Project root path for reading files
+
+        Returns:
+            Formatted payload with file contents
+        """
+        output_sections: list[str] = []
+
+        for output_path in outputs:
+            full_path = project_root / output_path
+            header = f"{FILE_SEPARATOR} {output_path} {FILE_SEPARATOR}"
+
+            if full_path.exists():
+                try:
+                    async with aiofiles.open(full_path, encoding="utf-8") as f:
+                        content = await f.read()
+                    output_sections.append(f"{header}\n{content}")
+                except Exception as e:
+                    output_sections.append(f"{header}\n[Error reading file: {e}]")
+            else:
+                output_sections.append(f"{header}\n[File not found]")
+
+        if not output_sections:
+            return "[No output files provided]"
+
+        return "\n\n".join(output_sections)
+
+    def _parse_response(self, response_text: str) -> QualityGateResult:
+        """Parse the review agent's response.
+
+        When using --print --output-format json --json-schema, Claude CLI returns
+        a wrapper object with the structured output in the 'structured_output' field.
+
+        Args:
+            response_text: Raw response from review agent (JSON wrapper)
+
+        Returns:
+            Parsed QualityGateResult
+
+        Raises:
+            QualityGateError: If response cannot be parsed
+        """
+        try:
+            wrapper = json.loads(response_text.strip())
+
+            # Check for errors in the wrapper
+            if wrapper.get("is_error"):
+                raise QualityGateError(
+                    f"Review agent returned error: {wrapper.get('result', 'Unknown error')}"
+                )
+
+            # Extract structured_output - this is where --json-schema puts the result
+            data = wrapper.get("structured_output")
+            if data is None:
+                raise QualityGateError(
+                    "Review agent response missing 'structured_output' field. "
+                    f"Response was: {response_text[:500]}..."
+                )
+
+            # Parse criteria results
+            criteria_results = [
+                QualityCriteriaResult(
+                    criterion=cr.get("criterion", ""),
+                    passed=cr.get("passed", False),
+                    feedback=cr.get("feedback"),
+                )
+                for cr in data.get("criteria_results", [])
+            ]
+
+            return QualityGateResult(
+                passed=data.get("passed", False),
+                feedback=data.get("feedback", "No feedback provided"),
+                criteria_results=criteria_results,
+            )
+
+        except json.JSONDecodeError as e:
+            raise QualityGateError(
+                f"Failed to parse review agent response as JSON: {e}\n"
+                f"Response was: {response_text[:500]}..."
+            ) from e
+        except (ValueError, KeyError) as e:
+            raise QualityGateError(
+                f"Failed to extract quality gate result: {e}\n"
+                f"Response was: {response_text[:500]}..."
+            ) from e
+
+    async def evaluate(
+        self,
+        quality_criteria: list[str],
+        outputs: list[str],
+        project_root: Path,
+    ) -> QualityGateResult:
+        """Evaluate step outputs against quality criteria.
+
+        Args:
+            quality_criteria: List of quality criteria to evaluate
+            outputs: List of output file paths
+            project_root: Project root path
+
+        Returns:
+            QualityGateResult with pass/fail and feedback
+
+        Raises:
+            QualityGateError: If evaluation fails
+        """
+        if not quality_criteria:
+            # No criteria = auto-pass
+            return QualityGateResult(
+                passed=True,
+                feedback="No quality criteria defined - auto-passing",
+                criteria_results=[],
+            )
+
+        # Build system instructions and payload separately
+        instructions = self._build_instructions(quality_criteria)
+        payload = await self._build_payload(outputs, project_root)
+
+        # Build command with proper flag ordering for Claude CLI
+        # See doc/reference/calling_claude_in_print_mode.md for details
+        #
+        # Key insight: flags must come BEFORE `-p --` because:
+        # - `-p` expects a prompt argument immediately after
+        # - `--` marks the end of flags, everything after is the prompt
+        # - When piping via stdin, we use `-p --` to read from stdin
+        if self._test_command:
+            # Testing mode: use provided command, add system prompt only
+            full_cmd = self._test_command + ["--system-prompt", instructions]
+        else:
+            # Production mode: use Claude CLI with proper flags
+            schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA)
+            full_cmd = [
+                "claude",
+                "--print",  # Non-interactive mode
+                "--output-format",
+                "json",  # JSON output wrapper
+                "--system-prompt",
+                instructions,
+                "--json-schema",
+                schema_json,  # Structured output - result in 'structured_output' field
+                "-p",
+                "--",  # Read prompt from stdin
+            ]
+
+        try:
+            # Run review agent with payload piped via stdin
+            process = await asyncio.create_subprocess_exec(
+                *full_cmd,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=str(project_root),
+            )
+
+            try:
+                stdout, stderr = await asyncio.wait_for(
+                    process.communicate(input=payload.encode()),
+                    timeout=self.timeout,
+                )
+            except TimeoutError:
+                process.kill()
+                await process.wait()
+                raise QualityGateError(
+                    f"Review agent timed out after {self.timeout} seconds"
+                ) from None
+
+            if process.returncode != 0:
+                raise QualityGateError(
+                    f"Review agent failed with exit code {process.returncode}:\n"
+                    f"stderr: {stderr.decode()}"
+                )
+
+            return self._parse_response(stdout.decode())
+
+        except FileNotFoundError as e:
+            raise QualityGateError("Review agent command not found: claude") from e
+
+
+class MockQualityGate(QualityGate):
+    """Mock quality gate for testing.
+
+    Always passes unless configured otherwise.
+    """
+
+    def __init__(self, should_pass: bool = True, feedback: str = "Mock evaluation"):
+        """Initialize mock quality gate.
+
+        Args:
+            should_pass: Whether evaluations should pass
+            feedback: Feedback message to return
+        """
+        super().__init__()
+        self.should_pass = should_pass
+        self.feedback = feedback
+        self.evaluations: list[dict[str, Any]] = []
+
+    async def evaluate(
+        self,
+        quality_criteria: list[str],
+        outputs: list[str],
+        project_root: Path,
+    ) -> QualityGateResult:
+        """Mock evaluation - records call and returns configured result."""
+        self.evaluations.append(
+            {
+                "quality_criteria": quality_criteria,
+                "outputs": outputs,
+            }
+        )

+        criteria_results = [
+            QualityCriteriaResult(
+                criterion=c,
+                passed=self.should_pass,
+                feedback=None if self.should_pass else self.feedback,
+            )
+            for c in quality_criteria
+        ]
+
+        return QualityGateResult(
+            passed=self.should_pass,
+            feedback=self.feedback,
+            criteria_results=criteria_results,
+        )
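
For orientation, here is a minimal usage sketch (illustrative, not part of the diff) showing how the quality gate above can be exercised without spawning the `claude` subprocess, via the `MockQualityGate` the module ships; the criterion text and file paths are made up, and the synthetic wrapper only uses fields the parser above reads:

```python
# Illustrative sketch, not from the package: exercise the quality gate without
# spawning the `claude` CLI by using the MockQualityGate defined above.
import asyncio
import json
from pathlib import Path

from deepwork.mcp.quality_gate import MockQualityGate, QualityGate


async def main() -> None:
    gate = MockQualityGate(should_pass=False, feedback="Docstring missing")
    result = await gate.evaluate(
        quality_criteria=["Every public function has a docstring"],  # illustrative
        outputs=["src/module.py"],  # illustrative path
        project_root=Path("."),
    )
    assert not result.passed
    assert gate.evaluations[0]["outputs"] == ["src/module.py"]  # call was recorded

    # _parse_response consumes the CLI's JSON wrapper; per the code above,
    # the structured result lives under "structured_output".
    wrapper = json.dumps(
        {
            "is_error": False,
            "structured_output": {
                "passed": True,
                "feedback": "All criteria met",
                "criteria_results": [],
            },
        }
    )
    assert QualityGate()._parse_response(wrapper).passed


asyncio.run(main())
```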
deepwork/mcp/schemas.py
@@ -0,0 +1,263 @@
+"""Pydantic models for MCP tool inputs and outputs.
+
+IMPORTANT: If you modify any models in this file that affect the MCP tool
+interfaces (input models, output models, or their fields), you MUST also
+update the documentation in doc/mcp_interface.md to keep it in sync with
+the implementation.
+"""
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+# =============================================================================
+# Enums
+# =============================================================================
+
+
+class StepStatus(str, Enum):
+    """Status returned from finished_step."""
+
+    NEEDS_WORK = "needs_work"
+    NEXT_STEP = "next_step"
+    WORKFLOW_COMPLETE = "workflow_complete"
+
+
+# =============================================================================
+# Workflow Info Models
+# NOTE: These models are returned by get_workflows tool.
+# Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class StepInfo(BaseModel):
+    """Information about a single step."""
+
+    id: str = Field(description="Step identifier")
+    name: str = Field(description="Human-readable step name")
+    description: str = Field(description="What the step does")
+    dependencies: list[str] = Field(default_factory=list, description="Required prior steps")
+
+
+class ConcurrentStepGroup(BaseModel):
+    """A group of steps that can be executed concurrently."""
+
+    step_ids: list[str] = Field(description="Steps that run in parallel")
+    is_concurrent: bool = Field(default=True)
+
+
+class WorkflowStepEntryInfo(BaseModel):
+    """Information about a workflow step entry (sequential or concurrent)."""
+
+    step_ids: list[str] = Field(description="Step ID(s) in this entry")
+    is_concurrent: bool = Field(default=False, description="True if steps run in parallel")
+
+
+class WorkflowInfo(BaseModel):
+    """Information about a workflow."""
+
+    name: str = Field(description="Workflow identifier")
+    summary: str = Field(description="Short description of workflow")
+
+
+class JobInfo(BaseModel):
+    """Information about a job and its workflows."""
+
+    name: str = Field(description="Job identifier")
+    summary: str = Field(description="Short summary of the job")
+    description: str | None = Field(default=None, description="Full description")
+    workflows: list[WorkflowInfo] = Field(default_factory=list)
+
+
+# =============================================================================
+# Tool Input Models
+# NOTE: Changes to these models affect MCP tool parameters.
+# Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class StartWorkflowInput(BaseModel):
+    """Input for start_workflow tool."""
+
+    goal: str = Field(description="What the user wants to accomplish")
+    job_name: str = Field(description="Name of the job")
+    workflow_name: str = Field(description="Name of the workflow within the job")
+    instance_id: str | None = Field(
+        default=None,
+        description="Optional identifier (e.g., 'acme', 'q1-2026')",
+    )
+
+
+class FinishedStepInput(BaseModel):
+    """Input for finished_step tool."""
+
+    outputs: list[str] = Field(description="List of output file paths created")
+    notes: str | None = Field(default=None, description="Optional notes about work done")
+    quality_review_override_reason: str | None = Field(
+        default=None,
+        description="If provided, skips the quality gate review. Must explain why the review is being bypassed.",
+    )
+
+
+class AbortWorkflowInput(BaseModel):
+    """Input for abort_workflow tool."""
+
+    explanation: str = Field(description="Explanation of why the workflow is being aborted")
+
+
+# =============================================================================
+# Quality Gate Models
+# =============================================================================
+
+
+class QualityCriteriaResult(BaseModel):
+    """Result for a single quality criterion."""
+
+    criterion: str = Field(description="The quality criterion text")
+    passed: bool = Field(description="Whether this criterion passed")
+    feedback: str | None = Field(default=None, description="Feedback if failed")
+
+
+class QualityGateResult(BaseModel):
+    """Result from quality gate evaluation."""
+
+    passed: bool = Field(description="Overall pass/fail")
+    feedback: str = Field(description="Summary feedback")
+    criteria_results: list[QualityCriteriaResult] = Field(
+        default_factory=list, description="Per-criterion results"
+    )
+
+
+# =============================================================================
+# Tool Output Models
+# NOTE: Changes to these models affect MCP tool return types.
+# Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class ActiveStepInfo(BaseModel):
+    """Information about the step to begin working on."""
+
+    session_id: str = Field(description="Unique session identifier")
+    branch_name: str = Field(description="Git branch for this workflow instance")
+    step_id: str = Field(description="ID of the current step")
+    step_expected_outputs: list[str] = Field(description="Expected output files for this step")
+    step_quality_criteria: list[str] = Field(
+        default_factory=list, description="Criteria for step completion"
+    )
+    step_instructions: str = Field(description="Instructions for the step")
+
+
+class GetWorkflowsResponse(BaseModel):
+    """Response from get_workflows tool."""
+
+    jobs: list[JobInfo] = Field(description="List of all jobs with their workflows")
+
+
+class StackEntry(BaseModel):
+    """An entry in the workflow stack."""
+
+    workflow: str = Field(description="Workflow identifier (job_name/workflow_name)")
+    step: str = Field(description="Current step ID in this workflow")
+
+
+class StartWorkflowResponse(BaseModel):
+    """Response from start_workflow tool."""
+
+    begin_step: ActiveStepInfo = Field(description="Information about the first step to begin")
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after starting"
+    )
+
+
+class FinishedStepResponse(BaseModel):
+    """Response from finished_step tool."""
+
+    status: StepStatus = Field(description="Result status")
+
+    # For needs_work status
+    feedback: str | None = Field(default=None, description="Feedback from quality gate")
+    failed_criteria: list[QualityCriteriaResult] | None = Field(
+        default=None, description="Failed quality criteria"
+    )
+
+    # For next_step status
+    begin_step: ActiveStepInfo | None = Field(
+        default=None, description="Information about the next step to begin"
+    )
+
+    # For workflow_complete status
+    summary: str | None = Field(default=None, description="Summary of completed workflow")
+    all_outputs: list[str] | None = Field(default=None, description="All outputs from all steps")
+
+    # Stack info (included in all responses)
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after this operation"
+    )
+
+
+class AbortWorkflowResponse(BaseModel):
+    """Response from abort_workflow tool."""
+
+    aborted_workflow: str = Field(
+        description="The workflow that was aborted (job_name/workflow_name)"
+    )
+    aborted_step: str = Field(description="The step that was active when aborted")
+    explanation: str = Field(description="The explanation provided for aborting")
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after abort"
+    )
+    resumed_workflow: str | None = Field(
+        default=None, description="The workflow now active (if any)"
+    )
+    resumed_step: str | None = Field(default=None, description="The step now active (if any)")
+
+
+# =============================================================================
+# Session State Models
+# =============================================================================
+
+
+class StepProgress(BaseModel):
+    """Progress for a single step in a workflow."""
+
+    step_id: str = Field(description="Step identifier")
+    started_at: str | None = Field(default=None, description="ISO timestamp when started")
+    completed_at: str | None = Field(default=None, description="ISO timestamp when completed")
+    outputs: list[str] = Field(default_factory=list, description="Output files created")
+    notes: str | None = Field(default=None, description="Notes from agent")
+    quality_attempts: int = Field(default=0, description="Number of quality gate attempts")
+
+
+class WorkflowSession(BaseModel):
+    """State for an active workflow session."""
+
+    session_id: str = Field(description="Unique session identifier")
+    job_name: str = Field(description="Name of the job")
+    workflow_name: str = Field(description="Name of the workflow")
+    instance_id: str | None = Field(default=None, description="Instance identifier")
+    goal: str = Field(description="User's goal for this workflow")
+    branch_name: str = Field(description="Git branch name")
+    current_step_id: str = Field(description="Current step in workflow")
+    current_entry_index: int = Field(
+        default=0, description="Index of current entry in step_entries"
+    )
+    step_progress: dict[str, StepProgress] = Field(
+        default_factory=dict, description="Progress for each step"
+    )
+    started_at: str = Field(description="ISO timestamp when session started")
+    completed_at: str | None = Field(default=None, description="ISO timestamp when completed")
+    status: str = Field(default="active", description="Session status: active, completed, aborted")
+    abort_reason: str | None = Field(
+        default=None, description="Explanation if workflow was aborted"
+    )
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return self.model_dump()
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "WorkflowSession":
+        """Create from dictionary."""
+        return cls.model_validate(data)
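
Since `WorkflowSession` is the unit of persisted state, a quick round-trip through its `to_dict`/`from_dict` helpers shows how a JSON-backed state store might use these models; this is a minimal sketch, not from the package, and every field value below is illustrative:

```python
# Illustrative sketch, not from the diff: round-trip a WorkflowSession the way
# a JSON-backed state store might. All field values are made up for the example.
import json

from deepwork.mcp.schemas import StepProgress, WorkflowSession

session = WorkflowSession(
    session_id="sess-001",
    job_name="deepwork_jobs",
    workflow_name="define",
    goal="Draft a new job spec",
    branch_name="deepwork/sess-001",
    current_step_id="define",
    step_progress={"define": StepProgress(step_id="define")},
    started_at="2026-01-01T00:00:00Z",
)

serialized = json.dumps(session.to_dict())  # model_dump() -> plain dict -> JSON
restored = WorkflowSession.from_dict(json.loads(serialized))
assert restored == session  # pydantic models compare by field values
```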