@nestbox-ai/cli 1.0.63 → 1.0.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,905 @@
1
+ "$schema": https://json-schema.org/draft/2020-12/schema
2
+ "$id": https://github.com/nestbox-ai/report-generator/report-config.schema.yaml
3
+ title: GraphRAG Report Configuration
4
+ description: >-
5
+ Schema v2.2 for YAML-driven report generation using GraphRAG and LlamaIndex
6
+ ReAct Agent. Adds template pipes and document repository downloads.
7
+ type: object
8
+ required: [schema_version, report, docsets, llamaindex, computations, template]
9
+ additionalProperties: false
10
+
11
+ properties:
12
+ schema_version:
13
+ type: string
14
+ pattern: "^\\d+\\.\\d+(\\.\\d+)?$"
15
+ description: Config schema version in MAJOR.MINOR or MAJOR.MINOR.PATCH form (e.g. '2.2' or '2.2.0'). Use '2.2' for the current version.
16
+ examples: ["2.2", "2.2.0"]
17
+
18
+ report:
19
+ type: object
20
+ description: Report metadata - identifies and describes the report
21
+ required: [id, name]
22
+ additionalProperties: false
23
+ properties:
24
+ id:
25
+ type: string
26
+ minLength: 1
27
+ pattern: "^[a-z0-9][a-z0-9_-]*[a-z0-9]$|^[a-z0-9]$"
28
+ description: Unique identifier for the report (lowercase letters and digits; underscores/hyphens allowed between them, not at the start or end)
29
+ examples: [shipwell_cfo_kpi_pack_25q4, quarterly_board_report]
30
+ name:
31
+ type: string
32
+ minLength: 1
33
+ description: Human-readable report name displayed in outputs
34
+ examples: ["Shipwell CFO KPI Pack", "Q4 2025 Board Report"]
35
+ description:
36
+ type: string
37
+ description: Detailed description of the report purpose and scope
38
+ examples: ["Quarterly financial KPI extraction from board meeting presentations."]
39
+ version:
40
+ type: string
41
+ description: Report version, typically tied to reporting period
42
+ examples: ["2025.Q4", v1.0]
43
+
44
+ context:
45
+ type: object
46
+ description: >-
47
+ Runtime context variables, policies, and system integration settings.
48
+ Variables can be referenced in prompts as {{context.variable_name}}
49
+ additionalProperties: true
50
+ properties:
51
+ company_name:
52
+ type: string
53
+ description: Company name for prompt substitution
54
+ examples: ["Shipwell, Inc."]
55
+ currency:
56
+ type: string
57
+ default: USD
58
+ description: Default currency for financial values
59
+ examples: [USD, EUR]
60
+ source_deck:
61
+ type: object
62
+ description: Primary source document metadata
63
+ additionalProperties: false
64
+ properties:
65
+ document_id:
66
+ type: string
67
+ description: Reference to document ID in docsets
68
+ title:
69
+ type: string
70
+ description: Human-readable title of the source
71
+ units_policy:
72
+ type: string
73
+ description: Policy for handling units and normalization
74
+ examples: ["Normalize all currency values to USD. Express large values in millions with 'M' suffix."]
75
+ answer_quality_policy:
76
+ type: object
77
+ description: Quality requirements for extracted answers
78
+ additionalProperties: false
79
+ properties:
80
+ numeric_requirements:
81
+ type: array
82
+ items: { type: string }
83
+ description: Requirements for numeric value extraction
84
+ labeling_requirements:
85
+ type: array
86
+ items: { type: string }
87
+ description: Requirements for labeling/citation
88
+ system_of_record_policy:
89
+ type: object
90
+ description: Policy for MCP system of record integration
91
+ additionalProperties: false
92
+ properties:
93
+ kpi_namespace:
94
+ type: string
95
+ description: Namespace for KPI storage
96
+ examples: [shipwell.board.25q4]
97
+ upsert_granularity:
98
+ type: string
99
+ enum: [metric_by_period, bulk]
100
+ description: Granularity of upsert operations
101
+ idempotency_key_template:
102
+ type: string
103
+ description: Template for generating idempotency keys
104
+ examples: ["{{namespace}}:{{metric_id}}:{{period}}"]
105
+ value_types:
106
+ type: array
107
+ items: { type: string }
108
+ description: Allowed value type labels
109
+ examples: [[currency, percentage, count, ratio]]
110
+
111
+ prompts:
112
+ type: object
113
+ description: >-
114
+ Custom named prompts that can be referenced throughout the config.
115
+ Define prompts here and reference them by name in llamaindex settings
116
+ or computation prompts.
117
+ additionalProperties:
118
+ type: string
119
+ minLength: 1
120
+ description: "Prompt text. Can contain {{variable}} placeholders."
121
+ examples:
122
+ - my_system_prompt: "You are a financial analyst extracting KPIs..."
123
+ custom_synthesis: "Synthesize the following results into a structured output..."
124
+ strict_citation_rules: "Every value MUST have a citation. Never leave citations empty..."
125
+
126
+ storage:
127
+ type: object
128
+ description: >-
129
+ Storage configuration for resolving document_id references.
130
+ Required when using document_id in docs instead of locator.
131
+ additionalProperties: false
132
+ required: [base_path]
133
+ properties:
134
+ base_path:
135
+ type: string
136
+ minLength: 1
137
+ description: Base path for resolving document_id references (relative to config file or absolute)
138
+ examples: [./documents, ../documents, /data/graphrag]
139
+ graphrag_subpath:
140
+ type: string
141
+ default: graphrag/output
142
+ description: Subpath within each document folder to the GraphRAG output
143
+ examples: [graphrag/output, output]
144
+
145
+ doc_repository:
146
+ type: object
147
+ description: >-
148
+ Document repository for automatic artifact downloads. When configured,
149
+ documents not found locally will be downloaded from the API and cached.
150
+ additionalProperties: false
151
+ required: [api_base_url, api_key]
152
+ properties:
153
+ api_base_url:
154
+ type: string
155
+ minLength: 1
156
+ description: Base URL of the document repository API
157
+ examples: ["http://130.211.209.186", "${DOC_REPO_URL}"]
158
+ api_key:
159
+ type: string
160
+ minLength: 1
161
+ description: "API key for authentication (supports ${ENV_VAR})"
162
+ examples: ["${DOC_REPO_API_KEY}"]
163
+ rotation:
164
+ type: integer
165
+ minimum: 1
166
+ default: 200
167
+ description: Maximum number of cached documents (FIFO eviction)
168
+
169
+ mcp:
170
+ type: array
171
+ description: Model Context Protocol (MCP) server endpoints for external system integration
172
+ items: { "$ref": "#/$defs/mcpEndpoint" }
173
+
174
+ docsets:
175
+ type: array
176
+ description: Named collections of documents for GraphRAG querying. Each docset groups related documents.
177
+ minItems: 1
178
+ items: { "$ref": "#/$defs/docset" }
179
+
180
+ llamaindex:
181
+ type: object
182
+ description: LlamaIndex ReAct agent configuration - single config for all agent operations
183
+ required: [model, api_key]
184
+ additionalProperties: false
185
+ properties:
186
+ model:
187
+ type: string
188
+ minLength: 1
189
+ description: Model identifier for agent operations
190
+ examples: [gpt-4o, gpt-4.1-mini, claude-3-5-sonnet]
191
+ base_url:
192
+ type: string
193
+ description: "Custom base URL for API calls (supports ${ENV_VAR})"
194
+ examples: ["${OPENAI_BASE_URL:-https://api.openai.com/v1}", "https://api.openai.com/v1"]
195
+ api_key:
196
+ type: string
197
+ minLength: 1
198
+ description: "API key (supports ${ENV_VAR} substitution)"
199
+ examples: ["${OPENAI_API_KEY}", "${LLAMAINDEX_API_KEY}"]
200
+ max_tool_calls:
201
+ type: integer
202
+ minimum: 1
203
+ maximum: 100
204
+ default: 20
205
+ description: Maximum tool calls per agent execution
206
+ examples: [18, 20]
207
+ tool_timeout_seconds:
208
+ type: integer
209
+ minimum: 1
210
+ maximum: 600
211
+ default: 120
212
+ description: Timeout for individual tool calls in seconds
213
+ examples: [120, 180]
214
+ system_prompt:
215
+ type: string
216
+ description: >-
217
+ Main system prompt for the ReAct agent. Defines the agent's role and behavior.
218
+ When omitted, uses default CFO-grade KPI extraction prompt.
219
+ examples: ["You are a CFO-grade KPI extraction analyst..."]
220
+ autonomous_search_guidance:
221
+ type: string
222
+ description: >-
223
+ Search strategy guidance for autonomous mode. Explains when to use
224
+ each search type (basic, local, global, drift). When omitted, uses default guidance.
225
+ examples: ["You have access to multiple GraphRAG search tools. Use basic_search for exact text values..."]
226
+ synthesis_prompt:
227
+ type: string
228
+ description: >-
229
+ Prompt template for synthesizing multiple subtask results into a final value.
230
+ Use {{subtask_results}} placeholder for subtask outputs and {{output_schema}}
231
+ for the expected structure.
232
+ examples: ["Synthesize these search results into a single structured response matching the schema..."]
233
+ validation_repair_prompt:
234
+ type: string
235
+ description: >-
236
+ Prompt template for repairing outputs that fail schema validation.
237
+ Use {{raw_response}}, {{validation_errors}}, and {{output_schema}} placeholders.
238
+ examples: ["The following response failed validation. Fix the errors and return valid JSON..."]
239
+ mcp_system_prompt:
240
+ type: string
241
+ description: System prompt for MCP integration agent. If not set, uses default MCP prompt.
242
+ guardrail_system_prompt:
243
+ type: string
244
+ description: Default system prompt for guardrail LLM-judge calls. Can be overridden per-guardrail.
245
+ json_extraction_prompt:
246
+ type: string
247
+ description: Prompt for LLM fallback when converting non-JSON agent responses to JSON.
248
+ max_agent_iterations:
249
+ type: integer
250
+ minimum: 1
251
+ maximum: 100
252
+ default: 30
253
+ description: Maximum ReAct agent iterations before stopping.
254
+ max_repair_attempts:
255
+ type: integer
256
+ minimum: 1
257
+ maximum: 10
258
+ default: 2
259
+ description: Maximum schema validation repair attempts before failing.
260
+
261
+ computations:
262
+ type: object
263
+ description: Defines which values to extract. Fields produce single values; tables produce rows.
264
+ additionalProperties: false
265
+ properties:
266
+ fields:
267
+ type: array
268
+ description: "Single-value computations (e.g., Total ARR, Gross Margin)"
269
+ items: { "$ref": "#/$defs/fieldComputation" }
270
+ tables:
271
+ type: array
272
+ description: Tabular computations that produce multiple rows
273
+ items: { "$ref": "#/$defs/tableComputation" }
274
+
275
+ template:
276
+ type: object
277
+ description: "Output template configuration. Uses placeholders like {{field.id.property}} and {{table.id}}"
278
+ required: [content]
279
+ additionalProperties: false
280
+ properties:
281
+ format:
282
+ type: string
283
+ enum: [markdown]
284
+ description: Output format (currently only markdown supported)
285
+ sections:
286
+ type: object
287
+ description: Reusable template sections
288
+ additionalProperties: { type: string }
289
+ content:
290
+ type: string
291
+ minLength: 1
292
+ description: Main template content with placeholders for computed values
293
+
294
+ guardrails:
295
+ type: array
296
+ description: LLM-judge validation checks to run on computed outputs
297
+ items: { "$ref": "#/$defs/guardrail" }
298
+
299
+ execution:
300
+ "$ref": "#/$defs/executionConfig"
301
+ description: Execution settings for retries and output configuration
302
+
303
+ "$defs":
304
+ document:
305
+ type: object
306
+ description: A reference to a document with GraphRAG output
307
+ required: [id]
308
+ additionalProperties: false
309
+ properties:
310
+ id:
311
+ type: string
312
+ minLength: 1
313
+ description: Unique document identifier
314
+ examples: [board_deck_pdf, financial_statements]
315
+ locator:
316
+ type: string
317
+ description: >-
318
+ Document location. Can be: a filesystem path to GraphRAG output,
319
+ 'repo:doc-ID' to download from doc_repository, or a bare 'doc-ID'
320
+ to check local cache then download.
321
+ examples: [/data/graphrag/board_deck/output, "repo:doc-6b3c6cdf", doc-a850ad6f]
322
+ document_id:
323
+ type: string
324
+ description: "Legacy: Document folder ID under storage.base_path (deprecated, use locator)"
325
+ examples: [doc-a850ad6f]
326
+ description:
327
+ type: string
328
+ description: Human-readable description
329
+ examples: ["Q4 2025 Board Meeting Presentation"]
330
+ alias:
331
+ type: string
332
+ description: Short alias for referencing in prompts
333
+ examples: [q4_deck]
334
+
335
+ docset:
336
+ type: object
337
+ description: A named collection of documents that can be searched together
338
+ required: [id, docs]
339
+ additionalProperties: false
340
+ properties:
341
+ id:
342
+ type: string
343
+ minLength: 1
344
+ description: Unique docset identifier
345
+ examples: [shipwell_board_deck_25q4]
346
+ description:
347
+ type: string
348
+ description: Description of what this docset represents
349
+ api_key:
350
+ type: string
351
+ description: >-
352
+ API key for OpenAI used by GraphRAG search operations on this docset.
353
+ Supports environment variable substitution (e.g., ${OPENAI_API_KEY}).
354
+ If not set, falls back to the OPENAI_API_KEY environment variable.
355
+ examples: ["${OPENAI_API_KEY}"]
356
+ docs:
357
+ type: array
358
+ minItems: 1
359
+ items: { "$ref": "#/$defs/document" }
360
+ description: List of documents in this docset
361
+
362
+ mcpEndpoint:
363
+ type: object
364
+ description: Configuration for an MCP server endpoint
365
+ required: [id, url]
366
+ additionalProperties: false
367
+ properties:
368
+ id:
369
+ type: string
370
+ minLength: 1
371
+ description: Unique identifier for this MCP endpoint
372
+ examples: [system_of_record]
373
+ type:
374
+ type: string
375
+ enum: [http, streamable-http, sse, stdio]
376
+ default: http
377
+ description: MCP transport type
378
+ examples: [streamable-http]
379
+ url:
380
+ type: string
381
+ minLength: 1
382
+ description: "MCP server URL (supports ${ENV_VAR})"
383
+ examples: ["${SOR_MCP_URL}"]
384
+ headers:
385
+ type: object
386
+ description: HTTP headers for authentication
387
+ additionalProperties: { type: string }
388
+ examples: [{ Authorization: "Bearer ${SOR_MCP_TOKEN}" }]
389
+ timeout_seconds:
390
+ type: integer
391
+ minimum: 1
392
+ maximum: 300
393
+ default: 30
394
+ description: Request timeout in seconds
395
+ description:
396
+ type: string
397
+ description: "Description of this MCP endpoint's purpose"
398
+
399
+ basicSearchOptions:
400
+ type: object
401
+ description: Options for basic_search (embedding-based text retrieval)
402
+ additionalProperties: false
403
+ properties:
404
+ chat_model_id:
405
+ type: string
406
+ description: Model ID for chat completion
407
+ examples: [gpt-4o]
408
+ embedding_model_id:
409
+ type: string
410
+ default: text-embedding-3-large
411
+ description: Embedding model for semantic search
412
+ examples: [text-embedding-3-large, text-embedding-3-small]
413
+ prompt:
414
+ type: [string, "null"]
415
+ description: Custom prompt for search context
416
+ k:
417
+ type: [integer, "null"]
418
+ minimum: 1
419
+ maximum: 100
420
+ default: 10
421
+ description: Number of results to retrieve
422
+
423
+ localSearchOptions:
424
+ type: object
425
+ description: Options for local_search (entity/relationship focused)
426
+ additionalProperties: false
427
+ properties:
428
+ chat_model_id:
429
+ type: string
430
+ description: Model ID for chat completion
431
+ embedding_model_id:
432
+ type: string
433
+ description: Embedding model for entity search
434
+ prompt:
435
+ type: [string, "null"]
436
+ description: Custom prompt for search
437
+ text_unit_prop:
438
+ type: number
439
+ minimum: 0
440
+ maximum: 1
441
+ description: Proportion of context for text units
442
+ community_prop:
443
+ type: number
444
+ minimum: 0
445
+ maximum: 1
446
+ description: Proportion of context for community summaries
447
+ conversation_history_max_turns:
448
+ type: integer
449
+ minimum: 0
450
+ description: Max conversation history turns
451
+ top_k_entities:
452
+ type: integer
453
+ minimum: 1
454
+ default: 10
455
+ description: Number of top entities to retrieve
456
+ top_k_relationships:
457
+ type: integer
458
+ minimum: 1
459
+ default: 10
460
+ description: Number of top relationships to retrieve
461
+ max_context_tokens:
462
+ type: integer
463
+ minimum: 1
464
+ description: Maximum tokens for context
465
+
466
+ globalSearchOptions:
467
+ type: object
468
+ description: Options for global_search (community-level summaries)
469
+ additionalProperties: false
470
+ properties:
471
+ chat_model_id:
472
+ type: string
473
+ description: Model ID for chat completion
474
+ examples: [gpt-4.1-mini]
475
+ knowledge_prompt:
476
+ type: [string, "null"]
477
+ description: Knowledge context prompt
478
+ max_context_tokens:
479
+ type: integer
480
+ minimum: 1
481
+ default: 9000
482
+ description: Maximum tokens for context
483
+ data_max_tokens:
484
+ type: integer
485
+ minimum: 1
486
+ default: 4500
487
+ description: Maximum tokens for data
488
+ map_max_length:
489
+ type: integer
490
+ minimum: 1
491
+ default: 1800
492
+ description: Maximum length for map phase output
493
+ reduce_max_length:
494
+ type: integer
495
+ minimum: 1
496
+ default: 1600
497
+ description: Maximum length for reduce phase output
498
+ dynamic_search_threshold:
499
+ type: integer
500
+ minimum: 0
501
+ default: 2
502
+ description: Threshold for dynamic community search
503
+ dynamic_search_keep_parent:
504
+ type: boolean
505
+ default: true
506
+ description: Keep parent community in dynamic search
507
+ dynamic_search_num_repeats:
508
+ type: integer
509
+ minimum: 1
510
+ default: 1
511
+ description: Number of dynamic search repeats
512
+ dynamic_search_use_summary:
513
+ type: boolean
514
+ default: true
515
+ description: Use community summaries in dynamic search
516
+ dynamic_search_max_level:
517
+ type: integer
518
+ minimum: 0
519
+ default: 2
520
+ description: Maximum level for dynamic search
521
+
522
+ driftSearchOptions:
523
+ type: object
524
+ description: Options for drift_search (multi-hop exploration)
525
+ additionalProperties: false
526
+ properties:
527
+ chat_model_id:
528
+ type: string
529
+ description: Model ID for chat completion
530
+ embedding_model_id:
531
+ type: string
532
+ description: Embedding model for search
533
+ prompt:
534
+ type: [string, "null"]
535
+ description: Custom prompt for search
536
+ reduce_prompt:
537
+ type: [string, "null"]
538
+ description: Custom prompt for reduce phase
539
+ data_max_tokens:
540
+ type: integer
541
+ minimum: 1
542
+ description: Maximum tokens for data
543
+ concurrency:
544
+ type: integer
545
+ minimum: 1
546
+ description: Concurrent search operations
547
+ drift_k_followups:
548
+ type: integer
549
+ minimum: 1
550
+ description: Number of follow-up queries per hop
551
+ primer_folds:
552
+ type: integer
553
+ minimum: 1
554
+ description: Number of primer folds
555
+ primer_llm_max_tokens:
556
+ type: integer
557
+ minimum: 1
558
+ description: Max tokens for primer LLM
559
+ n_depth:
560
+ type: integer
561
+ minimum: 1
562
+ maximum: 5
563
+ default: 2
564
+ description: Maximum exploration depth (hops)
565
+ local_search_text_unit_prop:
566
+ type: number
567
+ minimum: 0
568
+ maximum: 1
569
+ description: Text unit proportion for local search
570
+ local_search_community_prop:
571
+ type: number
572
+ minimum: 0
573
+ maximum: 1
574
+ description: Community proportion for local search
575
+ local_search_top_k_mapped_entities:
576
+ type: integer
577
+ minimum: 1
578
+ description: Top K mapped entities for local search
579
+ local_search_top_k_relationships:
580
+ type: integer
581
+ minimum: 1
582
+ description: Top K relationships for local search
583
+ local_search_max_data_tokens:
584
+ type: integer
585
+ minimum: 1
586
+ description: Max data tokens for local search
587
+ local_search_temperature:
588
+ type: number
589
+ minimum: 0
590
+ maximum: 2
591
+ description: Temperature for local search
592
+ local_search_top_p:
593
+ type: number
594
+ minimum: 0
595
+ maximum: 1
596
+ description: Top P for local search
597
+ local_search_n:
598
+ type: integer
599
+ minimum: 1
600
+ description: N for local search
601
+
602
+ searchOptions:
603
+ type: object
604
+ description: Search options nested under the search type key
605
+ additionalProperties: false
606
+ properties:
607
+ basic_search: { "$ref": "#/$defs/basicSearchOptions" }
608
+ local_search: { "$ref": "#/$defs/localSearchOptions" }
609
+ global_search: { "$ref": "#/$defs/globalSearchOptions" }
610
+ drift_search: { "$ref": "#/$defs/driftSearchOptions" }
611
+ oneOf:
612
+ - required: [basic_search]
613
+ - required: [local_search]
614
+ - required: [global_search]
615
+ - required: [drift_search]
616
+
617
+ docsetSubtask:
618
+ type: object
619
+ description: >-
620
+ A subtask that searches a docset and produces intermediate output.
621
+ When search_type is omitted, the agent autonomously decides which
622
+ search methods to use.
623
+ required: [id, prompt, output_schema]
624
+ additionalProperties: false
625
+ properties:
626
+ id:
627
+ type: string
628
+ minLength: 1
629
+ description: Unique subtask identifier
630
+ examples: [arr_retention_extract, kpi_arr_retention]
631
+ docset_id:
632
+ type: string
633
+ minLength: 1
634
+ description: >-
635
+ Bind this subtask to a specific top-level docset. Overrides
636
+ computation-level docset_id. When set, search tools are pre-scoped.
637
+ examples: [shipwell_board_deck_25q4]
638
+ search_type:
639
+ type: string
640
+ enum: [basic, local, global, drift]
641
+ description: >-
642
+ Type of GraphRAG search. OPTIONAL - when omitted, agent autonomously
643
+ chooses search strategy with access to ALL search tools (basic, local,
644
+ global, drift).
645
+ examples: [global, basic]
646
+ options:
647
+ "$ref": "#/$defs/searchOptions"
648
+ description: Search options nested under the search type key. Optional when using autonomous mode (no search_type).
649
+ prompt:
650
+ type: string
651
+ minLength: 1
652
+ description: Subtask-specific extraction prompt
653
+ examples: ["From the ARR & Logo Waterfall slides, extract..."]
654
+ output_schema:
655
+ type: object
656
+ description: "JSON Schema for this subtask's output"
657
+ required: [type]
658
+ properties:
659
+ type: { type: string }
660
+ properties: { type: object }
661
+ required:
662
+ type: array
663
+ items: { type: string }
664
+
665
+ mcpSubtask:
666
+ type: object
667
+ description: MCP integration subtask with per-MCP prompt
668
+ required: [id, prompt]
669
+ additionalProperties: false
670
+ properties:
671
+ id:
672
+ type: string
673
+ minLength: 1
674
+ description: MCP endpoint ID (must match an id in the mcp array)
675
+ examples: [system_of_record]
676
+ prompt:
677
+ type: string
678
+ minLength: 1
679
+ description: Prompt describing what to do with this MCP endpoint
680
+ examples: ["Upsert the KPI rows to the system of record..."]
681
+
682
+ fieldComputation:
683
+ type: object
684
+ description: A computation that produces a single structured value
685
+ required: [id, label, prompt]
686
+ additionalProperties: false
687
+ properties:
688
+ id:
689
+ type: string
690
+ minLength: 1
691
+ pattern: "^[a-z][a-z0-9_]*$"
692
+ description: "Unique computation ID (used in template as {{field.id}})"
693
+ examples: [arr_retention_snapshot, net_new_arr]
694
+ label:
695
+ type: string
696
+ minLength: 1
697
+ description: Human-readable label for the field
698
+ examples: ["ARR & Retention Snapshot", "Net New ARR"]
699
+ type:
700
+ type: string
701
+ default: object
702
+ description: Output type hint
703
+ examples: [object, number, string]
704
+ description:
705
+ type: string
706
+ description: Detailed description
707
+ priority:
708
+ type: integer
709
+ default: 0
710
+ description: Execution priority (lower = earlier)
711
+ depends_on:
712
+ type: array
713
+ items: { type: string, minLength: 1 }
714
+ description: IDs of computations this depends on
715
+ docset_id:
716
+ type: string
717
+ minLength: 1
718
+ description: >-
719
+ Default docset for all subtasks in this computation. Overridden by
720
+ subtask-level docset_id. When set, search tools are pre-scoped.
721
+ examples: [shipwell_board_deck_25q4]
722
+ agents:
723
+ type: array
724
+ description: Agent subtasks — each spawns a ReAct agent with its own prompt and search tools
725
+ items: { "$ref": "#/$defs/docsetSubtask" }
726
+ mcp_scope:
727
+ type: array
728
+ description: Array of MCP subtasks with per-MCP prompts
729
+ items: { "$ref": "#/$defs/mcpSubtask" }
730
+ prompt:
731
+ type: string
732
+ minLength: 1
733
+ description: Synthesis prompt - combines subtask outputs into final result
734
+ examples: ["Return 25Q4 ARR snapshot with citations."]
735
+ output_schema:
736
+ type: object
737
+ description: >-
738
+ JSON Schema for the final synthesized output. Only needed for
739
+ multi-agent computations where synthesis combines different schemas.
740
+ For single-agent computations, the agent's output_schema is used automatically.
741
+ properties:
742
+ type: { type: string }
743
+ properties: { type: object }
744
+ required:
745
+ type: array
746
+ items: { type: string }
747
+
748
+ tableComputation:
749
+ type: object
750
+ description: A computation that produces a table with multiple rows
751
+ required: [id, title, prompt]
752
+ additionalProperties: false
753
+ properties:
754
+ id:
755
+ type: string
756
+ minLength: 1
757
+ pattern: "^[a-z][a-z0-9_]*$"
758
+ description: "Unique computation ID (used in template as {{table.id}})"
759
+ examples: [cfo_kpi_export, key_deals]
760
+ title:
761
+ type: string
762
+ minLength: 1
763
+ description: Human-readable title for the table
764
+ examples: ["CFO KPI Export", "Key Customer Deals"]
765
+ description:
766
+ type: string
767
+ description: Detailed description
768
+ priority:
769
+ type: integer
770
+ default: 0
771
+ description: Execution priority (lower = earlier)
772
+ depends_on:
773
+ type: array
774
+ items: { type: string, minLength: 1 }
775
+ description: IDs of computations this depends on
776
+ docset_id:
777
+ type: string
778
+ minLength: 1
779
+ description: >-
780
+ Default docset for all subtasks in this computation. Overridden by
781
+ subtask-level docset_id. When set, search tools are pre-scoped.
782
+ examples: [shipwell_board_deck_25q4]
783
+ agents:
784
+ type: array
785
+ description: Agent subtasks — each spawns a ReAct agent with its own prompt and search tools
786
+ items: { "$ref": "#/$defs/docsetSubtask" }
787
+ mcp_scope:
788
+ type: array
789
+ description: Array of MCP subtasks
790
+ items: { "$ref": "#/$defs/mcpSubtask" }
791
+ prompt:
792
+ type: string
793
+ minLength: 1
794
+ description: Synthesis prompt - combines subtask outputs into final table
795
+ output_schema:
796
+ type: object
797
+ description: >-
798
+ JSON Schema for the final table output. Only needed for multi-agent
799
+ computations. For single-agent computations, the agent's output_schema
800
+ is used automatically.
801
+ properties:
802
+ type: { type: string }
803
+ properties: { type: object }
804
+ required:
805
+ type: array
806
+ items: { type: string }
807
+
808
+ guardrail:
809
+ type: object
810
+ description: LLM-judge validation rule for computed outputs
811
+ required: [id, target, on_fail, model, api_key, prompt]
812
+ additionalProperties: false
813
+ properties:
814
+ id:
815
+ type: string
816
+ minLength: 1
817
+ pattern: "^[a-z][a-z0-9_]*$"
818
+ description: Unique guardrail identifier
819
+ examples: [gr_no_uncited_numbers, gr_arr_math]
820
+ target:
821
+ type: string
822
+ minLength: 1
823
+ description: >-
824
+ What to validate: 'computations' (all), 'field.{id}' (single field),
825
+ 'table.{id}' (single table), 'final_report' (rendered report)
826
+ examples: [computations, field.arr_retention_snapshot, table.arr_logo_waterfall_history, final_report]
827
+ on_fail:
828
+ type: string
829
+ enum: [error, warn]
830
+ description: "Action on failure: 'error' blocks output; 'warn' only reports the failure"
831
+ examples: [warn, error]
832
+ model:
833
+ type: string
834
+ minLength: 1
835
+ description: Model ID for LLM-judge
836
+ examples: [gpt-4.1-mini, gpt-4o]
837
+ base_url:
838
+ type: string
839
+ description: "Custom base URL for guardrail LLM (supports ${ENV_VAR})"
840
+ examples: ["${OPENAI_BASE_URL:-https://api.openai.com/v1}"]
841
+ api_key:
842
+ type: string
843
+ minLength: 1
844
+ description: "API key for guardrail LLM (supports ${ENV_VAR})"
845
+ examples: ["${LLAMAINDEX_API_KEY}"]
846
+ prompt:
847
+ type: string
848
+ minLength: 1
849
+ description: "Validation prompt. Use {{content}} placeholder for the value being checked."
850
+ examples: ["Enforce that every numeric value includes a citation..."]
851
+ system_prompt:
852
+ type: string
853
+ description: "Custom system prompt for this guardrail's LLM-judge. Overrides llamaindex.guardrail_system_prompt."
854
+ description:
855
+ type: string
856
+ description: Human-readable description of what this guardrail checks
857
+
858
+ executionConfig:
859
+ type: object
860
+ description: Execution settings for retries and output configuration
861
+ additionalProperties: false
862
+ properties:
863
+ retries:
864
+ type: object
865
+ additionalProperties: false
866
+ properties:
867
+ max_attempts:
868
+ type: integer
869
+ minimum: 1
870
+ maximum: 10
871
+ default: 3
872
+ description: Maximum retry attempts per computation
873
+ backoff_seconds:
874
+ type: number
875
+ minimum: 0
876
+ default: 1.0
877
+ description: Seconds to wait between retries
878
+ output:
879
+ type: object
880
+ additionalProperties: false
881
+ properties:
882
+ directory:
883
+ type: string
884
+ default: ./output
885
+ description: Directory for output files
886
+ timestamp_suffix:
887
+ type: boolean
888
+ default: false
889
+ description: Add timestamp to output directory name
890
+ include_final_report:
891
+ type: boolean
892
+ default: true
893
+ description: Generate final markdown report
894
+ include_computed_json:
895
+ type: boolean
896
+ default: true
897
+ description: Include raw computed values as JSON
898
+ include_evidence:
899
+ type: boolean
900
+ default: true
901
+ description: Include search results and evidence
902
+ include_guardrails:
903
+ type: boolean
904
+ default: true
905
+ description: Run guardrails and include guardrails.json