npm - hazo_collab_forms - Versions diffs - 3.1.7 → 5.0.0 - Mend

hazo_collab_forms 3.1.7 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (284)

package/dist/lib/validation_handler.js (changed)

@@ -1,8 +1,9 @@
 /**
  * Server-side validation route handler factory.
  *
- * Creates a POST handler that executes validation rules against uploaded documents
- * using LLM analysis, and returns ClarificationItems for any issues found.
+ * Creates a POST handler that executes validation rules against uploaded
+ * documents or plain text using LLM analysis, and returns ClarificationItems
+ * for any issues found.
  *
  * Usage:
  *   import { create_validation_route } from 'hazo_collab_forms/lib';
@@ -13,112 +14,145 @@
  */
 import 'server-only';
 import { DEFAULT_CLARIFICATION_TEMPLATES } from '../config/clarification_templates.js';
-import { DEFAULT_VALIDATION_PROMPT_SUFFIX } from '../config/defaults.js';
 import { get_config } from './config.js';
+import { make_issue_id } from '../utils/validation_result.js';
+import { extract_expectation } from '../utils/expectation_extractor.js';
 // ============================================================================
 // Helpers
 // ============================================================================
 function generate_clarification_id() {
     return `clr_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`;
 }
-/**
- * Extract has_issue and issue_description from a parsed JSON object.
- * Supports both new format (validation_passed, validation_details) and
- * legacy format (has_issue, issue_description).
- */
-function extract_validation_fields(parsed) {
-    // Determine has_issue: prefer new `validation_passed` (inverted), fall back to legacy `has_issue`
-    const has_issue = 'validation_passed' in parsed
-        ? !parsed.validation_passed
-        : !!parsed.has_issue;
-    // Determine issue_description: prefer new `validation_details`, fall back to legacy names
-    const issue_description = parsed.validation_details ||
-        parsed.issue_description ||
-        parsed.description ||
-        parsed.issue ||
-        undefined;
-    return {
-        has_issue,
-        issue_description,
-        confidence: typeof parsed.confidence === 'number' ? parsed.confidence : undefined,
-    };
-}
-/**
- * Parse LLM response for validation results.
- * Supports new format: { validation_passed, validation_details, confidence }
- * and legacy format: { has_issue, issue_description, confidence }
- */
-function parse_validation_response(text) {
-    const cleaned = text.trim();
-    // Try direct parse
-    try {
-        const parsed = JSON.parse(cleaned);
-        return extract_validation_fields(parsed);
-    }
-    catch {
-        // Continue
-    }
-    // Strip markdown code fences
-    const fence_match = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
-    if (fence_match) {
-        try {
-            const parsed = JSON.parse(fence_match[1].trim());
-            return extract_validation_fields(parsed);
-        }
-        catch {
-            // Continue
-        }
-    }
-    // Try to find JSON in text
-    const json_match = cleaned.match(/\{[\s\S]*\}/);
-    if (json_match) {
-        try {
-            const parsed = JSON.parse(json_match[0]);
-            return extract_validation_fields(parsed);
-        }
-        catch {
-            // Continue
-        }
-    }
-    // Heuristic: if the text mentions "issue", "problem", "invalid", etc., consider it an issue
-    const issue_keywords = ['issue', 'problem', 'invalid', 'missing', 'incomplete', 'error', 'incorrect'];
-    const lower = cleaned.toLowerCase();
-    const has_issue = issue_keywords.some(k => lower.includes(k));
-    return {
-        has_issue,
-        issue_description: has_issue ? cleaned.slice(0, 500) : undefined,
-    };
-}
 function is_image_mime(mime_type) {
     return mime_type.startsWith('image/');
 }
 function is_document_mime(mime_type) {
     return mime_type === 'application/pdf';
 }
-/**
- * Substitute {{variable}} placeholders in prompt text.
- */
+/** Substitute {{variable}} placeholders in prompt text. */
 function substitute_variables(prompt, variables) {
     return prompt.replace(/\{\{(\w+)\}\}/g, (_, key) => {
         return variables[key] ?? `{{${key}}}`;
     });
 }
-/**
- * Build a validation prompt that wraps the rule prompt with a configurable response format suffix.
- * Reads `[validation] prompt_suffix` from INI config, falling back to DEFAULT_VALIDATION_PROMPT_SUFFIX.
- */
-function build_validation_prompt(rule_prompt) {
-    const suffix = get_config('validation', 'prompt_suffix') ?? DEFAULT_VALIDATION_PROMPT_SUFFIX;
-    return `You are a document validation assistant. Analyze the provided document according to the following rule and determine if there is an issue.
-VALIDATION RULE:
-${rule_prompt}
-${suffix}`;
+/** Wrap a per-rule prompt with the response envelope from hazo_prompts. */
+function wrap_rule_prompt(rule_prompt, wrapper) {
+    return substitute_variables(wrapper, { rule_prompt });
 }
+// ============================================================================
+// Parser (unified — v4)
+// ============================================================================
 /**
- * Create a ClarificationItem from a validation rule result.
+ * Parse the LLM response into a ValidationRuleResult.
+ *
+ * Expected shape (per validation/response_wrapper in hazo_prompts):
+ *   {
+ *     "summary": "...",                    // optional rule-level explanation
+ *     "issues": [                          // empty = passed
+ *       { "description": "...", "amount": "...", "date": "...", "reason": "...",
+ *         "client_comment": "...", "confidence": 0.9 }
+ *     ],
+ *     "confidence": 0.95                   // optional rule-level confidence
+ *   }
+ *
+ * Tolerates: direct JSON, fenced code block, regex-extracted object, and
+ * top-level arrays (treated as the issues array).
  */
+export function parse_response(text, rule_id) {
+    const cleaned = text.trim();
+    const try_parse = (raw) => {
+        try {
+            return JSON.parse(raw);
+        }
+        catch {
+            return null;
+        }
+    };
+    const parsed = try_parse(cleaned) ??
+        (() => {
+            const fence = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
+            return fence ? try_parse(fence[1].trim()) : null;
+        })() ??
+        (() => {
+            const obj = cleaned.match(/\{[\s\S]*\}/);
+            if (obj)
+                return try_parse(obj[0]);
+            const arr = cleaned.match(/\[[\s\S]*\]/);
+            return arr ? try_parse(arr[0]) : null;
+        })();
+    if (!parsed)
+        return { issues: [] };
+    let raw_issues = [];
+    let summary;
+    let confidence;
+    let extracted_data;
+    if (Array.isArray(parsed)) {
+        raw_issues = parsed;
+    }
+    else if (typeof parsed === 'object' && parsed !== null) {
+        const obj = parsed;
+        if (Array.isArray(obj.issues))
+            raw_issues = obj.issues;
+        else if (Array.isArray(obj.items))
+            raw_issues = obj.items;
+        if (typeof obj.summary === 'string')
+            summary = obj.summary;
+        if (typeof obj.confidence === 'number')
+            confidence = obj.confidence;
+        if (obj.extracted_data && typeof obj.extracted_data === 'object' && !Array.isArray(obj.extracted_data)) {
+            extracted_data = obj.extracted_data;
+        }
+    }
+    const issues = [];
+    for (const row of raw_issues) {
+        if (typeof row !== 'object' || row === null)
+            continue;
+        const r = row;
+        const description_raw = r.description ||
+            r.issue_description ||
+            r.issue ||
+            r.label ||
+            '';
+        if (!description_raw)
+            continue;
+        const amount = r.amount;
+        const date = r.date;
+        const reason = r.reason;
+        const client_comment = r.client_comment;
+        const issue_confidence = typeof r.confidence === 'number' ? r.confidence : undefined;
+        // Compose a richer human-readable description that includes amount/date
+        // so list views show useful context without reaching into sub-fields.
+        const parts = [description_raw];
+        if (amount !== undefined && amount !== '')
+            parts.push(`$${amount}`);
+        if (date)
+            parts.push(String(date));
+        const header = parts.join(' — ');
+        const issue_description = reason && reason !== description_raw
+            ? `${header}. ${reason}`
+            : header;
+        const id_seed = [rule_id, description_raw, amount, date];
+        const fallback = `iss-${issues.length}`;
+        issues.push({
+            issue_id: make_issue_id(id_seed, fallback),
+            issue_description,
+            ...(amount !== undefined && amount !== '' ? { amount: String(amount) } : {}),
+            ...(date ? { date: String(date) } : {}),
+            ...(reason ? { reason } : {}),
+            ...(client_comment ? { client_comment } : {}),
+            ...(issue_confidence !== undefined ? { confidence: issue_confidence } : {}),
+        });
+    }
+    return {
+        issues,
+        ...(summary ? { summary } : {}),
+        ...(confidence !== undefined ? { confidence } : {}),
+        ...(extracted_data ? { extracted_data } : {}),
+    };
+}
+// ============================================================================
+// Clarification creation
+// ============================================================================
 function create_clarification_from_result(rule, result, doc_info) {
     const template = rule.clarification_type !== 'none'
         ? DEFAULT_CLARIFICATION_TEMPLATES[rule.clarification_type]
@@ -126,6 +160,16 @@ function create_clarification_from_result(rule, result, doc_info) {
     const response_options = rule.custom_response_options ??
         template?.response_options ??
         [];
+    // Derive a human-readable issue description: prefer the first issue's
+    // client_comment (LLM-drafted for the client), fall back to its description,
+    // then the rule-level summary, then template/default.
+    const first = result.issues[0];
+    const derived_issue_description = first?.client_comment ??
+        first?.issue_description ??
+        result.summary ??
+        template?.default_issue_description ??
+        'A validation issue was detected.';
+    const expectation = first ? extract_expectation(first) : undefined;
     return {
         id: generate_clarification_id(),
         type: rule.clarification_type,
@@ -135,10 +179,8 @@ function create_clarification_from_result(rule, result, doc_info) {
         rule_name: rule.name,
         rule_id: rule.rule_id,
         issue_description: rule.custom_issue_description ??
-            result.issue_description ??
-            template?.default_issue_description ??
-            'A validation issue was detected.',
-        validation_details: result.issue_description,
+            derived_issue_description,
+        validation_details: result.summary ?? first?.issue_description,
         doc_references: doc_info ? [{
                 file_id: '', // Client enriches with actual file_id
                 file_name: doc_info.file_name,
@@ -149,6 +191,7 @@ function create_clarification_from_result(rule, result, doc_info) {
         response_options,
         response_files: [],
         created_at: new Date().toISOString(),
+        ...(expectation ? { expectation } : {}),
     };
 }
 // ============================================================================
@@ -171,6 +214,51 @@ export function create_validation_route(options) {
     // Cache the dynamic import
     let llm_api_module = null;
     let init_promise = null;
+    // Validation wrappers live in hazo_prompts (not in code). Loaded once per
+    // route-handler lifetime and cached. Updates via the Prompt Editor require a
+    // server restart to pick up — same behaviour as every other prompt loaded
+    // here. There is intentionally no hardcoded fallback: missing wrappers must
+    // surface as a 500 with a seeding hint.
+    let wrapper_cache = null;
+    async function get_validation_wrappers() {
+        if (wrapper_cache)
+            return wrapper_cache;
+        await ensure_initialized();
+        const api = await get_llm_api();
+        const connect = api.get_hazo_connect?.() ?? null;
+        if (!connect) {
+            throw new Error('hazo_llm_api hazo_connect is not available — cannot load validation wrappers from hazo_prompts');
+        }
+        const fetch_one = async (key) => {
+            const r = await connect.get_by_area_key('validation', key);
+            return r.success ? (r.data?.prompt_text_full ?? null) : null;
+        };
+        const [response, multi_doc, text, document_extract] = await Promise.all([
+            fetch_one('response_wrapper'),
+            fetch_one('multi_doc_suffix'),
+            fetch_one('text_wrapper'),
+            fetch_one('document_extract'),
+        ]);
+        const missing = [];
+        if (!response)
+            missing.push('validation/response_wrapper');
+        if (!multi_doc)
+            missing.push('validation/multi_doc_suffix');
+        if (!text)
+            missing.push('validation/text_wrapper');
+        if (!document_extract)
+            missing.push('validation/document_extract');
+        if (missing.length > 0) {
+            throw new Error(`Validation wrappers missing in hazo_prompts: ${missing.join(', ')}. Run test-app/scripts/seed-prompts.mjs.`);
+        }
+        wrapper_cache = {
+            response: response,
+            multi_doc: multi_doc,
+            text: text,
+            document_extract: document_extract,
+        };
+        return wrapper_cache;
+    }
     async function get_llm_api() {
         if (!llm_api_module) {
             try {
@@ -196,138 +284,122 @@ export function create_validation_route(options) {
         }
     }
     /**
-     * Execute a single validation rule against a document.
+     * Call the LLM for a given input. Returns the raw response text.
+     * Internal: all three input kinds converge here so run_rule can stay flat.
      */
-    async function execute_rule(rule, file_b64, mime_type, variables) {
+    async function call_llm_for_input(full_prompt, input, wrappers) {
         const api = await get_llm_api();
-        // Substitute variables in the prompt
-        const resolved_prompt = substitute_variables(rule.prompt, variables);
-        const full_prompt = build_validation_prompt(resolved_prompt);
-        // Call LLM
-        let llm_response;
-        if (is_image_mime(mime_type)) {
-            llm_response = await api.hazo_llm_image_text({
-                prompt: full_prompt,
-                image_b64: file_b64,
-                image_mime_type: mime_type,
+        if (input.kind === 'text') {
+            const text_prompt = substitute_variables(wrappers.text, {
+                full_prompt,
+                text_content: input.text_content,
             });
+            return api.hazo_llm_text_text({ prompt: text_prompt });
         }
-        else if (is_document_mime(mime_type)) {
-            llm_response = await api.hazo_llm_document_text({
+        if (input.kind === 'document') {
+            if (is_image_mime(input.mime_type)) {
+                return api.hazo_llm_image_text({
+                    prompt: full_prompt,
+                    image_b64: input.file_b64,
+                    image_mime_type: input.mime_type,
+                });
+            }
+            return api.hazo_llm_document_text({
                 prompt: full_prompt,
-                document_b64: file_b64,
-                document_mime_type: mime_type,
+                document_b64: input.file_b64,
+                document_mime_type: input.mime_type,
             });
         }
-        else {
-            // Fallback: treat as image
-            llm_response = await api.hazo_llm_image_text({
+        // input.kind === 'documents' (batch)
+        const [primary, ...additional] = input.files;
+        const additional_documents = additional.map(f => ({
+            mime_type: f.mime_type,
+            data: f.file_b64,
+        }));
+        // If the primary is an image, fall back to extract-then-evaluate since
+        // image_text doesn't support additional_documents.
+        if (is_image_mime(primary.mime_type)) {
+            return batch_via_extraction(full_prompt, input.files, wrappers);
+        }
+        try {
+            return await api.hazo_llm_document_text({
                 prompt: full_prompt,
-                image_b64: file_b64,
-                image_mime_type: mime_type,
+                document_b64: primary.file_b64,
+                document_mime_type: primary.mime_type,
+                additional_documents: additional_documents.length > 0 ? additional_documents : undefined,
             });
         }
-        if (!llm_response.success || !llm_response.text) {
-            return {
-                rule_id: rule.rule_id,
-                has_issue: false,
-                raw_response: llm_response.error ?? 'LLM call failed',
-            };
+        catch {
+            // If additional_documents isn't supported, fall back.
+            return batch_via_extraction(full_prompt, input.files, wrappers);
         }
-        // Parse response
-        const parsed = parse_validation_response(llm_response.text);
-        return {
-            rule_id: rule.rule_id,
-            has_issue: parsed.has_issue,
-            issue_description: parsed.issue_description,
-            confidence: parsed.confidence,
-            raw_response: llm_response.text,
-        };
     }
     /**
-     * Execute a validation rule against multiple documents collectively.
-     * Uses hazo_llm_document_text with additional_documents for multi-file LLM calls.
-     * Falls back to extract-then-evaluate via hazo_llm_text_text if additional_documents is not supported.
+     * Fallback: extract each file individually, then evaluate collectively via
+     * text-only LLM. Uses validation/document_extract + validation/multi_doc_suffix.
      */
-    async function execute_rule_batch(rule, files, variables) {
+    async function batch_via_extraction(full_prompt, files, wrappers) {
         const api = await get_llm_api();
-        const resolved_prompt = substitute_variables(rule.prompt, variables);
-        const full_prompt = build_validation_prompt(resolved_prompt);
-        // Primary file
-        const primary = files[0];
-        const additional = files.slice(1);
-        // Build additional_documents array for multi-file LLM call
-        const additional_documents = additional.map(f => ({
-            mime_type: f.mime_type,
-            data: f.file_b64,
-        }));
-        let llm_response;
-        try {
-            if (is_document_mime(primary.mime_type)) {
-                llm_response = await api.hazo_llm_document_text({
-                    prompt: full_prompt,
-                    document_b64: primary.file_b64,
-                    document_mime_type: primary.mime_type,
-                    additional_documents: additional_documents.length > 0 ? additional_documents : undefined,
-                });
-            }
-            else if (is_image_mime(primary.mime_type)) {
-                // For images, fall back to extract-then-evaluate since image_text doesn't support additional_documents
-                llm_response = await execute_batch_via_extraction(api, full_prompt, files);
-            }
-            else {
-                llm_response = await api.hazo_llm_document_text({
-                    prompt: full_prompt,
-                    document_b64: primary.file_b64,
-                    document_mime_type: primary.mime_type,
-                    additional_documents: additional_documents.length > 0 ? additional_documents : undefined,
-                });
-            }
-        }
-        catch {
-            // If additional_documents not supported, fall back to extract-then-evaluate
-            llm_response = await execute_batch_via_extraction(api, full_prompt, files);
+        const extractions = [];
+        for (const file of files) {
+            const resp = is_image_mime(file.mime_type)
+                ? await api.hazo_llm_image_text({ prompt: wrappers.document_extract, image_b64: file.file_b64, image_mime_type: file.mime_type })
+                : await api.hazo_llm_document_text({ prompt: wrappers.document_extract, document_b64: file.file_b64, document_mime_type: file.mime_type });
+            extractions.push(`FILE: ${file.file_name}\n${resp.success && resp.text ? resp.text : '[Extraction failed]'}`);
         }
+        const combined_prompt = substitute_variables(wrappers.multi_doc, {
+            full_prompt,
+            extractions: extractions.join('\n\n---\n\n'),
+        });
+        return api.hazo_llm_text_text({ prompt: combined_prompt });
+    }
+    /**
+     * Execute a single rule against a given input. This is the one place all
+     * three paths (single document, multi-document batch, text) converge.
+     */
+    async function run_rule(rule, input, variables, wrappers) {
+        const resolved_prompt = substitute_variables(rule.prompt, variables);
+        const full_prompt = wrap_rule_prompt(resolved_prompt, wrappers.response);
+        const llm_response = await call_llm_for_input(full_prompt, input, wrappers);
         if (!llm_response.success || !llm_response.text) {
             return {
                 rule_id: rule.rule_id,
-                has_issue: false,
-                raw_response: llm_response.error ?? 'LLM batch call failed',
+                issues: [],
+                raw_response: llm_response.error ?? 'LLM call failed',
+                check_type: rule.check_type,
             };
         }
-        const parsed = parse_validation_response(llm_response.text);
+        const parsed = parse_response(llm_response.text, rule.rule_id);
         return {
             rule_id: rule.rule_id,
-            has_issue: parsed.has_issue,
-            issue_description: parsed.issue_description,
-            confidence: parsed.confidence,
+            issues: parsed.issues,
+            ...(parsed.summary ? { summary: parsed.summary } : {}),
+            ...(parsed.confidence !== undefined ? { confidence: parsed.confidence } : {}),
+            ...(parsed.extracted_data ? { extracted_data: parsed.extracted_data } : {}),
             raw_response: llm_response.text,
+            check_type: rule.check_type,
         };
     }
-    /**
-     * Fallback: extract info from each file individually, then evaluate collectively via text-only LLM.
-     */
-    async function execute_batch_via_extraction(api, full_prompt, files) {
-        const extract_prompt = 'Extract all relevant information from this document: monetary amounts with descriptions, dates, payee/payer names, document type, and reference numbers. Return a concise structured summary.';
-        const extractions = [];
-        for (const file of files) {
-            let resp;
-            if (is_image_mime(file.mime_type)) {
-                resp = await api.hazo_llm_image_text({ prompt: extract_prompt, image_b64: file.file_b64, image_mime_type: file.mime_type });
-            }
-            else {
-                resp = await api.hazo_llm_document_text({ prompt: extract_prompt, document_b64: file.file_b64, document_mime_type: file.mime_type });
-            }
-            extractions.push(`FILE: ${file.file_name}\n${resp.success && resp.text ? resp.text : '[Extraction failed]'}`);
-        }
-        const combined_prompt = `${full_prompt}\n\nIMPORTANT: You are evaluating MULTIPLE documents collectively. For monetary amounts, compare the TOTAL/SUM across all documents.\n\nEXTRACTED DOCUMENT INFORMATION:\n${extractions.join('\n\n---\n\n')}`;
-        return api.hazo_llm_text_text({ prompt: combined_prompt });
+    // ── File fetching helpers (route-level) ──
+    async function fetch_file_b64(request_url, entry) {
+        if (entry.file_b64)
+            return entry.file_b64;
+        if (!entry.download_url)
+            throw new Error(`No file_b64 or download_url for "${entry.file_name}"`);
+        const absolute = entry.download_url.startsWith('http')
+            ? entry.download_url
+            : `${new URL(request_url).origin}${entry.download_url}`;
+        const resp = await fetch(absolute);
+        if (!resp.ok)
+            throw new Error(`Failed to fetch file "${entry.file_name}": ${resp.status}`);
+        const buffer = Buffer.from(await resp.arrayBuffer());
+        return buffer.toString('base64');
     }
+    // ── Route ──
     return async function POST(request) {
         try {
             const body = await request.json();
             const { file_name, mime_type, download_url, file_b64: request_file_b64, rules, variables = {}, content_tag, } = body;
-            // Validate required fields
             if (!rules?.length) {
                 return Response.json({
                     success: false,
@@ -336,144 +408,122 @@ export function create_validation_route(options) {
                     errors: [{ rule_id: '', error: 'rules array is required and must not be empty' }],
                 }, { status: 400 });
             }
-            // Initialize LLM API
             await ensure_initialized();
-            // ── Batch mode: validate multiple files collectively ──
+            const wrappers = await get_validation_wrappers();
+            // ── Build the ValidationInput based on request shape ──
+            let input;
+            const text_content = body.text_content;
+            const is_text_mode = !!text_content && !request_file_b64 && !download_url;
+            const rule_results = [];
+            const clarifications = [];
+            const errors = [];
             if (body.mode === 'batch' && body.files?.length) {
+                // Multi-file batch
                 const batch_files = [];
-                const fetch_errors = [];
                 for (const f of body.files) {
-                    let b64;
-                    if (f.file_b64) {
-                        b64 = f.file_b64;
-                    }
-                    else if (f.download_url) {
-                        const absolute_url = f.download_url.startsWith('http')
-                            ? f.download_url
-                            : `${new URL(request.url).origin}${f.download_url}`;
-                        const file_response = await fetch(absolute_url);
-                        if (!file_response.ok) {
-                            fetch_errors.push({ rule_id: '', error: `Failed to fetch file "${f.file_name}": ${file_response.status}` });
-                            continue;
-                        }
-                        const buffer = Buffer.from(await file_response.arrayBuffer());
-                        b64 = buffer.toString('base64');
+                    try {
+                        const b64 = await fetch_file_b64(request.url, f);
+                        batch_files.push({ file_id: f.file_id, file_name: f.file_name, mime_type: f.mime_type, file_b64: b64 });
                     }
-                    else {
-                        fetch_errors.push({ rule_id: '', error: `No file_b64 or download_url for "${f.file_name}"` });
-                        continue;
+                    catch (err) {
+                        errors.push({ rule_id: '', error: err instanceof Error ? err.message : 'Unknown file fetch error' });
                     }
-                    batch_files.push({ file_id: f.file_id, file_name: f.file_name, file_b64: b64, mime_type: f.mime_type });
                 }
                 if (batch_files.length === 0) {
                     return Response.json({
-                        success: false, clarifications: [], rule_results: [],
-                        errors: fetch_errors.length > 0 ? fetch_errors : [{ rule_id: '', error: 'No files could be fetched for batch validation' }],
+                        success: false,
+                        clarifications: [],
+                        rule_results: [],
+                        errors: errors.length > 0 ? errors : [{ rule_id: '', error: 'No files could be fetched for batch validation' }],
                     }, { status: 500 });
                 }
+                input = { kind: 'documents', files: batch_files };
                 const batch_variables = {
                     ...variables,
                     document_name: batch_files.map(f => f.file_name).join(', '),
                     ...(content_tag ? { document_type: content_tag } : {}),
                 };
-                const batch_rule_results = [];
-                const batch_clarifications = [];
-                const batch_errors = [...fetch_errors];
                 for (const rule of rules) {
                     try {
-                        const result = await execute_rule_batch(rule, batch_files, batch_variables);
-                        batch_rule_results.push(result);
-                        if (result.has_issue && rule.clarification_type !== 'none') {
+                        const result = await run_rule(rule, input, batch_variables, wrappers);
+                        rule_results.push(result);
+                        if (result.issues.length > 0 && rule.clarification_type !== 'none') {
                             const clarification = create_clarification_from_result(rule, result, {
                                 file_name: batch_files.map(f => f.file_name).join(', '),
                                 mime_type: batch_files[0].mime_type,
                             });
-                            // Set doc_references to include ALL files in the batch
                             clarification.doc_references = batch_files.map(f => ({
-                                file_id: f.file_id,
-                                file_name: f.file_name,
-                                mime_type: f.mime_type,
+                                file_id: f.file_id, file_name: f.file_name, mime_type: f.mime_type,
                             }));
-                            batch_clarifications.push(clarification);
+                            clarifications.push(clarification);
                         }
                     }
                     catch (err) {
                         const message = err instanceof Error ? err.message : 'Unknown error';
                         on_error(err, `batch rule ${rule.rule_id}: ${rule.name}`);
-                        batch_errors.push({ rule_id: rule.rule_id, error: message });
-                        batch_rule_results.push({ rule_id: rule.rule_id, has_issue: false, raw_response: message });
+                        errors.push({ rule_id: rule.rule_id, error: message });
+                        rule_results.push({ rule_id: rule.rule_id, issues: [], raw_response: message, check_type: rule.check_type });
                     }
                 }
-                return Response.json({
-                    success: batch_errors.length === 0,
-                    clarifications: batch_clarifications,
-                    rule_results: batch_rule_results,
-                    ...(batch_errors.length > 0 ? { errors: batch_errors } : {}),
-                });
             }
-            // ── Single file mode (default) ──
-            // Get file as base64
-            let file_b64;
-            if (request_file_b64) {
-                file_b64 = request_file_b64;
+            else if (is_text_mode) {
+                // Text mode
+                input = { kind: 'text', text_content: text_content, file_name };
+                const text_variables = {
+                    ...variables,
+                    document_name: file_name || 'text input',
+                    ...(content_tag ? { document_type: content_tag } : {}),
+                };
+                for (const rule of rules) {
+                    try {
+                        const result = await run_rule(rule, input, text_variables, wrappers);
+                        rule_results.push(result);
+                        if (result.issues.length > 0 && rule.clarification_type !== 'none') {
+                            clarifications.push(create_clarification_from_result(rule, result, { file_name: file_name || 'text input', mime_type: 'text/plain' }));
+                        }
+                    }
+                    catch (err) {
+                        const message = err instanceof Error ? err.message : 'Unknown error';
+                        on_error(err, `rule ${rule.rule_id}: ${rule.name}`);
+                        errors.push({ rule_id: rule.rule_id, error: message });
+                        rule_results.push({ rule_id: rule.rule_id, issues: [], raw_response: message, check_type: rule.check_type });
+                    }
+                }
             }
-            else if (download_url) {
+            else {
+                // Single document
+                let file_b64;
                 try {
-                    const absolute_url = download_url.startsWith('http')
-                        ? download_url
-                        : `${new URL(request.url).origin}${download_url}`;
-                    const file_response = await fetch(absolute_url);
-                    if (!file_response.ok) {
-                        throw new Error(`Failed to fetch file: ${file_response.status}`);
-                    }
-                    const buffer = Buffer.from(await file_response.arrayBuffer());
-                    file_b64 = buffer.toString('base64');
+                    file_b64 = await fetch_file_b64(request.url, { file_b64: request_file_b64, download_url, file_name });
                 }
                 catch (err) {
                     return Response.json({
                         success: false,
                         clarifications: [],
                         rule_results: [],
-                        errors: [{ rule_id: '', error: `Failed to fetch file "${file_name}": ${err instanceof Error ? err.message : 'Unknown error'}` }],
-                    }, { status: 500 });
+                        errors: [{ rule_id: '', error: err instanceof Error ? err.message : 'Unknown error' }],
+                    }, { status: err instanceof Error && err.message.includes('No file_b64') ? 400 : 500 });
                 }
-            }
-            else {
-                return Response.json({
-                    success: false,
-                    clarifications: [],
-                    rule_results: [],
-                    errors: [{ rule_id: '', error: 'Either file_b64 or download_url is required' }],
-                }, { status: 400 });
-            }
-            // Add file context variables
-            const all_variables = {
-                ...variables,
-                document_name: file_name,
-                ...(content_tag ? { document_type: content_tag } : {}),
-            };
-            // Execute each rule
-            const rule_results = [];
-            const clarifications = [];
-            const errors = [];
-            for (const rule of rules) {
-                try {
-                    const result = await execute_rule(rule, file_b64, mime_type, all_variables);
-                    rule_results.push(result);
-                    if (result.has_issue && rule.clarification_type !== 'none') {
-                        const clarification = create_clarification_from_result(rule, result, { file_name, mime_type });
-                        clarifications.push(clarification);
+                input = { kind: 'document', file_name, mime_type, file_b64 };
+                const doc_variables = {
+                    ...variables,
+                    document_name: file_name || 'document',
+                    ...(content_tag ? { document_type: content_tag } : {}),
+                };
+                for (const rule of rules) {
+                    try {
+                        const result = await run_rule(rule, input, doc_variables, wrappers);
+                        rule_results.push(result);
+                        if (result.issues.length > 0 && rule.clarification_type !== 'none') {
+                            clarifications.push(create_clarification_from_result(rule, result, { file_name, mime_type }));
+                        }
+                    }
+                    catch (err) {
+                        const message = err instanceof Error ? err.message : 'Unknown error';
+                        on_error(err, `rule ${rule.rule_id}: ${rule.name}`);
+                        errors.push({ rule_id: rule.rule_id, error: message });
+                        rule_results.push({ rule_id: rule.rule_id, issues: [], raw_response: message, check_type: rule.check_type });
                     }
-                }
-                catch (err) {
-                    const message = err instanceof Error ? err.message : 'Unknown error';
-                    on_error(err, `rule ${rule.rule_id}: ${rule.name}`);
-                    errors.push({ rule_id: rule.rule_id, error: message });
-                    rule_results.push({
-                        rule_id: rule.rule_id,
-                        has_issue: false,
-                        raw_response: message,
-                    });
                 }
             }
             const response = {