npm - hazo_collab_forms - Versions diffs - 3.1.6 → 5.0.0 - Mend

hazo_collab_forms 3.1.6 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (284) hide show

package/dist/components/thread_form/hooks/use_file_pipeline.js ADDED Viewed

@@ -0,0 +1,760 @@
+/**
+ * use_file_pipeline — orchestrates classification → validation for uploaded files.
+ *
+ * Phase 1 (client upload): classify → fetch immediate rules → validate
+ * Phase 2 (agent review): fetch backoffice rules → validate
+ */
+'use client';
+import { useState, useCallback, useRef } from 'react';
+import { infer_mime_type } from '../../hazo_fb_form/shared/format.js';
+/** Defensive normaliser: some upload paths still hand us octet-stream (the
+ *  classification API rejects it). Sniff the extension when it does. */
+function normalize_mime(mime_type, file_name) {
+    if (mime_type && mime_type !== 'application/octet-stream')
+        return mime_type;
+    return infer_mime_type(file_name);
+}
+function make_id(prefix = 'id') {
+    return `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 7)}`;
+}
+export function use_file_pipeline({ classification_api_url, validation_api_url, validation_rules_api_url, content_gate_api_url, response_extraction_api_url, document_types, available_tags, file_manager, on_log, }) {
+    // Keep a ref so classify_file reads the latest list without needing a fresh callback
+    const document_types_ref = useRef(document_types);
+    document_types_ref.current = document_types;
+    const available_tags_ref = useRef(available_tags);
+    available_tags_ref.current = available_tags;
+    const file_manager_ref = useRef(file_manager);
+    file_manager_ref.current = file_manager;
+    const on_log_ref = useRef(on_log);
+    on_log_ref.current = on_log;
+    const log_event = useCallback((msg) => on_log_ref.current?.(msg), []);
+    const [file_statuses, set_file_statuses] = useState(new Map());
+    const [classification_results, set_classification_results] = useState(new Map());
+    const [validation_results_map, set_validation_results] = useState(new Map());
+    // Use refs to avoid stale closures
+    const statuses_ref = useRef(file_statuses);
+    statuses_ref.current = file_statuses;
+    const update_status = useCallback((content_id, status) => {
+        set_file_statuses(prev => {
+            const next = new Map(prev);
+            next.set(content_id, status);
+            return next;
+        });
+    }, []);
+    /** Classify a file via the classification API */
+    const classify_file = useCallback(async (file_b64, file_name, mime_type) => {
+        if (!classification_api_url)
+            return null;
+        // Pre-flight diagnostic: empty allow-list is the #1 cause of "unknown"
+        // classifications (LLM has nothing to choose from). Warn loudly so the
+        // root cause is obvious in the activity log instead of silent.
+        const doc_types_list = document_types_ref.current;
+        if (!doc_types_list || doc_types_list.length === 0) {
+            const msg = `Classify: document_types allow-list is EMPTY — LLM will return "unknown" by design. ` +
+                `Populate available_document_types config (e.g. via the doc-type-editor page) before classification will work.`;
+            console.warn('[file-pipeline]', msg);
+            on_log_ref.current?.(msg);
+        }
+        const safe_mime = normalize_mime(mime_type, file_name);
+        if (safe_mime === 'application/octet-stream') {
+            const msg = `Skipping classification for "${file_name}": cannot determine file type from extension. Add the extension to infer_mime_type or pass an explicit mime_type when uploading.`;
+            console.warn('[file-pipeline]', msg);
+            on_log_ref.current?.(msg);
+            return null;
+        }
+        const res = await fetch(classification_api_url, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({
+                file_b64,
+                file_name,
+                mime_type: safe_mime,
+                document_types: doc_types_list,
+                available_tags: available_tags_ref.current,
+            }),
+        });
+        const data = await res.json();
+        if (!data.success) {
+            console.error('[file-pipeline] Classification failed:', data.error);
+            on_log_ref.current?.(`Classification failed: ${data.error ?? 'unknown error'}`);
+            return null;
+        }
+        // Surface what the classify route returned so we can see if extracted_fields
+        // is missing/wrong-shape. Goes to the Activity Log via on_log_ref.
+        on_log_ref.current?.(`Classify response keys: ${Object.keys(data).join(', ')}` +
+            (data.extracted_fields
+                ? ` · extracted_fields: ${JSON.stringify(data.extracted_fields)}`
+                : ' · extracted_fields: <missing>'));
+        // Post-flight diagnostic: explain the most common failure mode if the
+        // LLM returned the default "unknown" verdict despite a non-empty list.
+        if (data.document_type === 'unknown' && (data.confidence ?? 0) === 0) {
+            const had_list = doc_types_list && doc_types_list.length > 0;
+            on_log_ref.current?.(had_list
+                ? `Classification returned 'unknown' even though ${doc_types_list.length} doc type(s) were offered — the LLM didn't match any. Check the prompt or expand the allow-list.`
+                : `Classification returned 'unknown' because the doc-type allow-list was empty (see warning above).`);
+        }
+        // Dump raw LLM text when extracted_fields is missing — helps diagnose
+        // whether it's a prompt-cache issue or a parser shape mismatch.
+        if (!data.extracted_fields && data._debug_llm_text) {
+            on_log_ref.current?.(`Raw LLM text: ${String(data._debug_llm_text).slice(0, 1500)}`);
+        }
+        return {
+            document_type: data.document_type,
+            tags: data.tags || [],
+            tag_reasons: data.tag_reasons,
+            confidence: data.confidence || 0,
+            document_date: data.document_date,
+            document_nature: data.document_nature,
+            ...(data.extracted_fields ? { extracted_fields: data.extracted_fields } : {}),
+        };
+    }, [classification_api_url]);
+    /** Fetch validation rules for a document type */
+    const fetch_rules = useCallback(async (document_type, check_type) => {
+        if (!validation_rules_api_url)
+            return [];
+        const params = new URLSearchParams({ document_type, check_type });
+        const res = await fetch(`${validation_rules_api_url}?${params}`);
+        const data = await res.json();
+        if (!res.ok || data.success === false) {
+            console.warn('[file-pipeline] Rules API returned non-success', {
+                status: res.status,
+                document_type,
+                check_type,
+                response: data,
+            });
+            return [];
+        }
+        const rules = data.rules || [];
+        console.debug('[file-pipeline] Rules fetched', {
+            document_type,
+            check_type,
+            rule_count: rules.length,
+            rule_names: rules.map((r) => `${r.name} (doc=${r.document_type ?? '∅'} chk=${r.check_type ?? '∅'})`),
+        });
+        return rules;
+    }, [validation_rules_api_url]);
+    /** Run validation rules against a file */
+    const validate_file = useCallback(async (file_b64, file_name, mime_type, rules) => {
+        if (!validation_api_url || rules.length === 0)
+            return [];
+        // Convert rules to ValidationRuleExecution format
+        const rule_executions = rules.map((r) => ({
+            rule_id: r.rule_id || r.id,
+            name: r.name,
+            prompt: r.prompt,
+            target_field_id: r.target_field_id || '__document',
+            target_label: r.target_label || r.name,
+            clarification_type: r.clarification_type || 'none',
+            check_type: r.check_type,
+        }));
+        const res = await fetch(validation_api_url, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({
+                file_b64,
+                file_name,
+                mime_type,
+                rules: rule_executions,
+                mode: 'single',
+            }),
+        });
+        const data = await res.json();
+        if (!res.ok || data.success === false) {
+            console.warn('[file-pipeline] Validation API returned non-success', {
+                status: res.status,
+                errors: data?.errors,
+                file_name,
+                response: data,
+            });
+        }
+        const results = data.rule_results || [];
+        // Enrich results with rule_name from the original rules (server only returns rule_id)
+        // Build maps using both rule_id and id (UUID) as keys since either could be in the result
+        const rule_name_map = new Map();
+        for (const r of rules) {
+            const name = r.name || 'Validation Rule';
+            if (r.rule_id)
+                rule_name_map.set(r.rule_id, name);
+            if (r.id)
+                rule_name_map.set(r.id, name);
+        }
+        for (const r of results) {
+            if (!r.rule_name || /^[0-9a-f]{8}-/.test(r.rule_name)) {
+                r.rule_name = rule_name_map.get(r.rule_id) || r.rule_id;
+            }
+        }
+        return results;
+    }, [validation_api_url]);
+    /**
+     * Full pipeline: classify → fetch rules → validate.
+     *
+     * @param item      Upload item; reads item.content_id, item.file?.file_name
+     *                  (defaults to 'unknown') and item.file?.mime_type.
+     * @param file_b64  File payload forwarded as-is to classify_file and
+     *                  validate_file (presumably base64, going by the name —
+     *                  confirm with the caller).
+     * @returns {{ classification, validation_results, doc_check, ai_result }}
+     *          Fields keep their initial value (null / []) if the pipeline
+     *          stops before producing them. Errors are caught, logged and
+     *          reflected as status 'error' rather than rethrown.
+     */
+    const process_file = useCallback(async (item, file_b64) => {
+        const content_id = item.content_id;
+        const file_name = item.file?.file_name || 'unknown';
+        const mime_type = normalize_mime(item.file?.mime_type, file_name);
+        // Accumulators declared outside the try so a partial result can still
+        // be returned after a failure.
+        let classification = null;
+        let rule_results = [];
+        let doc_check = null;
+        let ai_result = null;
+        try {
+            // Phase 1: Classify
+            update_status(content_id, 'classifying');
+            log_event(`Classifying: ${file_name}`);
+            classification = await classify_file(file_b64, file_name, mime_type);
+            if (classification) {
+                log_event(`Classified ${file_name} as ${classification.document_type} (${Math.round((classification.confidence ?? 0) * 100)}%)`);
+                set_classification_results(prev => {
+                    const next = new Map(prev);
+                    next.set(content_id, classification);
+                    return next;
+                });
+                // Phase 2: Fetch immediate rules and validate
+                update_status(content_id, 'validating');
+                const all_immediate = await fetch_rules(classification.document_type, 'immediate');
+                // Exclude non-LLM rule types (e.g. periodic_coverage) — those have
+                // their own deterministic runner, not the per-file LLM call.
+                // A rule with no validation_type counts as 'llm_prompt'.
+                const rules = all_immediate.filter((r) => (r.validation_type ?? 'llm_prompt') === 'llm_prompt');
+                log_event(`Running ${rules.length} immediate rule(s) on ${file_name}`);
+                if (rules.length > 0) {
+                    rule_results = await validate_file(file_b64, file_name, mime_type, rules);
+                    set_validation_results(prev => {
+                        const next = new Map(prev);
+                        next.set(content_id, rule_results);
+                        return next;
+                    });
+                }
+                // Build doc_check if there are issues (client-visible)
+                // NOTE: reads r.issues.length directly, so a result without an
+                // `issues` array throws here and lands in the catch below.
+                const failed_rules = rule_results.filter(r => r.issues.length > 0);
+                if (failed_rules.length > 0) {
+                    doc_check = {
+                        content_ref: content_id,
+                        source_type: 'file',
+                        status: 'issues',
+                        summary: `${failed_rules.length} issue${failed_rules.length !== 1 ? 's' : ''} found with ${file_name}`,
+                        // Only the first issue of each failed rule is surfaced.
+                        issues: failed_rules.map(r => ({
+                            description: r.issues[0]?.issue_description || r.summary || r.rule_name || 'Validation issue',
+                        })),
+                    };
+                }
+                else {
+                    // Also reached when no rules ran at all (rule_results is []).
+                    doc_check = {
+                        content_ref: content_id,
+                        source_type: 'file',
+                        status: 'passed',
+                        summary: `All checks passed for ${file_name}`,
+                    };
+                }
+                // Build AI result (agent-visible)
+                ai_result = {
+                    content_ref: content_id,
+                    classification: {
+                        document_type: classification.document_type,
+                        tags: classification.tags,
+                        confidence: classification.confidence,
+                    },
+                    validation: {
+                        status: failed_rules.length > 0 ? 'issues' : 'passed',
+                        checks: rule_results.map(r => {
+                            const has_issue = r.issues.length > 0;
+                            return {
+                                check_id: make_id('chk'),
+                                // A missing name, or one that starts like a UUID, is shown as 'Check'.
+                                name: (r.rule_name && !/^[0-9a-f]{8}-/.test(r.rule_name)) ? r.rule_name : 'Check',
+                                status: has_issue ? 'failed' : 'passed',
+                                description: has_issue ? (r.issues[0]?.issue_description || 'Issue found') : (r.summary || 'Passed'),
+                                severity: has_issue ? 'error' : 'info',
+                            };
+                        }),
+                    },
+                    review_status: 'pending',
+                };
+            }
+            // No classification: nothing was validated, but the file still ends as 'done'.
+            if (!classification)
+                log_event(`Classification returned nothing for ${file_name} — skipping validation`);
+            update_status(content_id, 'done');
+        }
+        catch (err) {
+            console.error('[file-pipeline] Error processing file:', err);
+            log_event(`Pipeline error for ${file_name}: ${err instanceof Error ? err.message : 'unknown'}`);
+            update_status(content_id, 'error');
+        }
+        return { classification, validation_results: rule_results, doc_check, ai_result };
+    }, [classify_file, fetch_rules, validate_file, update_status]);
+    /**
+     * Run back-office validation for the given files. Assumes each file has
+     * already been classified (reads item.classification_result). Fetches
+     * rules with check_type='backoffice' for the file's document_type, then
+     * posts to the validation API using the file_manager's download URL so
+     * the server can fetch the bytes directly.
+     *
+     * Returns new backoffice-phase ValidationRuleResults per content_id —
+     * caller is responsible for APPENDING them to item.validation_rule_results
+     * (not replacing immediate-phase results).
+     */
+    const run_backoffice_validation = useCallback(async (items) => {
+        const results = new Map();
+        const fm = file_manager_ref.current;
+        // Mark every pending item as 'queued' upfront so the agent sees at a glance
+        // which files are waiting vs being processed vs already done. The loop below
+        // transitions each one to 'validating' when its turn comes, then 'done' (or
+        // 'error') on completion.
+        for (const item of items) {
+            if (item.type !== 'file' || !item.file)
+                continue;
+            update_status(item.content_id, 'queued');
+        }
+        for (const item of items) {
+            if (item.type !== 'file' || !item.file)
+                continue;
+            const content_id = item.content_id;
+            const file_name = item.file.file_name;
+            const mime_type = normalize_mime(item.file.mime_type, file_name);
+            // Need a classification to know which rules apply. When missing, treat
+            // it as "nothing to validate" so the file stops showing as pending —
+            // otherwise the agent's "Run Backoffice (N)" pill would never decrement.
+            const classification = item.classification_result
+                || classification_results.get(content_id);
+            if (!classification) {
+                console.warn('[file-pipeline] No classification for', content_id, '— recording empty backoffice result');
+                results.set(content_id, {
+                    validation_results: [],
+                    ai_result: {
+                        content_ref: content_id,
+                        validation: { status: 'passed', checks: [] },
+                        review_status: 'pending',
+                    },
+                });
+                continue;
+            }
+            update_status(content_id, 'validating');
+            try {
+                const all_rules = await fetch_rules(classification.document_type, 'backoffice');
+                // Only run rules EXPLICITLY marked check_type='backoffice'. Rules with
+                // unset check_type are "both phases" and have already been executed
+                // during immediate phase — re-running them here would duplicate results.
+                // Also exclude non-LLM rule types (periodic_coverage) — those run
+                // deterministically via run_periodic_coverage_pass, not via the
+                // per-file LLM call.
+                const rules = all_rules.filter((r) => r.check_type === 'backoffice' &&
+                    (r.validation_type ?? 'llm_prompt') === 'llm_prompt');
+                if (rules.length === 0) {
+                    // No backoffice rules apply for this doc type. Still record an empty
+                    // result so handle_run_backoffice stamps backoffice_validated_at and
+                    // the "Run Backoffice (N)" counter decrements.
+                    results.set(content_id, {
+                        validation_results: [],
+                        ai_result: {
+                            content_ref: content_id,
+                            classification: {
+                                document_type: classification.document_type,
+                                tags: classification.tags,
+                                confidence: classification.confidence,
+                            },
+                            validation: { status: 'passed', checks: [] },
+                            review_status: 'pending',
+                        },
+                    });
+                    update_status(content_id, 'done');
+                    continue;
+                }
+                // Resolve a URL then fetch the bytes in the BROWSER so blob: URLs
+                // (which the server can't see) still work. Send base64 inline.
+                const raw_url = fm?.get_download_url
+                    ? await fm.get_download_url(item.file.file_id)
+                    : null;
+                if (!raw_url) {
+                    console.warn('[file-pipeline] No download_url for', content_id, '— recording empty backoffice result');
+                    results.set(content_id, {
+                        validation_results: [],
+                        ai_result: {
+                            content_ref: content_id,
+                            classification: {
+                                document_type: classification.document_type,
+                                tags: classification.tags,
+                                confidence: classification.confidence,
+                            },
+                            validation: { status: 'passed', checks: [] },
+                            review_status: 'pending',
+                        },
+                    });
+                    update_status(content_id, 'done');
+                    continue;
+                }
+                let file_b64_resolved;
+                try {
+                    const blob_res = await fetch(raw_url);
+                    if (!blob_res.ok)
+                        throw new Error(`status ${blob_res.status}`);
+                    const buf = await blob_res.arrayBuffer();
+                    // btoa needs a binary string; build it in chunks to avoid stack issues on large files
+                    const bytes = new Uint8Array(buf);
+                    let binary = '';
+                    const chunk = 0x8000;
+                    for (let i = 0; i < bytes.length; i += chunk) {
+                        binary += String.fromCharCode.apply(null, Array.from(bytes.subarray(i, i + chunk)));
+                    }
+                    file_b64_resolved = btoa(binary);
+                }
+                catch (err) {
+                    console.warn('[file-pipeline] Backoffice fetch failed for', content_id, err);
+                    update_status(content_id, 'error');
+                    continue;
+                }
+                const rule_executions = rules.map((r) => ({
+                    rule_id: r.rule_id || r.id,
+                    name: r.name,
+                    prompt: r.prompt,
+                    target_field_id: r.target_field_id || '__document',
+                    target_label: r.target_label || r.name,
+                    clarification_type: r.clarification_type || 'none',
+                    check_type: r.check_type ?? 'backoffice',
+                }));
+                const res = await fetch(validation_api_url, {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        file_b64: file_b64_resolved,
+                        file_name,
+                        mime_type,
+                        rules: rule_executions,
+                        mode: 'single',
+                    }),
+                });
+                const data = await res.json();
+                if (!res.ok || data.success === false) {
+                    console.warn('[file-pipeline] Backoffice validation API returned non-success', {
+                        status: res.status,
+                        errors: data?.errors,
+                        file_name,
+                        response: data,
+                    });
+                }
+                const rule_results = data.rule_results || [];
+                // Enrich with rule_name and ensure check_type is stamped (fallback for old servers)
+                const rule_name_map = new Map();
+                for (const r of rules) {
+                    const name = r.name || 'Validation Rule';
+                    if (r.rule_id)
+                        rule_name_map.set(r.rule_id, name);
+                    if (r.id)
+                        rule_name_map.set(r.id, name);
+                }
+                for (const r of rule_results) {
+                    if (!r.rule_name || /^[0-9a-f]{8}-/.test(r.rule_name)) {
+                        r.rule_name = rule_name_map.get(r.rule_id) || r.rule_id;
+                    }
+                    if (!r.check_type)
+                        r.check_type = 'backoffice';
+                }
+                set_validation_results(prev => {
+                    const next = new Map(prev);
+                    const existing = next.get(content_id) || [];
+                    // Replace any prior backoffice results for the same rule_id, keep immediate ones
+                    const kept = existing.filter(e => e.check_type !== 'backoffice');
+                    next.set(content_id, [...kept, ...rule_results]);
+                    return next;
+                });
+                const ai_result = {
+                    content_ref: content_id,
+                    classification: {
+                        document_type: classification.document_type,
+                        tags: classification.tags,
+                        confidence: classification.confidence,
+                    },
+                    validation: {
+                        status: rule_results.some(r => r.issues.length > 0) ? 'issues' : 'passed',
+                        checks: rule_results.map(r => {
+                            const has_issue = r.issues.length > 0;
+                            return {
+                                check_id: make_id('chk'),
+                                name: (r.rule_name && !/^[0-9a-f]{8}-/.test(r.rule_name)) ? r.rule_name : 'Check',
+                                status: has_issue ? 'failed' : 'passed',
+                                description: has_issue ? (r.issues[0]?.issue_description || 'Issue found') : (r.summary || 'Passed'),
+                                severity: has_issue ? 'error' : 'info',
+                            };
+                        }),
+                    },
+                    review_status: 'pending',
+                };
+                results.set(content_id, { validation_results: rule_results, ai_result });
+                update_status(content_id, 'done');
+            }
+            catch (err) {
+                console.error('[file-pipeline] Backoffice validation failed for', content_id, err);
+                update_status(content_id, 'error');
+            }
+        }
+        return results;
+    }, [classification_results, fetch_rules, validation_api_url, update_status]);
+    /**
+     * Per-group periodic_coverage pass. Run AFTER per-file backoffice
+     * validation has populated extracted_data on validation_results — this
+     * pass groups files by their classification primary tag (or document_type
+     * fallback), fetches periodic_coverage rules for each group's doc_type,
+     * resolves period bounds via resolve_variable_chain, and computes gaps
+     * using compute_coverage_gaps.
+     *
+     * Returns one GroupCoverageResult per (group_key, rule) pair. The consumer
+     * is responsible for translating these into user-visible validations or
+     * clarifications.
+     */
+    const run_periodic_coverage_pass = useCallback(async (items, ctx) => {
+        // Lazy-import the pure utilities directly (NOT via lib/index.js) so
+        // Turbopack doesn't pull the server-only barrel into the client bundle.
+        const [{ compute_coverage_gaps }, { resolve_variable_chain }] = await Promise.all([
+            import('../../../lib/periodic_coverage_runner.js'),
+            import('../../../lib/resolve_variable.js'),
+        ]);
+        // 1. Group files by document_type. Earlier versions grouped by primary tag,
+        //    but a file with two tags (e.g. ['rental_property', 'rental_income'])
+        //    would get assigned to its first tag — and a sibling with the tags in
+        //    the other order would land in a different "group" with the same
+        //    document_type, causing the coverage rule to fire twice on the same
+        //    logical set. Document type is the rule's actual scope, so use that.
+        const groups = new Map();
+        for (const item of items) {
+            if (item.type !== 'file')
+                continue;
+            const classification = item.classification_result || classification_results.get(item.content_id);
+            if (!classification?.document_type)
+                continue;
+            const group_key = classification.document_type;
+            if (!groups.has(group_key))
+                groups.set(group_key, []);
+            groups.get(group_key).push(item);
+        }
+        const out = [];
+        // 2. For each group, run matching periodic_coverage rules.
+        for (const [group_key, group_items] of groups.entries()) {
+            // group_key === document_type after the simplification above.
+            const document_type = group_key;
+            const all_rules = await fetch_rules(document_type, 'backoffice');
+            const coverage_rules = all_rules.filter((r) => r.validation_type === 'periodic_coverage' && r.coverage);
+            if (coverage_rules.length === 0)
+                continue;
+            // 3. Build FilePeriod[] from each file's per-file extracted_data.
+            const file_periods = group_items.map(item => {
+                const file_results = validation_results_map.get(item.content_id) ?? [];
+                const extracted = file_results
+                    .map(r => r.extracted_data)
+                    .find(d => d && d.period_start && d.period_end);
+                return {
+                    id: item.content_id,
+                    period_start: typeof extracted?.period_start === 'string' ? extracted.period_start : null,
+                    period_end: typeof extracted?.period_end === 'string' ? extracted.period_end : null,
+                };
+            });
+            // 4. For each rule, resolve bounds and compute gaps.
+            for (const rule of coverage_rules) {
+                const period_start_chain = rule.coverage.period_start ?? [];
+                const period_end_chain = rule.coverage.period_end ?? [];
+                const expected_start = resolve_variable_chain(period_start_chain, ctx);
+                const expected_end = resolve_variable_chain(period_end_chain, ctx);
+                if (!expected_start || !expected_end) {
+                    out.push({
+                        group_key,
+                        document_type,
+                        rule_id: rule.rule_id,
+                        rule_name: rule.name ?? rule.rule_id,
+                        custom_issue_description: rule.custom_issue_description,
+                        gaps: [],
+                        skipped: true,
+                    });
+                    continue;
+                }
+                const gaps = compute_coverage_gaps({
+                    files: file_periods,
+                    expected_start,
+                    expected_end,
+                    cadence: rule.coverage.cadence,
+                    alignment: rule.coverage.alignment,
+                });
+                out.push({
+                    group_key,
+                    document_type,
+                    rule_id: rule.rule_id,
+                    rule_name: rule.name ?? rule.rule_id,
+                    custom_issue_description: rule.custom_issue_description,
+                    gaps,
+                });
+            }
+        }
+        return out;
+    }, [classification_results, fetch_rules, validation_results_map]);
+    /** Run validation rules against text content (no classification — uses 'general' rules) */
+    const process_text = useCallback(async (message_id, text) => {
+        let rule_results = [];
+        let doc_check = null;
+        try {
+            update_status(message_id, 'validating');
+            // Fetch 'general' validation rules (text doesn't have a document_type)
+            const rules = await fetch_rules('general', 'immediate');
+            if (rules.length > 0 && validation_api_url) {
+                // Build rule executions
+                const rule_executions = rules.map((r) => ({
+                    rule_id: r.rule_id || r.id,
+                    name: r.name,
+                    prompt: r.prompt,
+                    target_field_id: r.target_field_id || '__text',
+                    target_label: r.target_label || r.name,
+                    clarification_type: r.clarification_type || 'none',
+                    check_type: r.check_type,
+                }));
+                const res = await fetch(validation_api_url, {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        text_content: text,
+                        file_name: 'text input',
+                        mime_type: 'text/plain',
+                        rules: rule_executions,
+                        mode: 'single',
+                    }),
+                });
+                const data = await res.json();
+                rule_results = data.rule_results || [];
+                // Enrich with rule names
+                const rule_name_map = new Map();
+                for (const r of rules) {
+                    const name = r.name || 'Validation Rule';
+                    if (r.rule_id)
+                        rule_name_map.set(r.rule_id, name);
+                    if (r.id)
+                        rule_name_map.set(r.id, name);
+                }
+                for (const r of rule_results) {
+                    if (!r.rule_name || /^[0-9a-f]{8}-/.test(r.rule_name)) {
+                        r.rule_name = rule_name_map.get(r.rule_id) || r.rule_id;
+                    }
+                }
+                set_validation_results(prev => {
+                    const next = new Map(prev);
+                    next.set(message_id, rule_results);
+                    return next;
+                });
+            }
+            // Build doc_check for client-visible issues
+            const failed_rules = rule_results.filter(r => r.issues.length > 0);
+            if (failed_rules.length > 0) {
+                doc_check = {
+                    content_ref: message_id,
+                    source_type: 'text',
+                    status: 'issues',
+                    summary: `${failed_rules.length} issue${failed_rules.length !== 1 ? 's' : ''} found in your response`,
+                    issues: failed_rules.map(r => ({
+                        description: r.issues[0]?.issue_description || r.summary || r.rule_name || 'Validation issue',
+                        text_snippet: undefined,
+                    })),
+                };
+            }
+            update_status(message_id, 'done');
+        }
+        catch (err) {
+            console.error('[file-pipeline] Error processing text:', err);
+            update_status(message_id, 'error');
+        }
+        return { validation_results: rule_results, doc_check };
+    }, [fetch_rules, validation_api_url, update_status]);
+    /**
+     * Gate that decides whether a piece of client text is worth running
+     * validation on. Two stages:
+     *   1. Cheap heuristic — short strings or strings with no digits/dates/
+     *      currency tokens fail immediately (no LLM call).
+     *   2. LLM gate — only invoked when heuristic passes AND a gate URL is
+     *      configured. If no URL, we trust the heuristic alone.
+     * Fails closed: any network error returns has_content=false so nothing
+     * AI-flavoured leaks to the client.
+     */
+    const gate_text_content = useCallback(async (text) => {
+        const trimmed = text.trim();
+        // Heuristic: too short to be a document-style statement
+        if (trimmed.length < 80) {
+            return { has_content: false, reason: 'below length threshold' };
+        }
+        // Heuristic: no numeric signal (amounts, dates, IDs) usually means prose
+        const has_signal = /\d/.test(trimmed);
+        if (!has_signal) {
+            return { has_content: false, reason: 'no numeric signal' };
+        }
+        // No gate endpoint configured — trust the heuristic
+        if (!content_gate_api_url) {
+            return { has_content: true, reason: 'heuristic passed; no LLM gate configured' };
+        }
+        try {
+            const res = await fetch(content_gate_api_url, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ text: trimmed }),
+            });
+            const data = await res.json();
+            if (!data?.success) {
+                return { has_content: false, reason: data?.error || 'gate failed; defaulting to false' };
+            }
+            return {
+                has_content: data.has_content === true,
+                reason: typeof data.reason === 'string' ? data.reason : '',
+            };
+        }
+        catch (err) {
+            console.error('[file-pipeline] Content gate error:', err);
+            return { has_content: false, reason: 'gate network error; defaulting to false' };
+        }
+    }, [content_gate_api_url]);
+    /**
+     * Extract structured fields from a response file via response_extraction_api_url.
+     * The route is expected to return { success, data: { total?, date?, vendor? } }.
+     */
+    const extract_response_fields = useCallback(async (file_id, file_b64, file_name, mime_type) => {
+        if (!response_extraction_api_url)
+            return null;
+        try {
+            const res = await fetch(response_extraction_api_url, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({
+                    file_id,
+                    file_name,
+                    mime_type,
+                    ...(file_b64 ? { file_b64 } : {}),
+                }),
+            });
+            const data = await res.json();
+            if (!res.ok || data?.success === false) {
+                console.warn('[file-pipeline] Response extraction returned non-success', data);
+                return null;
+            }
+            const raw = data.data ?? {};
+            const total_value = raw.total ?? raw.amount ?? raw.invoice_total;
+            const date_value = raw.date ?? raw.invoice_date ?? raw.document_date;
+            const vendor_value = raw.vendor ?? raw.supplier ?? raw.payee;
+            const fields = {
+                ...(total_value !== undefined && total_value !== null && total_value !== ''
+                    ? { total: typeof total_value === 'number' ? total_value : parseFloat(String(total_value).replace(/[^\d.\-]/g, '')) }
+                    : {}),
+                ...(date_value ? { date: String(date_value) } : {}),
+                ...(vendor_value ? { vendor: String(vendor_value) } : {}),
+                raw,
+            };
+            // Drop NaN totals
+            if (fields.total !== undefined && Number.isNaN(fields.total))
+                delete fields.total;
+            // No useful fields → null so caller treats as "no extraction available"
+            if (fields.total === undefined && !fields.date && !fields.vendor)
+                return null;
+            return fields;
+        }
+        catch (err) {
+            console.warn('[file-pipeline] Response extraction failed', err);
+            return null;
+        }
+    }, [response_extraction_api_url]);
+    return {
+        process_file,
+        process_text,
+        gate_text_content,
+        run_backoffice_validation,
+        run_periodic_coverage_pass,
+        extract_response_fields,
+        file_statuses,
+        classification_results,
+        validation_results: validation_results_map,
+    };
+}
+//# sourceMappingURL=use_file_pipeline.js.map