@realtimex/folio 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +20 -0
- package/README.md +63 -0
- package/api/server.ts +130 -0
- package/api/src/config/index.ts +96 -0
- package/api/src/middleware/auth.ts +128 -0
- package/api/src/middleware/errorHandler.ts +88 -0
- package/api/src/middleware/index.ts +4 -0
- package/api/src/middleware/rateLimit.ts +71 -0
- package/api/src/middleware/validation.ts +58 -0
- package/api/src/routes/accounts.ts +142 -0
- package/api/src/routes/baseline-config.ts +124 -0
- package/api/src/routes/chat.ts +154 -0
- package/api/src/routes/health.ts +61 -0
- package/api/src/routes/index.ts +35 -0
- package/api/src/routes/ingestions.ts +275 -0
- package/api/src/routes/migrate.ts +112 -0
- package/api/src/routes/policies.ts +121 -0
- package/api/src/routes/processing.ts +90 -0
- package/api/src/routes/rules.ts +11 -0
- package/api/src/routes/sdk.ts +100 -0
- package/api/src/routes/settings.ts +80 -0
- package/api/src/routes/setup.ts +389 -0
- package/api/src/routes/stats.ts +81 -0
- package/api/src/routes/tts.ts +190 -0
- package/api/src/services/BaselineConfigService.ts +208 -0
- package/api/src/services/ChatService.ts +204 -0
- package/api/src/services/GoogleDriveService.ts +331 -0
- package/api/src/services/GoogleSheetsService.ts +1107 -0
- package/api/src/services/IngestionService.ts +1187 -0
- package/api/src/services/ModelCapabilityService.ts +248 -0
- package/api/src/services/PolicyEngine.ts +1625 -0
- package/api/src/services/PolicyLearningService.ts +527 -0
- package/api/src/services/PolicyLoader.ts +249 -0
- package/api/src/services/RAGService.ts +391 -0
- package/api/src/services/SDKService.ts +249 -0
- package/api/src/services/supabase.ts +113 -0
- package/api/src/utils/Actuator.ts +284 -0
- package/api/src/utils/actions/ActionHandler.ts +34 -0
- package/api/src/utils/actions/AppendToGSheetAction.ts +260 -0
- package/api/src/utils/actions/AutoRenameAction.ts +58 -0
- package/api/src/utils/actions/CopyAction.ts +120 -0
- package/api/src/utils/actions/CopyToGDriveAction.ts +64 -0
- package/api/src/utils/actions/LogCsvAction.ts +48 -0
- package/api/src/utils/actions/NotifyAction.ts +39 -0
- package/api/src/utils/actions/RenameAction.ts +57 -0
- package/api/src/utils/actions/WebhookAction.ts +58 -0
- package/api/src/utils/actions/utils.ts +293 -0
- package/api/src/utils/llmResponse.ts +61 -0
- package/api/src/utils/logger.ts +67 -0
- package/bin/folio-deploy.js +12 -0
- package/bin/folio-setup.js +45 -0
- package/bin/folio.js +65 -0
- package/dist/api/server.js +106 -0
- package/dist/api/src/config/index.js +81 -0
- package/dist/api/src/middleware/auth.js +93 -0
- package/dist/api/src/middleware/errorHandler.js +73 -0
- package/dist/api/src/middleware/index.js +4 -0
- package/dist/api/src/middleware/rateLimit.js +43 -0
- package/dist/api/src/middleware/validation.js +54 -0
- package/dist/api/src/routes/accounts.js +110 -0
- package/dist/api/src/routes/baseline-config.js +91 -0
- package/dist/api/src/routes/chat.js +114 -0
- package/dist/api/src/routes/health.js +52 -0
- package/dist/api/src/routes/index.js +31 -0
- package/dist/api/src/routes/ingestions.js +207 -0
- package/dist/api/src/routes/migrate.js +91 -0
- package/dist/api/src/routes/policies.js +86 -0
- package/dist/api/src/routes/processing.js +75 -0
- package/dist/api/src/routes/rules.js +8 -0
- package/dist/api/src/routes/sdk.js +80 -0
- package/dist/api/src/routes/settings.js +68 -0
- package/dist/api/src/routes/setup.js +315 -0
- package/dist/api/src/routes/stats.js +62 -0
- package/dist/api/src/routes/tts.js +178 -0
- package/dist/api/src/services/BaselineConfigService.js +168 -0
- package/dist/api/src/services/ChatService.js +166 -0
- package/dist/api/src/services/GoogleDriveService.js +280 -0
- package/dist/api/src/services/GoogleSheetsService.js +795 -0
- package/dist/api/src/services/IngestionService.js +990 -0
- package/dist/api/src/services/ModelCapabilityService.js +179 -0
- package/dist/api/src/services/PolicyEngine.js +1353 -0
- package/dist/api/src/services/PolicyLearningService.js +397 -0
- package/dist/api/src/services/PolicyLoader.js +159 -0
- package/dist/api/src/services/RAGService.js +295 -0
- package/dist/api/src/services/SDKService.js +212 -0
- package/dist/api/src/services/supabase.js +72 -0
- package/dist/api/src/utils/Actuator.js +225 -0
- package/dist/api/src/utils/actions/ActionHandler.js +1 -0
- package/dist/api/src/utils/actions/AppendToGSheetAction.js +191 -0
- package/dist/api/src/utils/actions/AutoRenameAction.js +49 -0
- package/dist/api/src/utils/actions/CopyAction.js +112 -0
- package/dist/api/src/utils/actions/CopyToGDriveAction.js +55 -0
- package/dist/api/src/utils/actions/LogCsvAction.js +42 -0
- package/dist/api/src/utils/actions/NotifyAction.js +32 -0
- package/dist/api/src/utils/actions/RenameAction.js +51 -0
- package/dist/api/src/utils/actions/WebhookAction.js +51 -0
- package/dist/api/src/utils/actions/utils.js +237 -0
- package/dist/api/src/utils/llmResponse.js +63 -0
- package/dist/api/src/utils/logger.js +51 -0
- package/dist/assets/index-DzN8-j-e.css +1 -0
- package/dist/assets/index-Uy-ai3Dh.js +113 -0
- package/dist/favicon.svg +31 -0
- package/dist/folio-logo.svg +46 -0
- package/dist/index.html +14 -0
- package/docs-dev/FPE-spec.md +196 -0
- package/docs-dev/folio-prd.md +47 -0
- package/docs-dev/foundation-checklist.md +30 -0
- package/docs-dev/hybrid-routing-architecture.md +205 -0
- package/docs-dev/ingestion-engine.md +69 -0
- package/docs-dev/port-from-email-automator.md +32 -0
- package/docs-dev/tech-spec.md +98 -0
- package/index.html +13 -0
- package/package.json +101 -0
- package/public/favicon.svg +31 -0
- package/public/folio-logo.svg +46 -0
- package/scripts/dev-task.mjs +51 -0
- package/scripts/get-latest-migration-timestamp.mjs +34 -0
- package/scripts/migrate.sh +91 -0
- package/supabase/.temp/cli-latest +1 -0
- package/supabase/.temp/gotrue-version +1 -0
- package/supabase/.temp/pooler-url +1 -0
- package/supabase/.temp/postgres-version +1 -0
- package/supabase/.temp/project-ref +1 -0
- package/supabase/.temp/rest-version +1 -0
- package/supabase/.temp/storage-migration +1 -0
- package/supabase/.temp/storage-version +1 -0
- package/supabase/config.toml +64 -0
- package/supabase/functions/_shared/auth.ts +35 -0
- package/supabase/functions/_shared/cors.ts +12 -0
- package/supabase/functions/_shared/supabaseAdmin.ts +17 -0
- package/supabase/functions/api-v1-settings/index.ts +66 -0
- package/supabase/functions/setup/index.ts +91 -0
- package/supabase/migrations/20260223000000_initial_foundation.sql +136 -0
- package/supabase/migrations/20260223000001_add_migration_rpc.sql +10 -0
- package/supabase/migrations/20260224000002_add_init_state_view.sql +20 -0
- package/supabase/migrations/20260224000003_port_user_creation_parity.sql +139 -0
- package/supabase/migrations/20260224000004_add_avatars_storage.sql +26 -0
- package/supabase/migrations/20260224000005_add_tts_and_embed_settings.sql +24 -0
- package/supabase/migrations/20260224000006_add_policies_table.sql +48 -0
- package/supabase/migrations/20260224000007_fix_migration_rpc.sql +9 -0
- package/supabase/migrations/20260224000008_add_ingestions_table.sql +42 -0
- package/supabase/migrations/20260225000000_setup_compatible_mode.sql +119 -0
- package/supabase/migrations/20260225000001_restore_ingestions.sql +49 -0
- package/supabase/migrations/20260225000002_add_ingestion_trace.sql +2 -0
- package/supabase/migrations/20260225000003_add_baseline_configs.sql +35 -0
- package/supabase/migrations/20260226000000_add_processing_events.sql +26 -0
- package/supabase/migrations/20260226000001_add_ingestion_file_hash.sql +10 -0
- package/supabase/migrations/20260226000002_add_dynamic_rag.sql +150 -0
- package/supabase/migrations/20260226000003_add_ingestion_summary.sql +4 -0
- package/supabase/migrations/20260226000004_add_ingestion_tags.sql +7 -0
- package/supabase/migrations/20260226000005_add_chat_tables.sql +60 -0
- package/supabase/migrations/20260227000000_harden_chat_messages_rls.sql +25 -0
- package/supabase/migrations/20260228000000_add_vision_model_capabilities.sql +8 -0
- package/supabase/migrations/20260228000001_add_policy_match_feedback.sql +51 -0
- package/supabase/migrations/29991231235959_test_migration.sql +0 -0
- package/supabase/templates/confirmation.html +76 -0
- package/supabase/templates/email-change.html +76 -0
- package/supabase/templates/invite.html +72 -0
- package/supabase/templates/magic-link.html +68 -0
- package/supabase/templates/recovery.html +82 -0
- package/tsconfig.api.json +16 -0
- package/tsconfig.json +25 -0
- package/vite.config.ts +146 -0
|
@@ -0,0 +1,527 @@
|
|
|
1
|
+
import type { SupabaseClient } from "@supabase/supabase-js";
|
|
2
|
+
import { createLogger } from "../utils/logger.js";
|
|
3
|
+
|
|
4
|
+
const logger = createLogger("PolicyLearningService");
|
|
5
|
+
|
|
6
|
+
/**
 * Feature set compared between an incoming document and stored feedback samples.
 * Produced by `buildFromDocInput`, `buildFromIngestionRow` and `normalizeFeatures`.
 */
type PolicyLearningFeatures = {
  /** Normalized, de-duplicated tokens; compared with Jaccard similarity in `scorePair`. */
  tokens: string[];
  /** Lowercase file extension without the dot; compared for exact equality. */
  extension?: string;
  /** Lowercase MIME type; compared for exact equality. */
  mime_type?: string;
  /** Lowercase document type; compared with `softTextMatch`. */
  document_type?: string;
  /** Lowercase issuer/vendor name; compared with `softTextMatch`. */
  issuer?: string;
};
|
|
13
|
+
|
|
14
|
+
/**
 * Shape of the rows that `resolveLearnedCandidate` selects from the
 * `policy_match_feedback` table (`policy_id,policy_name,features`).
 */
type PolicyLearningRow = {
  policy_id: string;
  policy_name?: string | null;
  /** Stored feature payload; untrusted until passed through `normalizeFeatures`. */
  features?: unknown;
};
|
|
19
|
+
|
|
20
|
+
/**
 * Minimal view of an ingestion record: only the fields that
 * `buildFromIngestionRow` and `recordManualMatch` read.
 */
type IngestionLike = {
  id: string;
  filename?: string | null;
  mime_type?: string | null;
  /** Used only when it is an array; each entry is stringified and tokenized. */
  tags?: unknown;
  /** Used only when it is a non-array object; its `_enrichment` key is ignored for tokens. */
  extracted?: unknown;
};
|
|
27
|
+
|
|
28
|
+
/** A policy proposed by the learner for a document. */
type CandidatePolicy = {
  policyId: string;
  /** Aggregated similarity score, clamped to the range [0, 1]. */
  score: number;
  /** Number of valid feedback samples that contributed to the score. */
  support: number;
};
|
|
33
|
+
|
|
34
|
+
/** A candidate together with the threshold it was judged against. */
type CandidatePolicyScore = CandidatePolicy & {
  /** Minimum score required for this candidate's level of support. */
  requiredScore: number;
  /** True when `score >= requiredScore`. */
  accepted: boolean;
};
|
|
38
|
+
|
|
39
|
+
/** Outcome code reported in the diagnostics of `resolveLearnedCandidate`. */
export type PolicyLearningDecisionReason =
  // A candidate met its required score and was returned.
  | "accepted"
  // The caller supplied an empty policy id list.
  | "no_policy_ids"
  // No tokens could be derived from the document.
  | "no_document_features"
  // The feedback query returned no rows.
  | "no_feedback_samples"
  // Rows were returned but none carried usable features.
  | "no_valid_samples"
  // The best candidate scored below its required threshold.
  | "score_below_threshold"
  // The feedback query failed.
  | "read_error";
|
|
47
|
+
|
|
48
|
+
/** Explains how `resolveLearnedCandidate` arrived at its result. */
export type PolicyLearningDiagnostics = {
  reason: PolicyLearningDecisionReason;
  /** Number of policies considered. */
  evaluatedPolicies: number;
  /** Number of feedback samples that had usable features and were scored. */
  evaluatedSamples: number;
  /** Highest-scoring candidate, present once candidates have been ranked. */
  bestCandidate?: CandidatePolicyScore;
  /** Up to three highest-scoring candidates, best first. */
  topCandidates: CandidatePolicyScore[];
};
|
|
55
|
+
|
|
56
|
+
/** Result of `resolveLearnedCandidate`: the accepted candidate (or null) plus diagnostics. */
export type LearnedCandidateResolution = {
  /** The accepted candidate, or `null` when nothing qualified. */
  candidate: CandidatePolicy | null;
  diagnostics: PolicyLearningDiagnostics;
};
|
|
60
|
+
|
|
61
|
+
/**
 * Per-policy feedback statistics keyed by policy id: how many feedback rows
 * exist and the `created_at` of the most recent one that has a timestamp.
 */
export type PolicyLearningStats = Record<
  string,
  {
    samples: number;
    lastSampleAt: string | null;
  }
>;
|
|
62
|
+
|
|
63
|
+
/**
 * Coerces any value to a lowercase, whitespace-trimmed string.
 *
 * @param value - Arbitrary input; `null` and `undefined` yield `""`.
 * @returns The stringified value, lowercased and trimmed.
 */
function normalizeText(value: unknown): string {
  return String(value ?? "").toLowerCase().trim();
}
|
|
67
|
+
|
|
68
|
+
/**
 * Splits a value into lowercase alphanumeric tokens.
 *
 * Every run of characters outside `a-z0-9` (after lowercasing) acts as a
 * separator, and tokens shorter than two characters are discarded.
 *
 * @param value - Arbitrary input, normalized with `normalizeText` first.
 * @returns Tokens in order of appearance; duplicates are kept.
 */
function tokenize(value: unknown): string[] {
  const cleaned = normalizeText(value).replace(/[^a-z0-9]+/g, " ").trim();
  if (cleaned === "") return [];

  const tokens: string[] = [];
  for (const piece of cleaned.split(/\s+/)) {
    const token = piece.trim();
    if (token.length >= 2) tokens.push(token);
  }
  return tokens;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Removes duplicate tokens while preserving first-seen order, keeping at most
 * `limit` unique tokens.
 *
 * @param tokens - Tokens to de-duplicate.
 * @param limit - Maximum number of tokens to return (default 100). A limit of
 *   zero or less yields an empty list.
 * @returns The first `limit` distinct tokens in their original order.
 */
function dedupeTokens(tokens: string[], limit = 100): string[] {
  // Guard first: the loop below appends before it checks the cap, so without
  // this a non-positive limit would still let one token through.
  if (limit <= 0) return [];

  const seen = new Set<string>();
  const unique: string[] = [];
  for (const token of tokens) {
    if (seen.has(token)) continue;
    seen.add(token);
    unique.push(token);
    if (unique.length >= limit) break;
  }
  return unique;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Narrows a value to a string-keyed record.
 *
 * @param value - Arbitrary input.
 * @returns The same object when `value` is a non-null, non-array object;
 *   otherwise `null`.
 */
function toRecord(value: unknown): Record<string, unknown> | null {
  const isObjectNotArray = typeof value === "object" && value !== null && !Array.isArray(value);
  return isObjectNotArray ? (value as Record<string, unknown>) : null;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Extracts the lowercase extension (without the dot) from a filename or path.
 *
 * Only the final path segment is inspected, so a dot inside a directory name
 * (e.g. `/home/john.doe/README`) is never mistaken for an extension. Both `/`
 * and `\` are treated as path separators.
 *
 * @param filename - A bare filename or a full path; may be null/undefined.
 * @returns The extension reduced to `a-z0-9` characters, or `undefined` when
 *   the final segment has no dot, ends with a dot, or the extension contains
 *   no alphanumeric characters.
 */
function extractExtension(filename: string | null | undefined): string | undefined {
  const normalized = normalizeText(filename);
  if (!normalized) return undefined;

  // Callers pass full paths as well as bare names; reduce to the basename first.
  const lastSeparator = Math.max(normalized.lastIndexOf("/"), normalized.lastIndexOf("\\"));
  const baseName = normalized.slice(lastSeparator + 1);

  const dot = baseName.lastIndexOf(".");
  if (dot < 0 || dot === baseName.length - 1) return undefined;

  const ext = baseName.slice(dot + 1).replace(/[^a-z0-9]/g, "");
  return ext || undefined;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Collects the primitive leaves of a nested value as strings.
 *
 * Strings, numbers and booleans are stringified; arrays and objects are
 * walked recursively. Anything nested deeper than two levels below the root,
 * and any other kind of value (functions, symbols, bigints, null/undefined),
 * contributes nothing.
 *
 * @param value - Value to flatten.
 * @param depth - Current nesting depth; callers normally omit it.
 * @returns Stringified leaves in traversal order.
 */
function flattenValues(value: unknown, depth = 0): string[] {
  if (value == null || depth > 2) return [];

  switch (typeof value) {
    case "string":
    case "number":
    case "boolean":
      return [String(value)];
    case "object": {
      // `value` is non-null here: either an array or a keyed object.
      const children: unknown[] = Array.isArray(value) ? value : Object.values(value);
      return children.flatMap((child) => flattenValues(child, depth + 1));
    }
    default:
      return [];
  }
}
|
|
117
|
+
|
|
118
|
+
/**
 * Validates and normalizes a stored feature payload.
 *
 * @param value - Raw `features` value read from a feedback row.
 * @returns Normalized features, or `null` when `value` is not an object or
 *   yields no usable tokens. At most 120 distinct tokens are kept.
 */
function normalizeFeatures(value: unknown): PolicyLearningFeatures | null {
  const record = toRecord(value);
  if (record === null) return null;

  const candidateTokens: string[] = [];
  if (Array.isArray(record.tokens)) {
    for (const raw of record.tokens) {
      const text = normalizeText(raw);
      if (text) candidateTokens.push(text);
    }
  }

  const tokens = dedupeTokens(candidateTokens, 120);
  if (tokens.length === 0) return null;

  // Empty strings collapse to `undefined` so absent fields are skipped when scoring.
  const optionalText = (field: unknown): string | undefined => normalizeText(field) || undefined;

  return {
    tokens,
    extension: optionalText(record.extension),
    mime_type: optionalText(record.mime_type),
    document_type: optionalText(record.document_type),
    issuer: optionalText(record.issuer),
  };
}
|
|
133
|
+
|
|
134
|
+
/**
 * Jaccard similarity of two token lists, treated as sets.
 *
 * @param tokensA - First token list.
 * @param tokensB - Second token list.
 * @returns `|A ∩ B| / |A ∪ B|`, or 0 when either list is empty.
 */
function jaccard(tokensA: string[], tokensB: string[]): number {
  if (tokensA.length === 0 || tokensB.length === 0) return 0;

  const left = new Set(tokensA);
  const right = new Set(tokensB);
  const shared = [...left].filter((token) => right.has(token)).length;
  const unionSize = left.size + right.size - shared;

  return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Lenient text comparison: two non-empty strings match when they are equal
 * or one contains the other.
 *
 * @returns `false` when either side is missing or empty.
 */
function softTextMatch(a?: string, b?: string): boolean {
  if (a && b) {
    return a === b || a.includes(b) || b.includes(a);
  }
  return false;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Clamps a number to the closed interval [0, 1].
 *
 * @returns 0 for negative input, 1 for input above 1, otherwise the input unchanged.
 */
function clamp01(value: number): number {
  return value < 0 ? 0 : value > 1 ? 1 : value;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Acceptance threshold for a candidate given how many samples back it.
 *
 * @param support - Number of feedback samples for the policy.
 * @returns 0.72 when there are at least two samples, otherwise the stricter 0.82.
 */
function requiredScoreForSupport(support: number): number {
  if (support >= 2) {
    return 0.72;
  }
  return 0.82;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Scores how closely a document resembles one stored feedback sample.
 *
 * The base is token Jaccard similarity weighted by 0.72. Each optional field
 * adjusts the score only when BOTH sides provide it: a bonus on a match and a
 * small penalty on a mismatch. The total is clamped to [0, 1].
 *
 * @param doc - Features of the document being classified.
 * @param sample - Features of a stored feedback sample.
 * @returns Similarity score in [0, 1].
 */
function scorePair(doc: PolicyLearningFeatures, sample: PolicyLearningFeatures): number {
  // Exact-equality adjustment; contributes nothing when either side is absent.
  const exactAdjust = (a: string | undefined, b: string | undefined, hit: number, miss: number): number =>
    a && b ? (a === b ? hit : miss) : 0;

  // Containment-tolerant adjustment; contributes nothing when either side is absent.
  const softAdjust = (a: string | undefined, b: string | undefined, hit: number, miss: number): number =>
    a && b ? (softTextMatch(a, b) ? hit : miss) : 0;

  const total =
    jaccard(doc.tokens, sample.tokens) * 0.72 +
    exactAdjust(doc.extension, sample.extension, 0.16, -0.04) +
    exactAdjust(doc.mime_type, sample.mime_type, 0.08, -0.02) +
    softAdjust(doc.document_type, sample.document_type, 0.17, -0.03) +
    softAdjust(doc.issuer, sample.issuer, 0.14, -0.02);

  return clamp01(total);
}
|
|
184
|
+
|
|
185
|
+
/**
 * Builds comparison features for a document that is about to be classified.
 *
 * Tokens are drawn, in priority order, from the file's basename, the baseline
 * entity values, the first 1200 characters of the document text, and the
 * resolved document type and issuer. At most 120 distinct tokens are kept.
 *
 * @param opts.filePath - Path of the document; `/` and `\` are both treated as
 *   separators so that directory names never leak into the token set.
 * @param opts.baselineEntities - Extracted entities; the first non-nullish of
 *   `document_type`/`doc_type`/`type`/`category` becomes the document type and
 *   the first non-nullish of `issuer`/`vendor`/`merchant`/`store_name`/`sender`
 *   becomes the issuer.
 * @param opts.documentText - Optional raw text of the document.
 * @returns Features without a `mime_type` (none is available at this stage).
 */
function buildFromDocInput(opts: {
  filePath: string;
  baselineEntities: Record<string, unknown>;
  documentText?: string;
}): PolicyLearningFeatures {
  const { filePath, documentText } = opts;
  const baseline = opts.baselineEntities ?? {};
  const extension = extractExtension(filePath);

  const docType =
    normalizeText(
      baseline.document_type ??
        baseline.doc_type ??
        baseline.type ??
        baseline.category
    ) || undefined;

  const issuer =
    normalizeText(
      baseline.issuer ??
        baseline.vendor ??
        baseline.merchant ??
        baseline.store_name ??
        baseline.sender
    ) || undefined;

  // Stored samples are tokenized from a bare filename, so strip directories
  // here too, including Windows-style backslash-separated ones.
  const baseName = filePath.split(/[\\/]/).pop() ?? filePath;

  const fileTokens = tokenize(baseName);
  const extractedTokens = flattenValues(baseline).flatMap((value) => tokenize(value));
  const textTokens = tokenize((documentText ?? "").slice(0, 1200));

  const tokens = dedupeTokens(
    [
      ...fileTokens,
      ...extractedTokens,
      ...textTokens,
      ...(docType ? tokenize(docType) : []),
      ...(issuer ? tokenize(issuer) : []),
    ],
    120
  );

  return {
    tokens,
    extension,
    document_type: docType,
    issuer,
  };
}
|
|
230
|
+
|
|
231
|
+
/**
 * Builds comparison features from a stored ingestion record.
 *
 * Tokens are drawn, in priority order, from the filename, the tags, every
 * extracted value except those under the `_enrichment` key, and the resolved
 * document type and issuer. At most 120 distinct tokens are kept.
 *
 * @param ingestion - Ingestion record to describe.
 * @returns Features including extension and MIME type when available.
 */
function buildFromIngestionRow(ingestion: IngestionLike): PolicyLearningFeatures {
  const extracted = toRecord(ingestion.extracted) ?? {};

  const tagTexts: string[] = [];
  if (Array.isArray(ingestion.tags)) {
    for (const tag of ingestion.tags) tagTexts.push(String(tag));
  }

  const pickText = (...values: unknown[]): string | undefined => {
    // Mirrors a `??` chain: the first non-nullish value wins, even if it is empty.
    const chosen = values.find((candidate) => candidate != null);
    return normalizeText(chosen) || undefined;
  };

  const docType = pickText(
    extracted.document_type,
    extracted.doc_type,
    extracted.type,
    extracted.category
  );
  const issuer = pickText(
    extracted.issuer,
    extracted.vendor,
    extracted.merchant,
    extracted.store_name,
    extracted.sender
  );

  // Values under `_enrichment` are excluded from the token set.
  const coreExtracted = Object.fromEntries(
    Object.entries(extracted).filter(([key]) => key !== "_enrichment")
  );

  const collected: string[] = [];
  collected.push(...tokenize(ingestion.filename));
  for (const tag of tagTexts) collected.push(...tokenize(tag));
  for (const leaf of flattenValues(coreExtracted)) collected.push(...tokenize(leaf));
  if (docType) collected.push(...tokenize(docType));
  if (issuer) collected.push(...tokenize(issuer));

  return {
    tokens: dedupeTokens(collected, 120),
    extension: extractExtension(ingestion.filename),
    mime_type: normalizeText(ingestion.mime_type) || undefined,
    document_type: docType,
    issuer,
  };
}
|
|
274
|
+
|
|
275
|
+
/**
 * Learns policy assignments from a user's manual matches.
 *
 * Manual matches are stored as feature snapshots in the
 * `policy_match_feedback` table; new documents are then compared against
 * those snapshots to propose a policy.
 */
export class PolicyLearningService {
  /**
   * Builds the "no candidate" result shared by every early exit of
   * `resolveLearnedCandidate`.
   */
  private static emptyResolution(
    reason: PolicyLearningDecisionReason,
    evaluatedPolicies: number,
    evaluatedSamples = 0
  ): LearnedCandidateResolution {
    return {
      candidate: null,
      diagnostics: {
        reason,
        evaluatedPolicies,
        evaluatedSamples,
        topCandidates: [],
      },
    };
  }

  /**
   * Stores the features of an ingestion the user manually matched to a policy.
   *
   * The row is upserted on `(user_id, ingestion_id, policy_id)`. This is
   * best-effort: when the ingestion yields no tokens, or the write fails, the
   * problem is logged and the method returns without throwing.
   *
   * @param opts.supabase - Supabase client used for the write.
   * @param opts.userId - Owner of the feedback row.
   * @param opts.ingestion - Ingestion that was matched.
   * @param opts.policyId - Policy the ingestion was matched to.
   * @param opts.policyName - Optional policy name; stored as `null` when absent.
   */
  static async recordManualMatch(opts: {
    supabase: SupabaseClient;
    userId: string;
    ingestion: IngestionLike;
    policyId: string;
    policyName?: string;
  }): Promise<void> {
    const { supabase, userId, ingestion, policyId, policyName } = opts;
    const features = buildFromIngestionRow(ingestion);

    if (features.tokens.length === 0) {
      logger.warn("Skipping policy learning feedback: no usable tokens", {
        ingestionId: ingestion.id,
        policyId,
      });
      return;
    }

    const row = {
      user_id: userId,
      ingestion_id: ingestion.id,
      policy_id: policyId,
      policy_name: policyName ?? null,
      feedback_type: "manual_match",
      features,
    };

    const { error } = await supabase
      .from("policy_match_feedback")
      .upsert(row, { onConflict: "user_id,ingestion_id,policy_id" });

    if (error) {
      logger.error("Failed to save policy match feedback", {
        ingestionId: ingestion.id,
        policyId,
        error,
      });
      return;
    }

    logger.info("Saved policy learning feedback", {
      ingestionId: ingestion.id,
      policyId,
      tokens: features.tokens.length,
    });
  }

  /**
   * Counts feedback samples per policy for a user.
   *
   * Reads at most the 5000 most recent feedback rows, newest first.
   *
   * @param opts.supabase - Supabase client used for the read.
   * @param opts.userId - User whose feedback is counted.
   * @param opts.policyIds - Optional filter; ids are trimmed and blanks dropped.
   *   When empty or omitted, all of the user's policies are included.
   * @returns Stats keyed by policy id, or `{}` when the read fails.
   */
  static async getPolicyLearningStats(opts: {
    supabase: SupabaseClient;
    userId: string;
    policyIds?: string[];
  }): Promise<PolicyLearningStats> {
    const { supabase, userId } = opts;
    const normalizedPolicyIds = (opts.policyIds ?? []).map((id) => id.trim()).filter(Boolean);

    let query = supabase
      .from("policy_match_feedback")
      .select("policy_id,created_at")
      .eq("user_id", userId)
      .order("created_at", { ascending: false })
      .limit(5000);

    if (normalizedPolicyIds.length > 0) {
      query = query.in("policy_id", normalizedPolicyIds);
    }

    const { data, error } = await query;
    if (error) {
      logger.warn("Failed to read policy learning stats", { userId, error });
      return {};
    }

    // Accumulate in a Map rather than a plain object: policy ids come from the
    // database, and an id such as "constructor" or "__proto__" would otherwise
    // resolve to an inherited Object.prototype member instead of a fresh entry.
    const byPolicy = new Map<string, { samples: number; lastSampleAt: string | null }>();
    for (const row of data ?? []) {
      const policyId = typeof row.policy_id === "string" ? row.policy_id : "";
      if (!policyId) continue;
      const createdAt = typeof row.created_at === "string" ? row.created_at : null;

      const entry = byPolicy.get(policyId);
      if (!entry) {
        byPolicy.set(policyId, { samples: 1, lastSampleAt: createdAt });
        continue;
      }
      entry.samples += 1;
      // Rows arrive newest first, so only fill the timestamp if it is still missing.
      if (!entry.lastSampleAt && createdAt) {
        entry.lastSampleAt = createdAt;
      }
    }

    // Object.fromEntries defines own data properties, so every id is preserved.
    return Object.fromEntries(byPolicy);
  }

  /**
   * Proposes a policy for a document based on the user's past manual matches.
   *
   * Each of the (at most 400, most recent) feedback samples for the given
   * policies is scored against the document. Per policy, the three best sample
   * scores are averaged and a small boost (0.02 per extra sample, capped at
   * 0.08) is added. The highest-scoring policy is returned only if it reaches
   * the threshold required for its number of samples.
   *
   * @param opts.supabase - Supabase client used for the read.
   * @param opts.userId - User whose feedback is consulted.
   * @param opts.policyIds - Policies eligible for selection.
   * @param opts.filePath - Path of the document being classified.
   * @param opts.baselineEntities - Entities extracted from the document.
   * @param opts.documentText - Optional raw text of the document.
   * @returns The accepted candidate (or `null`) with diagnostics explaining the
   *   outcome. Read failures are logged and reported as `read_error`.
   */
  static async resolveLearnedCandidate(opts: {
    supabase: SupabaseClient;
    userId: string;
    policyIds: string[];
    filePath: string;
    baselineEntities: Record<string, unknown>;
    documentText?: string;
  }): Promise<LearnedCandidateResolution> {
    const { supabase, userId, policyIds, filePath, baselineEntities, documentText } = opts;
    if (policyIds.length === 0) {
      return PolicyLearningService.emptyResolution("no_policy_ids", 0);
    }

    const docFeatures = buildFromDocInput({ filePath, baselineEntities, documentText });
    if (docFeatures.tokens.length === 0) {
      return PolicyLearningService.emptyResolution("no_document_features", policyIds.length);
    }

    const { data, error } = await supabase
      .from("policy_match_feedback")
      .select("policy_id,policy_name,features")
      .eq("user_id", userId)
      .in("policy_id", policyIds)
      .order("created_at", { ascending: false })
      .limit(400);

    if (error) {
      logger.warn("Failed to read policy learning feedback", { userId, error });
      return PolicyLearningService.emptyResolution("read_error", policyIds.length);
    }

    const rows = (data ?? []) as PolicyLearningRow[];
    if (rows.length === 0) {
      return PolicyLearningService.emptyResolution("no_feedback_samples", policyIds.length);
    }

    // Score every usable sample and group the scores by policy.
    const byPolicy = new Map<string, number[]>();
    let validSamples = 0;
    for (const row of rows) {
      const sample = normalizeFeatures(row.features);
      if (!sample) continue;
      const score = scorePair(docFeatures, sample);
      const scores = byPolicy.get(row.policy_id);
      if (scores) {
        scores.push(score);
      } else {
        byPolicy.set(row.policy_id, [score]);
      }
      validSamples += 1;
    }

    if (byPolicy.size === 0) {
      return PolicyLearningService.emptyResolution("no_valid_samples", policyIds.length, validSamples);
    }

    const candidates: CandidatePolicyScore[] = [];
    for (const [policyId, scores] of byPolicy) {
      if (scores.length === 0) continue;
      scores.sort((a, b) => b - a);
      const topScores = scores.slice(0, 3);
      const averageTop = topScores.reduce((sum, value) => sum + value, 0) / topScores.length;
      const supportBoost = Math.min(0.08, (scores.length - 1) * 0.02);
      const score = clamp01(averageTop + supportBoost);
      const support = scores.length;
      const requiredScore = requiredScoreForSupport(support);

      candidates.push({
        policyId,
        score,
        support,
        requiredScore,
        accepted: score >= requiredScore,
      });
    }

    candidates.sort((a, b) => b.score - a.score);
    const best = candidates[0];
    if (!best) {
      return PolicyLearningService.emptyResolution("no_valid_samples", byPolicy.size, validSamples);
    }

    const topCandidates = candidates.slice(0, 3);
    const ranked = {
      evaluatedPolicies: byPolicy.size,
      evaluatedSamples: validSamples,
      bestCandidate: best,
      topCandidates,
    };

    if (!best.accepted) {
      return {
        candidate: null,
        diagnostics: { reason: "score_below_threshold", ...ranked },
      };
    }

    logger.info("Resolved learned policy candidate", {
      policyId: best.policyId,
      score: best.score,
      support: best.support,
    });
    return {
      candidate: {
        policyId: best.policyId,
        score: best.score,
        support: best.support,
      },
      diagnostics: { reason: "accepted", ...ranked },
    };
  }
}
|