@steno-ai/engine 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/storage.d.ts +1 -0
- package/dist/adapters/storage.d.ts.map +1 -1
- package/dist/extraction/llm-extractor.d.ts.map +1 -1
- package/dist/extraction/llm-extractor.js +5 -3
- package/dist/extraction/llm-extractor.js.map +1 -1
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +5 -1
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/prompts.d.ts +2 -2
- package/dist/extraction/prompts.d.ts.map +1 -1
- package/dist/extraction/prompts.js +12 -3
- package/dist/extraction/prompts.js.map +1 -1
- package/package.json +6 -2
- package/src/adapters/cache.js +2 -0
- package/src/adapters/embedding.js +2 -0
- package/src/adapters/llm.js +2 -0
- package/src/adapters/perplexity-embedding.js +78 -0
- package/src/adapters/storage.js +2 -0
- package/src/adapters/storage.ts +1 -0
- package/src/config.d.ts +211 -1
- package/src/config.d.ts.map +1 -1
- package/src/config.js +92 -0
- package/src/config.js.map +1 -1
- package/src/extraction/contradiction.js +23 -0
- package/src/extraction/dedup.js +93 -0
- package/src/extraction/dedup.js.map +1 -1
- package/src/extraction/entity-extractor.d.ts.map +1 -1
- package/src/extraction/entity-extractor.js +145 -0
- package/src/extraction/entity-extractor.js.map +1 -1
- package/src/extraction/hasher.js +8 -0
- package/src/extraction/heuristic.js +282 -0
- package/src/extraction/llm-extractor.d.ts +3 -1
- package/src/extraction/llm-extractor.d.ts.map +1 -1
- package/src/extraction/llm-extractor.js +238 -0
- package/src/extraction/llm-extractor.js.map +1 -1
- package/src/extraction/llm-extractor.ts +7 -5
- package/src/extraction/pipeline.d.ts +3 -0
- package/src/extraction/pipeline.d.ts.map +1 -1
- package/src/extraction/pipeline.js +398 -0
- package/src/extraction/pipeline.js.map +1 -1
- package/src/extraction/pipeline.ts +6 -1
- package/src/extraction/prompts.d.ts +28 -0
- package/src/extraction/prompts.d.ts.map +1 -0
- package/src/extraction/prompts.js +196 -0
- package/src/extraction/prompts.js.map +1 -1
- package/src/extraction/prompts.ts +12 -3
- package/src/extraction/sliding-window.js +84 -0
- package/src/extraction/sliding-window.js.map +1 -1
- package/src/extraction/types.d.ts +12 -0
- package/src/extraction/types.d.ts.map +1 -1
- package/src/extraction/types.js +2 -0
- package/src/feedback/tracker.js +90 -0
- package/src/models/api-key.d.ts +2 -2
- package/src/models/api-key.js +21 -0
- package/src/models/edge.d.ts +6 -6
- package/src/models/edge.js +29 -0
- package/src/models/entity.d.ts +2 -2
- package/src/models/entity.js +22 -0
- package/src/models/extraction.d.ts +6 -6
- package/src/models/extraction.js +40 -0
- package/src/models/fact-entity.js +14 -0
- package/src/models/fact.d.ts +191 -0
- package/src/models/fact.d.ts.map +1 -0
- package/src/models/fact.js +72 -0
- package/src/models/fact.js.map +1 -0
- package/src/models/index.js +13 -0
- package/src/models/memory-access.d.ts +4 -4
- package/src/models/memory-access.js +33 -0
- package/src/models/session.js +23 -0
- package/src/models/tenant.d.ts +248 -14
- package/src/models/tenant.d.ts.map +1 -1
- package/src/models/tenant.js +23 -0
- package/src/models/trigger.d.ts +5 -5
- package/src/models/trigger.js +41 -0
- package/src/models/usage-record.js +14 -0
- package/src/models/webhook.d.ts +1 -1
- package/src/models/webhook.js +25 -0
- package/src/retrieval/compound-search.d.ts.map +1 -1
- package/src/retrieval/compound-search.js +87 -0
- package/src/retrieval/compound-search.js.map +1 -1
- package/src/retrieval/contradiction-surfacer.js +64 -0
- package/src/retrieval/embedding-cache.js +56 -0
- package/src/retrieval/fusion.d.ts +1 -0
- package/src/retrieval/fusion.d.ts.map +1 -1
- package/src/retrieval/fusion.js +87 -0
- package/src/retrieval/fusion.js.map +1 -1
- package/src/retrieval/graph-traversal.d.ts +2 -1
- package/src/retrieval/graph-traversal.d.ts.map +1 -1
- package/src/retrieval/graph-traversal.js +208 -0
- package/src/retrieval/graph-traversal.js.map +1 -1
- package/src/retrieval/query-expansion.js +76 -0
- package/src/retrieval/reranker.js +47 -0
- package/src/retrieval/salience-scorer.js +41 -0
- package/src/retrieval/search.d.ts.map +1 -1
- package/src/retrieval/search.js +228 -0
- package/src/retrieval/search.js.map +1 -1
- package/src/retrieval/temporal-scorer.d.ts +18 -0
- package/src/retrieval/temporal-scorer.d.ts.map +1 -0
- package/src/retrieval/temporal-scorer.js +106 -0
- package/src/retrieval/temporal-scorer.js.map +1 -0
- package/src/retrieval/trigger-matcher.d.ts.map +1 -1
- package/src/retrieval/trigger-matcher.js +134 -0
- package/src/retrieval/trigger-matcher.js.map +1 -1
- package/src/retrieval/types.d.ts +4 -0
- package/src/retrieval/types.d.ts.map +1 -1
- package/src/retrieval/types.js +9 -0
- package/src/retrieval/types.js.map +1 -1
- package/src/retrieval/vector-search.d.ts.map +1 -1
- package/src/retrieval/vector-search.js +24 -0
- package/src/retrieval/vector-search.js.map +1 -1
- package/src/salience/decay.js +15 -0
- package/src/scratchpad/scratchpad.js +107 -0
- package/src/sessions/manager.d.ts +11 -0
- package/src/sessions/manager.d.ts.map +1 -0
- package/src/sessions/manager.js +63 -0
- package/src/sessions/manager.js.map +1 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"entity-extractor.js","sourceRoot":"","sources":["entity-extractor.ts"],"names":[],"mappings":"AAUA;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,OAAuB,EACvB,SAA2B,EAC3B,QAAgB,EAChB,QAA2B;IAE3B,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,2DAA2D;IAC3D,MAAM,cAAc,GAAG,IAAI,GAAG,EAA2B,CAAC;IAC1D,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;YAC9C,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,KAAK,MAAM,MAAM,IAAI,cAAc,CAAC,MAAM,EAAE,EAAE,CAAC;QAC7C,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,yBAAyB,CACtD,QAAQ,EACR,MAAM,CAAC,aAAa,EACpB,MAAM,CAAC,UAAU,CAClB,CAAC;QAEF,IAAI,QAAQ,EAAE,CAAC;YACb,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,aAAa,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,EAAE,CAAC;YAC/B,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAC/C,MAAM,OAAO,CAAC,YAAY,CAAC;gBACzB,GAAG,MAAM;gBACT,EAAE;gBACF,QAAQ;gBACR,SAAS,EAAE,GAAG;gBACd,cAAc,EAAE,SAAS,CAAC,KAAK;gBAC/B,YAAY,EAAE,SAAS,CAAC,UAAU;aACnC,CAAC,CAAC;YACH,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;YAC1C,eAAe,EAAE,CAAC;QACpB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,CAAC;AAC1C,CAAC;AAED;;;GAGG;AACH,+DAA+D;AAC/D,MAAM,iBAAiB,GAA2B;IAChD,YAAY,EAAE,OAAO;IACrB,gBAAgB,EAAE,YAAY;IAC9B,qBAAqB,EAAE,YAAY;IACnC,MAAM,EAAE,YAAY;IACpB,oBAAoB,EAAE,YAAY;IAClC,gBAAgB,EAAE,YAAY;IAC9B,aAAa,EAAE,YAAY;IAC3B,YAAY,EAAE,YAAY;IAC1B,UAAU,EAAE,YAAY;IACxB,SAAS,EAAE,UAAU;IACrB,WAAW,EAAE,UAAU;IACvB,WAAW,EAAE,UAAU;IACvB,UAAU,EAAE,UAAU;IACtB,UAAU,EAAE,YAAY;IACxB,YAAY,EAAE,WAAW;IACzB,eAAe,EAAE,OAAO;IACxB,WAAW,EAAE,OAAO;IACpB,aAAa,EAAE,SAAS;IACxB,YAAY,EAAE,SAAS;IACvB,KAAK,EAAE,SAAS;
|
|
1
|
+
{"version":3,"file":"entity-extractor.js","sourceRoot":"","sources":["entity-extractor.ts"],"names":[],"mappings":"AAUA;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,OAAuB,EACvB,SAA2B,EAC3B,QAAgB,EAChB,QAA2B;IAE3B,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,2DAA2D;IAC3D,MAAM,cAAc,GAAG,IAAI,GAAG,EAA2B,CAAC;IAC1D,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;YAC9C,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,KAAK,MAAM,MAAM,IAAI,cAAc,CAAC,MAAM,EAAE,EAAE,CAAC;QAC7C,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,yBAAyB,CACtD,QAAQ,EACR,MAAM,CAAC,aAAa,EACpB,MAAM,CAAC,UAAU,CAClB,CAAC;QAEF,IAAI,QAAQ,EAAE,CAAC;YACb,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,aAAa,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,EAAE,CAAC;YAC/B,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAC/C,MAAM,OAAO,CAAC,YAAY,CAAC;gBACzB,GAAG,MAAM;gBACT,EAAE;gBACF,QAAQ;gBACR,SAAS,EAAE,GAAG;gBACd,cAAc,EAAE,SAAS,CAAC,KAAK;gBAC/B,YAAY,EAAE,SAAS,CAAC,UAAU;aACnC,CAAC,CAAC;YACH,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;YAC1C,eAAe,EAAE,CAAC;QACpB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,CAAC;AAC1C,CAAC;AAED;;;GAGG;AACH,+DAA+D;AAC/D,MAAM,iBAAiB,GAA2B;IAChD,YAAY,EAAE,OAAO;IACrB,gBAAgB,EAAE,YAAY;IAC9B,qBAAqB,EAAE,YAAY;IACnC,MAAM,EAAE,YAAY;IACpB,oBAAoB,EAAE,YAAY;IAClC,gBAAgB,EAAE,YAAY;IAC9B,aAAa,EAAE,YAAY;IAC3B,YAAY,EAAE,YAAY;IAC1B,UAAU,EAAE,YAAY;IACxB,SAAS,EAAE,UAAU;IACrB,WAAW,EAAE,UAAU;IACvB,WAAW,EAAE,UAAU;IACvB,UAAU,EAAE,UAAU;IACtB,UAAU,EAAE,YAAY;IACxB,YAAY,EAAE,WAAW;IACzB,eAAe,EAAE,OAAO;IACxB,WAAW,EAAE,OAAO;IACpB,aAAa,EAAE,SAAS;IACxB,YAAY,EAAE,SAAS;IACvB,KAAK,EAAE,SAAS;IAChB,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,SAAS;IACrB,SAAS,EAAE,SAAS;IACpB,QAAQ,EAAE,SAAS;IACnB,WAAW,EAAE,SAAS;IACtB,OAAO,EAAE,SAAS;IAClB,UAAU,EAAE,SAAS;IACrB,OAAO,EAAE,SAAS;IAClB,MAAM,EAAE,SAAS;IACjB,OAAO,EAAE,SAAS;IAClB,QAAQ,EAAE,SAAS;CACpB,CAAC;AAEF,SAAS,iBAAiB,CAAC,QAAgB;
IACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAC5C,OAAO,iBAAiB,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC;AAC3C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,OAAuB,EACvB,QAAgB,EAChB,MAAc,EACd,KAAsB,EACtB,WAAgC;IAEhC,+EAA+E;IAC/E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,YAAY,GAAoB,EAAE,CAAC;IACzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC5D,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,UAAU,IAAI,kBAAkB,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QAC1E,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,YAAY,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,QAAQ,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAClD,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAClD,IAAI,QAAQ,IAAI,QAAQ,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,OAAO,CAAC,UAAU,CAAC;oBACvB,QAAQ;oBACR,QAAQ;oBACR,QAAQ;oBACR,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,MAAM,EAAE,GAAG;oBACX,QAAQ,EAAE,EAAE;oBACZ,MAAM;oBACN,EAAE,EAAE,MAAM,CAAC,UAAU,EAAE;iBACxB,CAAC,CAAC;gBACH,YAAY,EAAE,CAAC;YACjB,CAAC;YAAC,MAAM,CAAC;gBACP,oDAAoD;YACtD,CAAC;QACH,CAAC;QACD,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC3B,OAAO,CAAC,IAAI,CACV,0BAA0B,IAAI,CAAC,UAAU,QAAQ,IAAI,CAAC,QAAQ,QAAQ,IAAI,CAAC,UAAU,IAAI;gBACzF,WAAW,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,YAAY,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,IAAI;gBACvF,oBAAoB,CAAC,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAC1D,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,OAAuB,EACvB,SAA2B,EAC3B,QAAgB,EAChB,MAAc,EACd,QAA2B,EAC3B,KAAsB;IAEtB,MAAM,EAAE,WAAW,EAAE,eAAe,EAAE,GAAG,MAAM,gBAAgB,CAC7D,OAAO,EACP,SAAS,EACT,QAAQ,EACR,QAAQ,CACT,CAAC;IAEF,iCAAiC;IACjC,KAAK,MAAM,CAAC,aAAa,EAAE,QAAQ,CAAC,IAAI,WAAW,EAAE,CAAC;QACpD,KAAK,aAAa,CAAC,CAAC,mBAAmB;QAC
vC,MAAM,OAAO,CAAC,cAAc,CAAC,MAAM,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAC;IAC9D,CAAC;IAED,MAAM,YAAY,GAAG,MAAM,YAAY,CAAC,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;IAEvF,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC;AACxD,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export async function hashInput(input) {
|
|
2
|
+
const payload = JSON.stringify({ type: input.type, data: input.data });
|
|
3
|
+
const encoded = new TextEncoder().encode(payload);
|
|
4
|
+
const hashBuffer = await crypto.subtle.digest('SHA-256', encoded);
|
|
5
|
+
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
|
6
|
+
return hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
|
|
7
|
+
}
|
|
8
|
+
//# sourceMappingURL=hasher.js.map
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import nlp from 'compromise';
|
|
2
|
+
// ---------------------------------------------------------------------------
|
|
3
|
+
// Regex patterns
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
/**
 * Global (`g`) regular expressions used by the regex extraction pass.
 * Each one is meant to be scanned over the whole input text with `matchAll`.
 */
const REGEX = {
    // local-part@domain.tld with a TLD of at least two letters.
    email: /\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b/g,
    // 10-digit NANP-style number with optional "+1"/"1" prefix, optional
    // parentheses around the area code and space/dash/dot separators.
    // The leading (?<!\d) keeps the pattern from matching the tail of a longer
    // digit run (order IDs, card numbers, ...), which it previously did.
    phone: /(?<!\d)(?:\+?1[\s\-.]?)?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}\b/g,
    // http(s) URL running up to the next whitespace. The trailing lookbehind
    // backs off sentence punctuation (. , ; : ! ?) so "see https://x.io/a." does
    // not capture the final period as part of the URL.
    url: /https?:\/\/[^\s/$.?#].[^\s]*(?<![.,;:!?])/g,
    // Numeric dates (d/m/y, y-m-d with "/" or "-") and English month-name dates
    // in "Month D, YYYY" or "D Month YYYY" form, case-insensitive.
    date: /\b(?:\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}|\d{4}[\/\-]\d{2}[\/\-]\d{2}|(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2}(?:st|nd|rd|th)?,?\s+\d{4}|\d{1,2}(?:st|nd|rd|th)?\s+(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{4})\b/gi,
    // "$" amounts, or a number followed by a currency word/code.
    money: /\$\s?\d+(?:[.,]\d+)*(?:\.\d{2})?|\b\d+(?:[.,]\d+)*(?:\.\d{2})?\s?(?:dollars?|USD|EUR|GBP|euros?|pounds?)\b/gi,
};
|
|
12
|
+
const PATTERN_RULES = [
|
|
13
|
+
// Health / allergy — importance 0.95
|
|
14
|
+
{
|
|
15
|
+
pattern: /\bi(?:'m| am)\s+allergic\s+to\s+(.+?)(?:[.!?]|$)/i,
|
|
16
|
+
template: (m) => `User is allergic to ${m[1].trim()}`,
|
|
17
|
+
importance: 0.95,
|
|
18
|
+
tags: ['health', 'allergy'],
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
pattern: /\bi\s+have\s+(?:a\s+)?(?:allergy|allergies)\s+to\s+(.+?)(?:[.!?]|$)/i,
|
|
22
|
+
template: (m) => `User is allergic to ${m[1].trim()}`,
|
|
23
|
+
importance: 0.95,
|
|
24
|
+
tags: ['health', 'allergy'],
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
pattern: /\bi\s+(?:have|suffer from|was diagnosed with)\s+(.+?)(?:[.!?]|$)/i,
|
|
28
|
+
template: (m) => `User has ${m[1].trim()}`,
|
|
29
|
+
importance: 0.95,
|
|
30
|
+
tags: ['health'],
|
|
31
|
+
},
|
|
32
|
+
// Name — importance 0.9
|
|
33
|
+
{
|
|
34
|
+
pattern: /\bmy\s+name\s+is\s+([A-Za-z][a-zA-Z\s\-']{1,40}?)(?:[.!?,]|$)/i,
|
|
35
|
+
template: (m) => `User's name is ${m[1].trim()}`,
|
|
36
|
+
importance: 0.9,
|
|
37
|
+
tags: ['identity', 'name'],
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
pattern: /\bthey\s+call\s+me\s+([A-Za-z][a-zA-Z\s\-']{1,40}?)(?:[.!?,]|$)/i,
|
|
41
|
+
template: (m) => `User's name is ${m[1].trim()}`,
|
|
42
|
+
importance: 0.9,
|
|
43
|
+
tags: ['identity', 'name'],
|
|
44
|
+
},
|
|
45
|
+
// Identity — importance 0.85
|
|
46
|
+
{
|
|
47
|
+
pattern: /\bi(?:'m| am)\s+a(?:n)?\s+(.+?)(?:[.!?]|$)/i,
|
|
48
|
+
template: (m) => `User is a ${m[1].trim()}`,
|
|
49
|
+
importance: 0.85,
|
|
50
|
+
tags: ['identity'],
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
pattern: /\bi(?:'m| am)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)(?:[.!?,]|$)/,
|
|
54
|
+
template: (m) => `User is ${m[1].trim()}`,
|
|
55
|
+
importance: 0.85,
|
|
56
|
+
tags: ['identity'],
|
|
57
|
+
},
|
|
58
|
+
// Work / company — importance 0.8
|
|
59
|
+
{
|
|
60
|
+
pattern: /\bi\s+work\s+(?:at|for)\s+(.+?)(?:[.!?]|$)/i,
|
|
61
|
+
template: (m) => `User works at ${m[1].trim()}`,
|
|
62
|
+
importance: 0.8,
|
|
63
|
+
tags: ['work', 'company'],
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
pattern: /\bmy\s+(?:job|career|profession|occupation)\s+is\s+(.+?)(?:[.!?]|$)/i,
|
|
67
|
+
template: (m) => `User's job is ${m[1].trim()}`,
|
|
68
|
+
importance: 0.8,
|
|
69
|
+
tags: ['work'],
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
pattern: /\bi\s+(?:work|am employed)\s+as\s+(?:a(?:n)?\s+)?(.+?)(?:[.!?]|$)/i,
|
|
73
|
+
template: (m) => `User works as ${m[1].trim()}`,
|
|
74
|
+
importance: 0.8,
|
|
75
|
+
tags: ['work'],
|
|
76
|
+
},
|
|
77
|
+
// Location — importance 0.7
|
|
78
|
+
{
|
|
79
|
+
pattern: /\bi\s+live\s+in\s+(.+?)(?:[.!?]|$)/i,
|
|
80
|
+
template: (m) => `User lives in ${m[1].trim()}`,
|
|
81
|
+
importance: 0.7,
|
|
82
|
+
tags: ['location'],
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
pattern: /\bi(?:'m| am)\s+from\s+(.+?)(?:[.!?]|$)/i,
|
|
86
|
+
template: (m) => `User is from ${m[1].trim()}`,
|
|
87
|
+
importance: 0.7,
|
|
88
|
+
tags: ['location'],
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
pattern: /\bi\s+(?:moved|relocated)\s+to\s+(.+?)(?:[.!?]|$)/i,
|
|
92
|
+
template: (m) => `User moved to ${m[1].trim()}`,
|
|
93
|
+
importance: 0.7,
|
|
94
|
+
tags: ['location'],
|
|
95
|
+
},
|
|
96
|
+
// Preferences (like/love/enjoy) — importance 0.6
|
|
97
|
+
{
|
|
98
|
+
pattern: /\bi\s+(?:really\s+)?(?:like|love|enjoy|adore)\s+(.+?)(?:[.!?]|$)/i,
|
|
99
|
+
template: (m) => `User likes ${m[1].trim()}`,
|
|
100
|
+
importance: 0.6,
|
|
101
|
+
tags: ['preference', 'like'],
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
pattern: /\bmy\s+favorite\s+(?:\w+\s+)?is\s+(.+?)(?:[.!?]|$)/i,
|
|
105
|
+
template: (m) => `User's favorite is ${m[1].trim()}`,
|
|
106
|
+
importance: 0.6,
|
|
107
|
+
tags: ['preference', 'like'],
|
|
108
|
+
},
|
|
109
|
+
// Dislikes — importance 0.6
|
|
110
|
+
{
|
|
111
|
+
pattern: /\bi\s+(?:really\s+)?(?:hate|dislike|can'?t stand|despise|detest)\s+(.+?)(?:[.!?]|$)/i,
|
|
112
|
+
template: (m) => `User dislikes ${m[1].trim()}`,
|
|
113
|
+
importance: 0.6,
|
|
114
|
+
tags: ['preference', 'dislike'],
|
|
115
|
+
},
|
|
116
|
+
// Trivia / other — importance 0.3
|
|
117
|
+
{
|
|
118
|
+
pattern: /\bi\s+(?:think|believe|feel|guess|suppose)\s+(?:that\s+)?(.+?)(?:[.!?]|$)/i,
|
|
119
|
+
template: (m) => `User thinks ${m[1].trim()}`,
|
|
120
|
+
importance: 0.3,
|
|
121
|
+
tags: ['opinion'],
|
|
122
|
+
},
|
|
123
|
+
];
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
// Helpers
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
/**
 * Remove a leading speaker label such as "user: " or "assistant: " from a line.
 *
 * A label is one or more ASCII letters, underscores or hyphens at the very
 * start of the line, followed by a colon and any whitespace. Lines without
 * such a label are returned unchanged.
 */
function stripRolePrefix(line) {
    const label = line.match(/^[a-zA-Z_\-]+:\s*/);
    return label ? line.slice(label[0].length) : line;
}
|
|
131
|
+
/**
 * Build a fact record for the heuristic extractor.
 *
 * Every fact produced here is a text-modality, conversation-sourced "add"
 * operation; only the arguments vary.
 *
 * @param content              Normalized fact sentence.
 * @param originalContent      Source text the fact was derived from.
 * @param importance           Importance score stored on the fact.
 * @param confidence           Confidence score stored on the fact.
 * @param tags                 Tags stored on the fact.
 * @param entityCanonicalNames Canonical entity names linked to the fact (defaults to a new empty array).
 * @returns The assembled fact object.
 */
function makeFact(content, originalContent, importance, confidence, tags, entityCanonicalNames = []) {
    // Fields shared by every heuristic fact, kept first to preserve key order.
    const fixed = { content, importance, confidence, sourceType: 'conversation', modality: 'text' };
    return { ...fixed, tags, originalContent, operation: 'add', entityCanonicalNames };
}
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
// Regex extraction (runs on the full text)
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
function extractRegex(text) {
|
|
148
|
+
const facts = [];
|
|
149
|
+
const seenContents = new Set();
|
|
150
|
+
function addFact(content, original, tags) {
|
|
151
|
+
if (!seenContents.has(content)) {
|
|
152
|
+
seenContents.add(content);
|
|
153
|
+
facts.push(makeFact(content, original, 0.8, 0.9, tags));
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
// Emails
|
|
157
|
+
for (const match of text.matchAll(REGEX.email)) {
|
|
158
|
+
const email = match[0];
|
|
159
|
+
addFact(`User's email is ${email}`, email, ['contact', 'email']);
|
|
160
|
+
}
|
|
161
|
+
// Phone numbers
|
|
162
|
+
for (const match of text.matchAll(REGEX.phone)) {
|
|
163
|
+
const phone = match[0];
|
|
164
|
+
addFact(`User's phone number is ${phone}`, phone, ['contact', 'phone']);
|
|
165
|
+
}
|
|
166
|
+
// URLs
|
|
167
|
+
for (const match of text.matchAll(REGEX.url)) {
|
|
168
|
+
const url = match[0];
|
|
169
|
+
addFact(`User mentioned URL: ${url}`, url, ['url']);
|
|
170
|
+
}
|
|
171
|
+
// Dates
|
|
172
|
+
for (const match of text.matchAll(REGEX.date)) {
|
|
173
|
+
const date = match[0];
|
|
174
|
+
addFact(`Mentioned date: ${date}`, date, ['date']);
|
|
175
|
+
}
|
|
176
|
+
// Money
|
|
177
|
+
for (const match of text.matchAll(REGEX.money)) {
|
|
178
|
+
const amount = match[0];
|
|
179
|
+
addFact(`Mentioned monetary amount: ${amount}`, amount, ['money']);
|
|
180
|
+
}
|
|
181
|
+
return facts;
|
|
182
|
+
}
|
|
183
|
+
// ---------------------------------------------------------------------------
|
|
184
|
+
// Pattern extraction (runs line by line, role prefix stripped)
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
function extractPatterns(lines) {
|
|
187
|
+
const facts = [];
|
|
188
|
+
const seenContents = new Set();
|
|
189
|
+
for (const rawLine of lines) {
|
|
190
|
+
const line = stripRolePrefix(rawLine).trim();
|
|
191
|
+
if (!line)
|
|
192
|
+
continue;
|
|
193
|
+
for (const rule of PATTERN_RULES) {
|
|
194
|
+
// Reset lastIndex for global regexes (these aren't global, but be safe)
|
|
195
|
+
const match = line.match(rule.pattern);
|
|
196
|
+
if (match) {
|
|
197
|
+
const content = rule.template(match);
|
|
198
|
+
if (!seenContents.has(content)) {
|
|
199
|
+
seenContents.add(content);
|
|
200
|
+
facts.push(makeFact(content, line, rule.importance, 0.7, rule.tags));
|
|
201
|
+
}
|
|
202
|
+
// Only use the first matching rule per line to avoid overlapping facts
|
|
203
|
+
break;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
return facts;
|
|
208
|
+
}
|
|
209
|
+
// ---------------------------------------------------------------------------
|
|
210
|
+
// NER via compromise
|
|
211
|
+
// ---------------------------------------------------------------------------
|
|
212
|
+
function extractEntities(text) {
|
|
213
|
+
const doc = nlp(text);
|
|
214
|
+
const entities = [];
|
|
215
|
+
const seenCanonical = new Set();
|
|
216
|
+
function addEntity(name, entityType) {
|
|
217
|
+
const trimmed = name.trim();
|
|
218
|
+
if (!trimmed)
|
|
219
|
+
return;
|
|
220
|
+
const canonical = trimmed.toLowerCase();
|
|
221
|
+
if (!seenCanonical.has(`${entityType}:${canonical}`)) {
|
|
222
|
+
seenCanonical.add(`${entityType}:${canonical}`);
|
|
223
|
+
entities.push({
|
|
224
|
+
name: trimmed,
|
|
225
|
+
entityType,
|
|
226
|
+
canonicalName: canonical,
|
|
227
|
+
properties: {},
|
|
228
|
+
});
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
// People
|
|
232
|
+
const people = doc.people().out('array');
|
|
233
|
+
for (const name of people) {
|
|
234
|
+
addEntity(name, 'person');
|
|
235
|
+
}
|
|
236
|
+
// Organizations
|
|
237
|
+
const orgs = doc.organizations().out('array');
|
|
238
|
+
for (const org of orgs) {
|
|
239
|
+
addEntity(org, 'organization');
|
|
240
|
+
}
|
|
241
|
+
// Places
|
|
242
|
+
const places = doc.places().out('array');
|
|
243
|
+
for (const place of places) {
|
|
244
|
+
addEntity(place, 'location');
|
|
245
|
+
}
|
|
246
|
+
return entities;
|
|
247
|
+
}
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
// Main export
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
export function extractHeuristic(text) {
|
|
252
|
+
const lines = text.split('\n');
|
|
253
|
+
// 1. Regex extraction — run on full text
|
|
254
|
+
const regexFacts = extractRegex(text);
|
|
255
|
+
const regexContents = new Set(regexFacts.map((f) => f.content));
|
|
256
|
+
// 2. Pattern extraction — line by line, strip role prefix
|
|
257
|
+
const patternFacts = extractPatterns(lines);
|
|
258
|
+
// 3. De-duplicate: remove pattern facts whose content overlaps with regex facts
|
|
259
|
+
// (e.g., if regex grabbed the email and a pattern also mentions it)
|
|
260
|
+
const filteredPatternFacts = patternFacts.filter((f) => !regexContents.has(f.content));
|
|
261
|
+
// 4. NER entities via compromise
|
|
262
|
+
const entities = extractEntities(text);
|
|
263
|
+
const facts = [...regexFacts, ...filteredPatternFacts];
|
|
264
|
+
// Overall confidence: weighted average or fixed 0.9 for regex, 0.7 for pattern
|
|
265
|
+
// Use the maximum confidence across extracted facts, or 0.9 if only regex results exist
|
|
266
|
+
const confidence = facts.length > 0
|
|
267
|
+
? Math.max(...facts.map((f) => f.confidence))
|
|
268
|
+
: entities.length > 0
|
|
269
|
+
? 0.6
|
|
270
|
+
: 0.5;
|
|
271
|
+
return {
|
|
272
|
+
facts,
|
|
273
|
+
entities,
|
|
274
|
+
edges: [],
|
|
275
|
+
tier: 'heuristic',
|
|
276
|
+
confidence,
|
|
277
|
+
tokensInput: 0,
|
|
278
|
+
tokensOutput: 0,
|
|
279
|
+
model: null,
|
|
280
|
+
};
|
|
281
|
+
}
|
|
282
|
+
//# sourceMappingURL=heuristic.js.map
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import type { LLMAdapter } from '../adapters/llm.js';
|
|
2
2
|
import type { ExtractionResult } from './types.js';
|
|
3
|
-
import type { ExtractionTier } from '../config.js';
|
|
3
|
+
import type { ExtractionTier, DomainEntityType } from '../config.js';
|
|
4
4
|
/** Configuration object accepted by the LLM-based extractor. */
export interface LLMExtractorConfig {
|
|
5
5
|
/** LLM adapter the extractor sends its prompts to. */
llm: LLMAdapter;
|
|
6
6
|
/** Extraction tier this configuration belongs to. */
tier: ExtractionTier;
|
|
7
|
+
/** Optional list of entity type names (added in this version) — presumably restricts or guides graph extraction; confirm against the prompt builder. */
entityTypes?: string[];
|
|
8
|
+
/** Optional domain-specific entity type definitions (added in this version); see `DomainEntityType` in the config module. */
domainEntityTypes?: DomainEntityType[];
|
|
7
9
|
}
|
|
8
10
|
/**
|
|
9
11
|
* Two-pass extraction like Mem0:
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm-extractor.d.ts","sourceRoot":"","sources":["llm-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAE,gBAAgB,EAAiD,MAAM,YAAY,CAAC;AAClG,OAAO,KAAK,EAAE,cAAc,EAAY,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"llm-extractor.d.ts","sourceRoot":"","sources":["llm-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAE,gBAAgB,EAAiD,MAAM,YAAY,CAAC;AAClG,OAAO,KAAK,EAAE,cAAc,EAAY,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAI/E,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,UAAU,CAAC;IAChB,IAAI,EAAE,cAAc,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,iBAAiB,CAAC,EAAE,gBAAgB,EAAE,CAAC;CACxC;AAED;;;;GAIG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,kBAAkB,EAC1B,KAAK,EAAE,MAAM,EACb,aAAa,CAAC,EAAE,KAAK,CAAC;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,GAC5D,OAAO,CAAC,gBAAgB,CAAC,CA8M3B;AAsBD;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAavD"}
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import { buildFactExtractionPrompt, buildGraphExtractionPrompt } from './prompts.js';
|
|
2
|
+
import { createEnrichedSegments } from './sliding-window.js';
|
|
3
|
+
/**
|
|
4
|
+
* Two-pass extraction like Mem0:
|
|
5
|
+
* Pass 1: Extract facts as simple strings (focused, high quality)
|
|
6
|
+
* Pass 2: Extract entities + edges from the facts (separate concern)
|
|
7
|
+
*/
|
|
8
|
+
export async function extractWithLLM(config, input, existingFacts) {
|
|
9
|
+
let totalTokensIn = 0;
|
|
10
|
+
let totalTokensOut = 0;
|
|
11
|
+
// ── PASS 1: Fact extraction with Sliding Window Inference ──
|
|
12
|
+
// For long inputs, split into overlapping segments so the LLM can resolve
|
|
13
|
+
// pronouns and references using surrounding context (like Hydra DB).
|
|
14
|
+
const segments = createEnrichedSegments(input);
|
|
15
|
+
let factStrings = [];
|
|
16
|
+
let factEntries = [];
|
|
17
|
+
// Process segments (in parallel for speed, up to 4 at a time)
|
|
18
|
+
const segmentBatches = [];
|
|
19
|
+
for (let i = 0; i < segments.length; i += 4) {
|
|
20
|
+
segmentBatches.push(segments.slice(i, i + 4));
|
|
21
|
+
}
|
|
22
|
+
for (const batch of segmentBatches) {
|
|
23
|
+
const batchPromises = batch.map(async (seg) => {
|
|
24
|
+
const factMessages = buildFactExtractionPrompt(seg.contextWindow);
|
|
25
|
+
// Append existing facts for dedup context
|
|
26
|
+
if (existingFacts && existingFacts.length > 0) {
|
|
27
|
+
const factsBlock = existingFacts
|
|
28
|
+
.map(f => `- [lineage: ${f.lineageId}] ${f.content}`)
|
|
29
|
+
.join('\n');
|
|
30
|
+
factMessages[1].content += `\n\n--- EXISTING FACTS (skip duplicates, mark updates) ---\n${factsBlock}`;
|
|
31
|
+
}
|
|
32
|
+
// Also append already-extracted facts from previous segments to avoid duplicates
|
|
33
|
+
if (factEntries.length > 0) {
|
|
34
|
+
const alreadyExtracted = factEntries.map(f => `- ${f.text}`).join('\n');
|
|
35
|
+
factMessages[1].content += `\n\n--- ALREADY EXTRACTED (skip these) ---\n${alreadyExtracted}`;
|
|
36
|
+
}
|
|
37
|
+
try {
|
|
38
|
+
const factResponse = await config.llm.complete(factMessages, { temperature: 0, responseFormat: 'json' });
|
|
39
|
+
totalTokensIn += factResponse.tokensInput;
|
|
40
|
+
totalTokensOut += factResponse.tokensOutput;
|
|
41
|
+
const parsed = JSON.parse(factResponse.content);
|
|
42
|
+
const rawFacts = Array.isArray(parsed.facts) ? parsed.facts : [];
|
|
43
|
+
const entries = [];
|
|
44
|
+
for (const f of rawFacts) {
|
|
45
|
+
if (typeof f === 'string') {
|
|
46
|
+
const trimmed = f.trim();
|
|
47
|
+
if (trimmed.length > 0)
|
|
48
|
+
entries.push({ text: trimmed, importance: 0.5, sourceChunk: seg.contextWindow });
|
|
49
|
+
}
|
|
50
|
+
else if (f && typeof f === 'object') {
|
|
51
|
+
const obj = f;
|
|
52
|
+
const text = (typeof obj.t === 'string' ? obj.t : typeof obj.text === 'string' ? obj.text : '').trim();
|
|
53
|
+
const importance = typeof obj.i === 'number' ? obj.i : typeof obj.importance === 'number' ? obj.importance : 0.5;
|
|
54
|
+
const eventDate = obj.ed ? new Date(obj.ed) : undefined;
|
|
55
|
+
const documentDate = obj.dd ? new Date(obj.dd) : undefined;
|
|
56
|
+
if (text.length > 0)
|
|
57
|
+
entries.push({
|
|
58
|
+
text,
|
|
59
|
+
importance: Math.max(0, Math.min(1, importance)),
|
|
60
|
+
sourceChunk: seg.contextWindow,
|
|
61
|
+
eventDate: eventDate && !isNaN(eventDate.getTime()) ? eventDate : undefined,
|
|
62
|
+
documentDate: documentDate && !isNaN(documentDate.getTime()) ? documentDate : undefined,
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
return entries;
|
|
67
|
+
}
|
|
68
|
+
catch {
|
|
69
|
+
return [];
|
|
70
|
+
}
|
|
71
|
+
});
|
|
72
|
+
const batchResults = await Promise.all(batchPromises);
|
|
73
|
+
for (const entries of batchResults) {
|
|
74
|
+
factEntries.push(...entries);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
// Deduplicate facts by content similarity (simple string match)
|
|
78
|
+
const seenContent = new Set();
|
|
79
|
+
factEntries = factEntries.filter(e => {
|
|
80
|
+
const key = e.text.toLowerCase().trim();
|
|
81
|
+
if (seenContent.has(key))
|
|
82
|
+
return false;
|
|
83
|
+
seenContent.add(key);
|
|
84
|
+
return true;
|
|
85
|
+
});
|
|
86
|
+
factStrings = factEntries.map(e => e.text);
|
|
87
|
+
if (factEntries.length === 0) {
|
|
88
|
+
return emptyResult(config.tier, config.llm.model);
|
|
89
|
+
}
|
|
90
|
+
if (factStrings.length === 0) {
|
|
91
|
+
return emptyResult(config.tier, config.llm.model);
|
|
92
|
+
}
|
|
93
|
+
// Build ExtractedFact objects from parsed entries with LLM-scored importance
|
|
94
|
+
const facts = factEntries.map(({ text, importance, sourceChunk, eventDate, documentDate }) => ({
|
|
95
|
+
content: text,
|
|
96
|
+
importance,
|
|
97
|
+
confidence: 0.8,
|
|
98
|
+
sourceType: 'conversation',
|
|
99
|
+
modality: 'text',
|
|
100
|
+
tags: [],
|
|
101
|
+
originalContent: input,
|
|
102
|
+
entityCanonicalNames: [],
|
|
103
|
+
sourceChunk,
|
|
104
|
+
eventDate,
|
|
105
|
+
documentDate,
|
|
106
|
+
}));
|
|
107
|
+
// ── Contextual memory wrappers ──
|
|
108
|
+
// Prepend source context so the embedding captures the full meaning.
|
|
109
|
+
// E.g. "User went to the gym" becomes "Context: <segment>... | Fact: User went to the gym"
|
|
110
|
+
// This is a transient field — only used at embedding time, never stored.
|
|
111
|
+
for (const fact of facts) {
|
|
112
|
+
const src = fact.sourceChunk ?? input;
|
|
113
|
+
const contextPrefix = src.length > 100
|
|
114
|
+
? `Context: ${src.slice(0, 200).trim()}... | Fact: `
|
|
115
|
+
: `Context: ${src.trim()} | Fact: `;
|
|
116
|
+
fact.contextualContent = contextPrefix + fact.content;
|
|
117
|
+
}
|
|
118
|
+
// ── PASS 2: Graph extraction (entities + edges) from the facts ──
|
|
119
|
+
let entities = [];
|
|
120
|
+
let edges = [];
|
|
121
|
+
try {
|
|
122
|
+
const graphMessages = buildGraphExtractionPrompt(factStrings, config.entityTypes, config.domainEntityTypes);
|
|
123
|
+
const graphResponse = await config.llm.complete(graphMessages, { temperature: 0, responseFormat: 'json' });
|
|
124
|
+
totalTokensIn += graphResponse.tokensInput;
|
|
125
|
+
totalTokensOut += graphResponse.tokensOutput;
|
|
126
|
+
const graphParsed = JSON.parse(graphResponse.content);
|
|
127
|
+
// Parse entities
|
|
128
|
+
const seenEntities = new Set();
|
|
129
|
+
if (Array.isArray(graphParsed.entities)) {
|
|
130
|
+
for (const e of graphParsed.entities) {
|
|
131
|
+
if (!e || typeof e.name !== 'string')
|
|
132
|
+
continue;
|
|
133
|
+
const entity = e;
|
|
134
|
+
const canonical = normalizeEntityName(entity.name);
|
|
135
|
+
if (canonical.length === 0 || seenEntities.has(canonical))
|
|
136
|
+
continue;
|
|
137
|
+
seenEntities.add(canonical);
|
|
138
|
+
// Capture properties from domain entity types (e.g., {"company_size": "enterprise"})
|
|
139
|
+
const rawProps = entity.properties;
|
|
140
|
+
const properties = (rawProps && typeof rawProps === 'object' && !Array.isArray(rawProps))
|
|
141
|
+
? rawProps
|
|
142
|
+
: {};
|
|
143
|
+
entities.push({
|
|
144
|
+
name: canonical.split(' ').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' '),
|
|
145
|
+
entityType: String(entity.entity_type ?? entity.type ?? 'concept'),
|
|
146
|
+
canonicalName: canonical,
|
|
147
|
+
properties,
|
|
148
|
+
});
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
// Parse edges
|
|
152
|
+
if (Array.isArray(graphParsed.edges)) {
|
|
153
|
+
for (const r of graphParsed.edges) {
|
|
154
|
+
if (!r)
|
|
155
|
+
continue;
|
|
156
|
+
const rel = r;
|
|
157
|
+
const rawSource = typeof rel.source === 'string' ? rel.source :
|
|
158
|
+
typeof rel.source_name === 'string' ? rel.source_name : null;
|
|
159
|
+
const rawTarget = typeof rel.target === 'string' ? rel.target :
|
|
160
|
+
typeof rel.target_name === 'string' ? rel.target_name : null;
|
|
161
|
+
if (!rawSource || !rawTarget)
|
|
162
|
+
continue;
|
|
163
|
+
const source = normalizeEntityName(rawSource);
|
|
164
|
+
const target = normalizeEntityName(rawTarget);
|
|
165
|
+
if (!source || !target)
|
|
166
|
+
continue;
|
|
167
|
+
edges.push({
|
|
168
|
+
sourceName: source,
|
|
169
|
+
targetName: target,
|
|
170
|
+
relation: String(rel.relation ?? 'related_to'),
|
|
171
|
+
edgeType: isValidEdgeType(rel.edge_type) ? rel.edge_type : 'associative',
|
|
172
|
+
confidence: 0.8,
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
// Link entities to facts by text match
|
|
177
|
+
for (const fact of facts) {
|
|
178
|
+
const contentLower = fact.content.toLowerCase();
|
|
179
|
+
for (const entity of entities) {
|
|
180
|
+
if (entity.canonicalName === 'user') {
|
|
181
|
+
if (contentLower.startsWith('user ') || contentLower.includes(' user ')) {
|
|
182
|
+
fact.entityCanonicalNames.push(entity.canonicalName);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
else if (entity.canonicalName.length >= 3 && contentLower.includes(entity.canonicalName)) {
|
|
186
|
+
fact.entityCanonicalNames.push(entity.canonicalName);
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
catch {
|
|
192
|
+
// Graph pass failed — we still have facts, just no graph. That's OK.
|
|
193
|
+
}
|
|
194
|
+
return {
|
|
195
|
+
facts,
|
|
196
|
+
entities,
|
|
197
|
+
edges,
|
|
198
|
+
tier: config.tier,
|
|
199
|
+
confidence: 0.8,
|
|
200
|
+
tokensInput: totalTokensIn,
|
|
201
|
+
tokensOutput: totalTokensOut,
|
|
202
|
+
model: config.llm.model,
|
|
203
|
+
};
|
|
204
|
+
}
|
|
205
|
+
/**
 * Build the "nothing extracted" result returned when no facts survive
 * parsing and de-duplication.
 *
 * Every call yields fresh, independent arrays, so callers may mutate the
 * returned object without affecting later results.
 *
 * @param tier  Extraction tier to echo back in the result.
 * @param model Model identifier to echo back in the result.
 * @returns A result with empty fact/entity/edge lists, zero confidence and
 *          zero token usage.
 */
function emptyResult(tier, model) {
    const nothingExtracted = {
        facts: [],
        entities: [],
        edges: [],
        tier: tier,
        confidence: 0,
        tokensInput: 0,
        tokensOutput: 0,
        model: model,
    };
    return nothingExtracted;
}
|
|
217
|
+
/**
 * Report whether a value is one of the edge-type strings this module accepts.
 *
 * Anything that is not exactly one of the five known strings (including
 * non-string values, different casing, or padded strings) is rejected.
 *
 * @param t Arbitrary value, typically taken from parsed LLM JSON output.
 * @returns `true` only for 'associative', 'causal', 'temporal',
 *          'contradictory' or 'hierarchical'.
 */
function isValidEdgeType(t) {
    // `switch` compares with strict equality, so non-strings never match a case.
    switch (t) {
        case 'associative':
        case 'causal':
        case 'temporal':
        case 'contradictory':
        case 'hierarchical':
            return true;
        default:
            return false;
    }
}
|
|
221
|
+
/**
 * Normalize an entity name to a clean canonical form.
 *
 * Steps, in order:
 *  1. trim surrounding whitespace;
 *  2. drop leading list/markdown markers (-, –, —, *, •, #, >);
 *  3. drop a trailing possessive ('s or ’s);
 *  4. strip leading and trailing characters that are not letters, combining
 *     marks or digits (Unicode-aware, so accented and non-Latin names such as
 *     "Élodie" or "東京" are kept intact instead of being truncated or emptied);
 *  5. drop up to two leading noise words (articles, question words,
 *     auxiliaries, possessive/demonstrative determiners);
 *  6. collapse internal whitespace runs to a single space and lower-case.
 *
 * @param {string} raw Entity name as produced upstream; must be a string
 *                     (`.trim()` is called on it directly).
 * @returns {string} The canonical lower-case name; may be the empty string
 *                   when nothing meaningful is left.
 */
export function normalizeEntityName(raw) {
    let name = raw.trim();
    // Leading bullet / heading / quote markers.
    name = name.replace(/^[-–—*•#>]+\s*/g, '');
    // Trailing possessive, straight then curly apostrophe.
    name = name.replace(/'s$/i, '');
    name = name.replace(/\u2019s$/i, '');
    // Trim edge punctuation/symbols. Property escapes keep any letter, mark or
    // digit, not just ASCII, so international names are not mangled.
    name = name.replace(/^[^\p{L}\p{M}\p{N}]+/u, '');
    name = name.replace(/[^\p{L}\p{M}\p{N}]+$/u, '');
    const leadingNoise = /^(the|a|an|when|where|how|what|why|who|is|are|was|were|has|have|had|my|our|their|his|her|its|this|that|these|those)\s+/i;
    // Applied twice on purpose: removes at most two stacked noise words
    // (e.g. "is the plan" -> "plan").
    name = name.replace(leadingNoise, '');
    name = name.replace(leadingNoise, '');
    name = name.replace(/\s+/g, ' ').trim();
    name = name.toLowerCase();
    return name;
}
|
|
238
|
+
//# sourceMappingURL=llm-extractor.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm-extractor.js","sourceRoot":"","sources":["llm-extractor.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,yBAAyB,EAAE,0BAA0B,EAAyB,MAAM,cAAc,CAAC;AAC5G,OAAO,EAAE,sBAAsB,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"llm-extractor.js","sourceRoot":"","sources":["llm-extractor.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,yBAAyB,EAAE,0BAA0B,EAAyB,MAAM,cAAc,CAAC;AAC5G,OAAO,EAAE,sBAAsB,EAAE,MAAM,qBAAqB,CAAC;AAS7D;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAA0B,EAC1B,KAAa,EACb,aAA6D;IAE7D,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,cAAc,GAAG,CAAC,CAAC;IAEvB,8DAA8D;IAC9D,0EAA0E;IAC1E,qEAAqE;IACrE,MAAM,QAAQ,GAAG,sBAAsB,CAAC,KAAK,CAAC,CAAC;IAE/C,IAAI,WAAW,GAAa,EAAE,CAAC;IAC/B,IAAI,WAAW,GAA4G,EAAE,CAAC;IAE9H,8DAA8D;IAC9D,MAAM,cAAc,GAAsB,EAAE,CAAC;IAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5C,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;QACnC,MAAM,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;YAC5C,MAAM,YAAY,GAAG,yBAAyB,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;YAElE,0CAA0C;YAC1C,IAAI,aAAa,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9C,MAAM,UAAU,GAAG,aAAa;qBAC7B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,eAAe,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC;qBACpD,IAAI,CAAC,IAAI,CAAC,CAAC;gBACd,YAAY,CAAC,CAAC,CAAE,CAAC,OAAO,IAAI,+DAA+D,UAAU,EAAE,CAAC;YAC1G,CAAC;YAED,iFAAiF;YACjF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,MAAM,gBAAgB,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACxE,YAAY,CAAC,CAAC,CAAE,CAAC,OAAO,IAAI,+CAA+C,gBAAgB,EAAE,CAAC;YAChG,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,YAAY,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,cAAc,EAAE,MAAM,EAAE,CAAC,CAAC;gBACzG,aAAa,IAAI,YAAY,CAAC,WAAW,CAAC;gBAC1C,cAAc,IAAI,YAAY,CAAC,YAAY,CAAC;gBAE5C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,CAA4B,CAAC;gBAC3E,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACjE,MAAM,OAAO,GAA4G,EAAE,CAAC;gBAC5H,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;oBACzB,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;wBAC1B,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,CA
AC;wBACzB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;4BAAE,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC;oBAC3G,CAAC;yBAAM,IAAI,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;wBACtC,MAAM,GAAG,GAAG,CAA4B,CAAC;wBACzC,MAAM,IAAI,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;wBACvG,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC;wBACjH,MAAM,SAAS,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,EAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;wBAClE,MAAM,YAAY,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,EAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;wBACrE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;4BAAE,OAAO,CAAC,IAAI,CAAC;gCAChC,IAAI;gCACJ,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;gCAChD,WAAW,EAAE,GAAG,CAAC,aAAa;gCAC9B,SAAS,EAAE,SAAS,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;gCAC3E,YAAY,EAAE,YAAY,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS;6BACxF,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;gBACD,OAAO,OAAO,CAAC;YACjB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;QACtD,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;YACnC,WAAW,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,gEAAgE;IAChE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;IACtC,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;QACnC,MAAM,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QACxC,IAAI,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QACvC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACrB,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IACH,WAAW,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAE3C,IAAI,WAAW,CAAC,MAAM
,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,WAAW,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,WAAW,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC;IAED,6EAA6E;IAC7E,MAAM,KAAK,GAAoB,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC,CAAC;QAC9G,OAAO,EAAE,IAAI;QACb,UAAU;QACV,UAAU,EAAE,GAAG;QACf,UAAU,EAAE,cAAuB;QACnC,QAAQ,EAAE,MAAe;QACzB,IAAI,EAAE,EAAE;QACR,eAAe,EAAE,KAAK;QACtB,oBAAoB,EAAE,EAAE;QACxB,WAAW;QACX,SAAS;QACT,YAAY;KACb,CAAC,CAAC,CAAC;IAEJ,mCAAmC;IACnC,qEAAqE;IACrE,2FAA2F;IAC3F,yEAAyE;IACzE,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,IAAI,KAAK,CAAC;QACtC,MAAM,aAAa,GAAG,GAAG,CAAC,MAAM,GAAG,GAAG;YACpC,CAAC,CAAC,YAAY,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,cAAc;YACpD,CAAC,CAAC,YAAY,GAAG,CAAC,IAAI,EAAE,WAAW,CAAC;QACtC,IAAI,CAAC,iBAAiB,GAAG,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC;IACxD,CAAC;IAED,mEAAmE;IACnE,IAAI,QAAQ,GAAsB,EAAE,CAAC;IACrC,IAAI,KAAK,GAAoB,EAAE,CAAC;IAEhC,IAAI,CAAC;QACH,MAAM,aAAa,GAAG,0BAA0B,CAAC,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,iBAAiB,CAAC,CAAC;QAC5G,MAAM,aAAa,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,aAAa,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,cAAc,EAAE,MAAM,EAAE,CAAC,CAAC;QAC3G,aAAa,IAAI,aAAa,CAAC,WAAW,CAAC;QAC3C,cAAc,IAAI,aAAa,CAAC,YAAY,CAAC;QAE7C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,OAAO,CAA4B,CAAC;QAEjF,iBAAiB;QACjB,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;QACvC,IAAI,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;YACxC,KAAK,MAAM,CAAC,IAAI,WAAW,CAAC,QAAqB,EAAE,CAAC;gBAClD,IAAI,CAAC,CAAC,IAAI,OAAQ,CAA6B,CAAC,IAAI,KAAK,QAAQ;oBAAE,SAAS;gBAC5E,MAAM,MAAM,GAAG,CAA4B,CAAC;gBAC5C,MAAM,SAAS,GAAG,mBAAmB,CAAC,MAAM,CAAC,IAAc,CAAC,CAAC;gBAC7D,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC;oBAAE,SAAS;gBACpE,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;gBAC5B,qFAAqF;gBACrF,MAAM,QAAQ,GAAG,MAAM,CAAC,UAAU,CAAC;gBACnC,MAAM,UAAU,GAA4B,CAAC,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,
OAAO,CAAC,QAAQ,CAAC,CAAC;oBAChH,CAAC,CAAC,QAAmC;oBACrC,CAAC,CAAC,EAAE,CAAC;gBAEP,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;oBACrF,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,IAAI,IAAI,SAAS,CAAC;oBAClE,aAAa,EAAE,SAAS;oBACxB,UAAU;iBACX,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,cAAc;QACd,IAAI,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;YACrC,KAAK,MAAM,CAAC,IAAI,WAAW,CAAC,KAAkB,EAAE,CAAC;gBAC/C,IAAI,CAAC,CAAC;oBAAE,SAAS;gBACjB,MAAM,GAAG,GAAG,CAA4B,CAAC;gBACzC,MAAM,SAAS,GAAG,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBAC9C,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC;gBAC9E,MAAM,SAAS,GAAG,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBAC9C,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC;gBAC9E,IAAI,CAAC,SAAS,IAAI,CAAC,SAAS;oBAAE,SAAS;gBACvC,MAAM,MAAM,GAAG,mBAAmB,CAAC,SAAS,CAAC,CAAC;gBAC9C,MAAM,MAAM,GAAG,mBAAmB,CAAC,SAAS,CAAC,CAAC;gBAC9C,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM;oBAAE,SAAS;gBACjC,KAAK,CAAC,IAAI,CAAC;oBACT,UAAU,EAAE,MAAM;oBAClB,UAAU,EAAE,MAAM;oBAClB,QAAQ,EAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,IAAI,YAAY,CAAC;oBAC9C,QAAQ,EAAE,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa;oBACxE,UAAU,EAAE,GAAG;iBAChB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YAChD,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;gBAC9B,IAAI,MAAM,CAAC,aAAa,KAAK,MAAM,EAAE,CAAC;oBACpC,IAAI,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;wBACxE,IAAI,CAAC,oBAAqB,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;oBACxD,CAAC;gBACH,CAAC;qBAAM,IAAI,MAAM,CAAC,aAAa,CAAC,MAAM,IAAI,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;oBAC3F,IAAI,CAAC,oBAAqB,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CA
AC,CAAC;gBACxD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,qEAAqE;IACvE,CAAC;IAED,OAAO;QACL,KAAK;QACL,QAAQ;QACR,KAAK;QACL,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,aAAa;QAC1B,YAAY,EAAE,cAAc;QAC5B,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,KAAK;KACxB,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,IAAoB,EAAE,KAAa;IACtD,OAAO;QACL,KAAK,EAAE,EAAE;QACT,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,IAAI;QACJ,UAAU,EAAE,CAAC;QACb,WAAW,EAAE,CAAC;QACd,YAAY,EAAE,CAAC;QACf,KAAK;KACN,CAAC;AACJ,CAAC;AAED,SAAS,eAAe,CAAC,CAAU;IACjC,OAAO,CACL,OAAO,CAAC,KAAK,QAAQ;QACrB,CAAC,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,eAAe,EAAE,cAAc,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CACnF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,GAAW;IAC7C,IAAI,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IACtB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC;IAC3C,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAChC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IACrC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC;IAC1C,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC;IAC1C,MAAM,YAAY,GAAG,yHAAyH,CAAC;IAC/I,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;IACtC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;IACtC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACxC,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAC1B,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -30,7 +30,7 @@ export async function extractWithLLM(
|
|
|
30
30
|
const segments = createEnrichedSegments(input);
|
|
31
31
|
|
|
32
32
|
let factStrings: string[] = [];
|
|
33
|
-
let factEntries: Array<{ text: string; importance: number; sourceChunk: string; eventDate?: Date; documentDate?: Date }> = [];
|
|
33
|
+
let factEntries: Array<{ text: string; importance: number; sourceChunk: string; eventDate?: Date; documentDate?: Date; isPattern?: boolean }> = [];
|
|
34
34
|
|
|
35
35
|
// Process segments (in parallel for speed, up to 4 at a time)
|
|
36
36
|
const segmentBatches: typeof segments[] = [];
|
|
@@ -63,23 +63,25 @@ export async function extractWithLLM(
|
|
|
63
63
|
|
|
64
64
|
const parsed = JSON.parse(factResponse.content) as Record<string, unknown>;
|
|
65
65
|
const rawFacts = Array.isArray(parsed.facts) ? parsed.facts : [];
|
|
66
|
-
const entries: Array<{ text: string; importance: number; sourceChunk: string; eventDate?: Date; documentDate?: Date }> = [];
|
|
66
|
+
const entries: Array<{ text: string; importance: number; sourceChunk: string; eventDate?: Date; documentDate?: Date; isPattern?: boolean }> = [];
|
|
67
67
|
for (const f of rawFacts) {
|
|
68
68
|
if (typeof f === 'string') {
|
|
69
69
|
const trimmed = f.trim();
|
|
70
|
-
if (trimmed.length > 0) entries.push({ text: trimmed, importance: 0.5, sourceChunk: seg.contextWindow });
|
|
70
|
+
if (trimmed.length > 0) entries.push({ text: trimmed, importance: 0.5, sourceChunk: seg.contextWindow, isPattern: false });
|
|
71
71
|
} else if (f && typeof f === 'object') {
|
|
72
72
|
const obj = f as Record<string, unknown>;
|
|
73
73
|
const text = (typeof obj.t === 'string' ? obj.t : typeof obj.text === 'string' ? obj.text : '').trim();
|
|
74
74
|
const importance = typeof obj.i === 'number' ? obj.i : typeof obj.importance === 'number' ? obj.importance : 0.5;
|
|
75
75
|
const eventDate = obj.ed ? new Date(obj.ed as string) : undefined;
|
|
76
76
|
const documentDate = obj.dd ? new Date(obj.dd as string) : undefined;
|
|
77
|
+
const isPattern = obj.p === true;
|
|
77
78
|
if (text.length > 0) entries.push({
|
|
78
79
|
text,
|
|
79
80
|
importance: Math.max(0, Math.min(1, importance)),
|
|
80
81
|
sourceChunk: seg.contextWindow,
|
|
81
82
|
eventDate: eventDate && !isNaN(eventDate.getTime()) ? eventDate : undefined,
|
|
82
83
|
documentDate: documentDate && !isNaN(documentDate.getTime()) ? documentDate : undefined,
|
|
84
|
+
isPattern,
|
|
83
85
|
});
|
|
84
86
|
}
|
|
85
87
|
}
|
|
@@ -114,13 +116,13 @@ export async function extractWithLLM(
|
|
|
114
116
|
}
|
|
115
117
|
|
|
116
118
|
// Build ExtractedFact objects from parsed entries with LLM-scored importance
|
|
117
|
-
const facts: ExtractedFact[] = factEntries.map(({ text, importance, sourceChunk, eventDate, documentDate }) => ({
|
|
119
|
+
const facts: ExtractedFact[] = factEntries.map(({ text, importance, sourceChunk, eventDate, documentDate, isPattern }) => ({
|
|
118
120
|
content: text,
|
|
119
121
|
importance,
|
|
120
122
|
confidence: 0.8,
|
|
121
123
|
sourceType: 'conversation' as const,
|
|
122
124
|
modality: 'text' as const,
|
|
123
|
-
tags: [],
|
|
125
|
+
tags: isPattern ? ['pattern'] : [],
|
|
124
126
|
originalContent: input,
|
|
125
127
|
entityCanonicalNames: [],
|
|
126
128
|
sourceChunk,
|