@chatman-media/kb 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +16 -13
- package/dist/ingest.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/chunk.ts +1 -1
- package/src/ingest.ts +6 -1
- package/src/reranker.ts +2 -2
- package/src/structured-output.ts +1 -1
- package/src/text-style-rules.ts +1 -1
- package/src/vision.ts +2 -2
package/dist/index.js
CHANGED
|
@@ -44647,7 +44647,7 @@ var NO_CONTEXT_MARKER = "__NO_CONTEXT__";
|
|
|
44647
44647
|
var replaceEmDash = {
|
|
44648
44648
|
name: "replace-em-dash",
|
|
44649
44649
|
description: "U+2014 \xAB\u2014\xBB \u2192 \xAB-\xBB (\u0441 \u043D\u043E\u0440\u043C\u0430\u043B\u0438\u0437\u0430\u0446\u0438\u0435\u0439 \u043F\u0440\u043E\u0431\u0435\u043B\u043E\u0432)",
|
|
44650
|
-
apply: (s) => s.replace(/\s
|
|
44650
|
+
apply: (s) => s.replace(/\s{0,200}\u2014\s{0,200}/g, " - ").replace(/ {2,}/g, " ")
|
|
44651
44651
|
};
|
|
44652
44652
|
var replaceEnDash = {
|
|
44653
44653
|
name: "replace-en-dash",
|
|
@@ -59649,7 +59649,7 @@ Schema:
|
|
|
59649
59649
|
${schemaStr}`;
|
|
59650
59650
|
}
|
|
59651
59651
|
function parseStructuredOutput(raw, schema) {
|
|
59652
|
-
const cleaned = raw.trim().replace(/^```(?:json)?\s*/i, "").replace(/\s
|
|
59652
|
+
const cleaned = raw.trim().replace(/^```(?:json)?\s*/i, "").replace(/\s{0,1000}```$/, "").trim();
|
|
59653
59653
|
let parsed;
|
|
59654
59654
|
try {
|
|
59655
59655
|
parsed = JSON.parse(cleaned);
|
|
@@ -60296,7 +60296,7 @@ class OllamaChatClient {
|
|
|
60296
60296
|
disableThinking;
|
|
60297
60297
|
timeoutMs;
|
|
60298
60298
|
constructor(opts) {
|
|
60299
|
-
this.host = opts.host.replace(
|
|
60299
|
+
this.host = opts.host.replace(/\/{1,512}$/, "");
|
|
60300
60300
|
this.model = opts.model;
|
|
60301
60301
|
this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
60302
60302
|
this.disableThinking = opts.disableThinking ?? true;
|
|
@@ -60427,7 +60427,7 @@ class OllamaEmbeddingClient {
|
|
|
60427
60427
|
fetchImpl;
|
|
60428
60428
|
timeoutMs;
|
|
60429
60429
|
constructor(opts) {
|
|
60430
|
-
this.host = opts.host.replace(
|
|
60430
|
+
this.host = opts.host.replace(/\/{1,512}$/, "");
|
|
60431
60431
|
this.model = opts.model;
|
|
60432
60432
|
this.dim = opts.dim;
|
|
60433
60433
|
this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
@@ -60478,7 +60478,7 @@ class OpenAIChatClient {
|
|
|
60478
60478
|
if (!opts.apiKey)
|
|
60479
60479
|
throw new Error("OpenAIChatClient: apiKey required");
|
|
60480
60480
|
this.apiKey = opts.apiKey;
|
|
60481
|
-
this.baseUrl = opts.baseUrl.replace(
|
|
60481
|
+
this.baseUrl = opts.baseUrl.replace(/\/{1,512}$/, "");
|
|
60482
60482
|
this.model = opts.model;
|
|
60483
60483
|
this.timeoutMs = opts.timeoutMs ?? 60000;
|
|
60484
60484
|
this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
@@ -60624,7 +60624,7 @@ class OpenAIEmbeddingClient {
|
|
|
60624
60624
|
if (!opts.apiKey)
|
|
60625
60625
|
throw new Error("OpenAIEmbeddingClient: apiKey required");
|
|
60626
60626
|
this.apiKey = opts.apiKey;
|
|
60627
|
-
this.baseUrl = opts.baseUrl.replace(
|
|
60627
|
+
this.baseUrl = opts.baseUrl.replace(/\/{1,512}$/, "");
|
|
60628
60628
|
this.model = opts.model;
|
|
60629
60629
|
this.dim = opts.dim;
|
|
60630
60630
|
this.timeoutMs = opts.timeoutMs ?? 60000;
|
|
@@ -60701,7 +60701,7 @@ class OpenRouterChatClient {
|
|
|
60701
60701
|
throw new Error("OpenRouterChatClient: model required");
|
|
60702
60702
|
}
|
|
60703
60703
|
this.apiKey = opts.apiKey;
|
|
60704
|
-
this.baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(
|
|
60704
|
+
this.baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/{1,512}$/, "");
|
|
60705
60705
|
this.model = opts.model;
|
|
60706
60706
|
this.siteUrl = opts.siteUrl;
|
|
60707
60707
|
this.appName = opts.appName;
|
|
@@ -61111,7 +61111,7 @@ function chunkBySections(text, opts = {}) {
|
|
|
61111
61111
|
bodyLines = [];
|
|
61112
61112
|
};
|
|
61113
61113
|
for (const line of lines) {
|
|
61114
|
-
const m = line.match(/^(#{1,6})\s+(
|
|
61114
|
+
const m = line.match(/^(#{1,6})\s+(\S.*)$/);
|
|
61115
61115
|
if (m) {
|
|
61116
61116
|
flush();
|
|
61117
61117
|
currentHeading = m[2] ?? null;
|
|
@@ -61719,7 +61719,10 @@ function* walk(dir) {
|
|
|
61719
61719
|
function stripNonContent(raw) {
|
|
61720
61720
|
let s = raw;
|
|
61721
61721
|
s = s.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, "");
|
|
61722
|
-
|
|
61722
|
+
for (let prev = "";prev !== s; ) {
|
|
61723
|
+
prev = s;
|
|
61724
|
+
s = s.replace(/<!--[\s\S]{0,10000}?-->/g, "");
|
|
61725
|
+
}
|
|
61723
61726
|
s = s.replace(/\n{3,}/g, `
|
|
61724
61727
|
|
|
61725
61728
|
`).trim();
|
|
@@ -61806,7 +61809,7 @@ class CohereReranker {
|
|
|
61806
61809
|
throw new Error("CohereReranker: apiKey required");
|
|
61807
61810
|
this.apiKey = opts.apiKey;
|
|
61808
61811
|
this.model = opts.model ?? "rerank-v3.5";
|
|
61809
|
-
this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(
|
|
61812
|
+
this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(/\/{1,512}$/, "");
|
|
61810
61813
|
this.timeoutMs = opts.timeoutMs ?? 30000;
|
|
61811
61814
|
this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
61812
61815
|
}
|
|
@@ -61852,7 +61855,7 @@ class JinaReranker {
|
|
|
61852
61855
|
throw new Error("JinaReranker: apiKey required");
|
|
61853
61856
|
this.apiKey = opts.apiKey;
|
|
61854
61857
|
this.model = opts.model ?? "jina-reranker-v2-base-multilingual";
|
|
61855
|
-
this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(
|
|
61858
|
+
this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(/\/{1,512}$/, "");
|
|
61856
61859
|
this.timeoutMs = opts.timeoutMs ?? 30000;
|
|
61857
61860
|
this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
61858
61861
|
}
|
|
@@ -62424,7 +62427,7 @@ async function classifyPhoto(opts) {
|
|
|
62424
62427
|
if (!opts.apiKey || opts.apiKey.trim().length === 0) {
|
|
62425
62428
|
throw new Error("classifyPhoto: apiKey required");
|
|
62426
62429
|
}
|
|
62427
|
-
const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL2).replace(
|
|
62430
|
+
const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL2).replace(/\/{1,512}$/, "");
|
|
62428
62431
|
const fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
62429
62432
|
const mime = opts.mimeType?.trim() ? opts.mimeType : "image/jpeg";
|
|
62430
62433
|
const base643 = Buffer.from(opts.bytes).toString("base64");
|
|
@@ -62519,7 +62522,7 @@ async function extractPassportIdentity(opts) {
|
|
|
62519
62522
|
if (!opts.apiKey || opts.apiKey.trim().length === 0) {
|
|
62520
62523
|
throw new Error("extractPassportIdentity: apiKey required");
|
|
62521
62524
|
}
|
|
62522
|
-
const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL2).replace(
|
|
62525
|
+
const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL2).replace(/\/{1,512}$/, "");
|
|
62523
62526
|
const fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
62524
62527
|
const mime = opts.mimeType?.trim() ? opts.mimeType : "image/jpeg";
|
|
62525
62528
|
const base643 = Buffer.from(opts.bytes).toString("base64");
|
package/dist/ingest.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,YAAY,EAAa,MAAM,YAAY,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAEjE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAI3C,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,QAAQ,CAAC;IACb,QAAQ,EAAE,eAAe,CAAC;IAC1B,KAAK,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;IAC9B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB;;sEAEkE;IAClE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;iFAC6E;IAC7E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,gBAAgB,CAAC,CA+C1F;AAED;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,EACtC,IAAI,EAAE,UAAU,GACf,OAAO,CAAC,gBAAgB,CAAC,CA4C3B;AAED,MAAM,WAAW,sBAAsB;IACrC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,eAAe,CACnC,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,UAAU,GACf,OAAO,CAAC,sBAAsB,CAAC,CAejC;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAK7E;AAaD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,
|
|
1
|
+
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,YAAY,EAAa,MAAM,YAAY,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAEjE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAI3C,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,QAAQ,CAAC;IACb,QAAQ,EAAE,eAAe,CAAC;IAC1B,KAAK,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;IAC9B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB;;sEAEkE;IAClE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;iFAC6E;IAC7E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,gBAAgB,CAAC,CA+C1F;AAED;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,EACtC,IAAI,EAAE,UAAU,GACf,OAAO,CAAC,gBAAgB,CAAC,CA4C3B;AAED,MAAM,WAAW,sBAAsB;IACrC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,eAAe,CACnC,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,UAAU,GACf,OAAO,CAAC,sBAAsB,CAAC,CAejC;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAK7E;AAaD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAWnD"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chatman-media/kb",
|
|
3
|
-
"version": "1.3.
|
|
3
|
+
"version": "1.3.1",
|
|
4
4
|
"description": "Tenant-scoped Knowledge Base: hybrid retrieval (pgvector + BM25), ingest, answer pipeline, persona/skill composition. LLM I/O живёт в @chatman-media/llm-router.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -56,7 +56,7 @@
|
|
|
56
56
|
"url": "https://github.com/chatman-media/lead-engine/issues"
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
|
-
"@chatman-media/llm-router": "1.
|
|
59
|
+
"@chatman-media/llm-router": "1.1.0",
|
|
60
60
|
"unpdf": "^1.6.2",
|
|
61
61
|
"zod": "^4.4.1"
|
|
62
62
|
},
|
package/src/chunk.ts
CHANGED
|
@@ -137,7 +137,7 @@ export function chunkBySections(text: string, opts: Partial<ChunkOptions> = {}):
|
|
|
137
137
|
};
|
|
138
138
|
|
|
139
139
|
for (const line of lines) {
|
|
140
|
-
const m = line.match(/^(#{1,6})\s+(
|
|
140
|
+
const m = line.match(/^(#{1,6})\s+(\S.*)$/);
|
|
141
141
|
if (m) {
|
|
142
142
|
flush();
|
|
143
143
|
currentHeading = m[2] ?? null;
|
package/src/ingest.ts
CHANGED
|
@@ -187,7 +187,12 @@ function* walk(dir: string): Generator<string> {
|
|
|
187
187
|
export function stripNonContent(raw: string): string {
|
|
188
188
|
let s = raw;
|
|
189
189
|
s = s.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, "");
|
|
190
|
-
|
|
190
|
+
// Strip HTML comments. Bounded quantifier avoids polynomial backtracking;
|
|
191
|
+
// loop until stable so a leftover `<!--` can't survive a single pass.
|
|
192
|
+
for (let prev = ""; prev !== s; ) {
|
|
193
|
+
prev = s;
|
|
194
|
+
s = s.replace(/<!--[\s\S]{0,10000}?-->/g, "");
|
|
195
|
+
}
|
|
191
196
|
s = s.replace(/\n{3,}/g, "\n\n").trim();
|
|
192
197
|
return `${s}\n`;
|
|
193
198
|
}
|
package/src/reranker.ts
CHANGED
|
@@ -49,7 +49,7 @@ export class CohereReranker implements Reranker {
|
|
|
49
49
|
if (!opts.apiKey) throw new Error("CohereReranker: apiKey required");
|
|
50
50
|
this.apiKey = opts.apiKey;
|
|
51
51
|
this.model = opts.model ?? "rerank-v3.5";
|
|
52
|
-
this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(
|
|
52
|
+
this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(/\/{1,512}$/, "");
|
|
53
53
|
this.timeoutMs = opts.timeoutMs ?? 30_000;
|
|
54
54
|
this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
55
55
|
}
|
|
@@ -128,7 +128,7 @@ export class JinaReranker implements Reranker {
|
|
|
128
128
|
if (!opts.apiKey) throw new Error("JinaReranker: apiKey required");
|
|
129
129
|
this.apiKey = opts.apiKey;
|
|
130
130
|
this.model = opts.model ?? "jina-reranker-v2-base-multilingual";
|
|
131
|
-
this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(
|
|
131
|
+
this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(/\/{1,512}$/, "");
|
|
132
132
|
this.timeoutMs = opts.timeoutMs ?? 30_000;
|
|
133
133
|
this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
134
134
|
}
|
package/src/structured-output.ts
CHANGED
package/src/text-style-rules.ts
CHANGED
|
@@ -37,7 +37,7 @@ export interface TextStyleRule {
|
|
|
37
37
|
export const replaceEmDash: TextStyleRule = {
|
|
38
38
|
name: "replace-em-dash",
|
|
39
39
|
description: "U+2014 «—» → «-» (с нормализацией пробелов)",
|
|
40
|
-
apply: (s) => s.replace(/\s
|
|
40
|
+
apply: (s) => s.replace(/\s{0,200}—\s{0,200}/g, " - ").replace(/ {2,}/g, " "),
|
|
41
41
|
};
|
|
42
42
|
|
|
43
43
|
/**
|
package/src/vision.ts
CHANGED
|
@@ -74,7 +74,7 @@ export async function classifyPhoto(opts: ClassifyPhotoOptions): Promise<PhotoCl
|
|
|
74
74
|
if (!opts.apiKey || opts.apiKey.trim().length === 0) {
|
|
75
75
|
throw new Error("classifyPhoto: apiKey required");
|
|
76
76
|
}
|
|
77
|
-
const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(
|
|
77
|
+
const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/{1,512}$/, "");
|
|
78
78
|
const fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
79
79
|
const mime = opts.mimeType?.trim() ? opts.mimeType : "image/jpeg";
|
|
80
80
|
const base64 = Buffer.from(opts.bytes).toString("base64");
|
|
@@ -210,7 +210,7 @@ export async function extractPassportIdentity(
|
|
|
210
210
|
if (!opts.apiKey || opts.apiKey.trim().length === 0) {
|
|
211
211
|
throw new Error("extractPassportIdentity: apiKey required");
|
|
212
212
|
}
|
|
213
|
-
const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(
|
|
213
|
+
const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/{1,512}$/, "");
|
|
214
214
|
const fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
|
|
215
215
|
const mime = opts.mimeType?.trim() ? opts.mimeType : "image/jpeg";
|
|
216
216
|
const base64 = Buffer.from(opts.bytes).toString("base64");
|