@chatman-media/kb 1.3.0 → 1.3.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -44647,7 +44647,7 @@ var NO_CONTEXT_MARKER = "__NO_CONTEXT__";
44647
44647
  var replaceEmDash = {
44648
44648
  name: "replace-em-dash",
44649
44649
  description: "U+2014 \xAB\u2014\xBB \u2192 \xAB-\xBB (\u0441 \u043D\u043E\u0440\u043C\u0430\u043B\u0438\u0437\u0430\u0446\u0438\u0435\u0439 \u043F\u0440\u043E\u0431\u0435\u043B\u043E\u0432)",
44650
- apply: (s) => s.replace(/\s*\u2014\s*/g, " - ").replace(/ {2,}/g, " ")
44650
+ apply: (s) => s.replace(/\s{0,200}\u2014\s{0,200}/g, " - ").replace(/ {2,}/g, " ")
44651
44651
  };
44652
44652
  var replaceEnDash = {
44653
44653
  name: "replace-en-dash",
@@ -59649,7 +59649,7 @@ Schema:
59649
59649
  ${schemaStr}`;
59650
59650
  }
59651
59651
  function parseStructuredOutput(raw, schema) {
59652
- const cleaned = raw.trim().replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
59652
+ const cleaned = raw.trim().replace(/^```(?:json)?\s*/i, "").replace(/\s{0,1000}```$/, "").trim();
59653
59653
  let parsed;
59654
59654
  try {
59655
59655
  parsed = JSON.parse(cleaned);
@@ -60296,7 +60296,7 @@ class OllamaChatClient {
60296
60296
  disableThinking;
60297
60297
  timeoutMs;
60298
60298
  constructor(opts) {
60299
- this.host = opts.host.replace(/\/+$/, "");
60299
+ this.host = opts.host.replace(/\/{1,512}$/, "");
60300
60300
  this.model = opts.model;
60301
60301
  this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
60302
60302
  this.disableThinking = opts.disableThinking ?? true;
@@ -60427,7 +60427,7 @@ class OllamaEmbeddingClient {
60427
60427
  fetchImpl;
60428
60428
  timeoutMs;
60429
60429
  constructor(opts) {
60430
- this.host = opts.host.replace(/\/+$/, "");
60430
+ this.host = opts.host.replace(/\/{1,512}$/, "");
60431
60431
  this.model = opts.model;
60432
60432
  this.dim = opts.dim;
60433
60433
  this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
@@ -60478,7 +60478,7 @@ class OpenAIChatClient {
60478
60478
  if (!opts.apiKey)
60479
60479
  throw new Error("OpenAIChatClient: apiKey required");
60480
60480
  this.apiKey = opts.apiKey;
60481
- this.baseUrl = opts.baseUrl.replace(/\/+$/, "");
60481
+ this.baseUrl = opts.baseUrl.replace(/\/{1,512}$/, "");
60482
60482
  this.model = opts.model;
60483
60483
  this.timeoutMs = opts.timeoutMs ?? 60000;
60484
60484
  this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
@@ -60624,7 +60624,7 @@ class OpenAIEmbeddingClient {
60624
60624
  if (!opts.apiKey)
60625
60625
  throw new Error("OpenAIEmbeddingClient: apiKey required");
60626
60626
  this.apiKey = opts.apiKey;
60627
- this.baseUrl = opts.baseUrl.replace(/\/+$/, "");
60627
+ this.baseUrl = opts.baseUrl.replace(/\/{1,512}$/, "");
60628
60628
  this.model = opts.model;
60629
60629
  this.dim = opts.dim;
60630
60630
  this.timeoutMs = opts.timeoutMs ?? 60000;
@@ -60701,7 +60701,7 @@ class OpenRouterChatClient {
60701
60701
  throw new Error("OpenRouterChatClient: model required");
60702
60702
  }
60703
60703
  this.apiKey = opts.apiKey;
60704
- this.baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
60704
+ this.baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/{1,512}$/, "");
60705
60705
  this.model = opts.model;
60706
60706
  this.siteUrl = opts.siteUrl;
60707
60707
  this.appName = opts.appName;
@@ -61111,7 +61111,7 @@ function chunkBySections(text, opts = {}) {
61111
61111
  bodyLines = [];
61112
61112
  };
61113
61113
  for (const line of lines) {
61114
- const m = line.match(/^(#{1,6})\s+(.+)$/);
61114
+ const m = line.match(/^(#{1,6})\s+(\S.*)$/);
61115
61115
  if (m) {
61116
61116
  flush();
61117
61117
  currentHeading = m[2] ?? null;
@@ -61719,7 +61719,10 @@ function* walk(dir) {
61719
61719
  function stripNonContent(raw) {
61720
61720
  let s = raw;
61721
61721
  s = s.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, "");
61722
- s = s.replace(/<!--[\s\S]*?-->/g, "");
61722
+ for (let prev = "";prev !== s; ) {
61723
+ prev = s;
61724
+ s = s.replace(/<!--[\s\S]{0,10000}?-->/g, "");
61725
+ }
61723
61726
  s = s.replace(/\n{3,}/g, `
61724
61727
 
61725
61728
  `).trim();
@@ -61806,7 +61809,7 @@ class CohereReranker {
61806
61809
  throw new Error("CohereReranker: apiKey required");
61807
61810
  this.apiKey = opts.apiKey;
61808
61811
  this.model = opts.model ?? "rerank-v3.5";
61809
- this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(/\/+$/, "");
61812
+ this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(/\/{1,512}$/, "");
61810
61813
  this.timeoutMs = opts.timeoutMs ?? 30000;
61811
61814
  this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
61812
61815
  }
@@ -61852,7 +61855,7 @@ class JinaReranker {
61852
61855
  throw new Error("JinaReranker: apiKey required");
61853
61856
  this.apiKey = opts.apiKey;
61854
61857
  this.model = opts.model ?? "jina-reranker-v2-base-multilingual";
61855
- this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(/\/+$/, "");
61858
+ this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(/\/{1,512}$/, "");
61856
61859
  this.timeoutMs = opts.timeoutMs ?? 30000;
61857
61860
  this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
61858
61861
  }
@@ -62424,7 +62427,7 @@ async function classifyPhoto(opts) {
62424
62427
  if (!opts.apiKey || opts.apiKey.trim().length === 0) {
62425
62428
  throw new Error("classifyPhoto: apiKey required");
62426
62429
  }
62427
- const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL2).replace(/\/+$/, "");
62430
+ const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL2).replace(/\/{1,512}$/, "");
62428
62431
  const fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
62429
62432
  const mime = opts.mimeType?.trim() ? opts.mimeType : "image/jpeg";
62430
62433
  const base643 = Buffer.from(opts.bytes).toString("base64");
@@ -62519,7 +62522,7 @@ async function extractPassportIdentity(opts) {
62519
62522
  if (!opts.apiKey || opts.apiKey.trim().length === 0) {
62520
62523
  throw new Error("extractPassportIdentity: apiKey required");
62521
62524
  }
62522
- const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL2).replace(/\/+$/, "");
62525
+ const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL2).replace(/\/{1,512}$/, "");
62523
62526
  const fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
62524
62527
  const mime = opts.mimeType?.trim() ? opts.mimeType : "image/jpeg";
62525
62528
  const base643 = Buffer.from(opts.bytes).toString("base64");
@@ -1 +1 @@
1
- {"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,YAAY,EAAa,MAAM,YAAY,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAEjE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAI3C,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,QAAQ,CAAC;IACb,QAAQ,EAAE,eAAe,CAAC;IAC1B,KAAK,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;IAC9B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB;;sEAEkE;IAClE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;iFAC6E;IAC7E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,gBAAgB,CAAC,CA+C1F;AAED;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,EACtC,IAAI,EAAE,UAAU,GACf,OAAO,CAAC,gBAAgB,CAAC,CA4C3B;AAED,MAAM,WAAW,sBAAsB;IACrC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,eAAe,CACnC,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,UAAU,GACf,OAAO,CAAC,sBAAsB,CAAC,CAejC;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAK7E;AAaD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAMnD"}
1
+ {"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,YAAY,EAAa,MAAM,YAAY,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAEjE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAI3C,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,QAAQ,CAAC;IACb,QAAQ,EAAE,eAAe,CAAC;IAC1B,KAAK,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;IAC9B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB;;sEAEkE;IAClE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;iFAC6E;IAC7E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,gBAAgB,CAAC,CA+C1F;AAED;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,EACtC,IAAI,EAAE,UAAU,GACf,OAAO,CAAC,gBAAgB,CAAC,CA4C3B;AAED,MAAM,WAAW,sBAAsB;IACrC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,eAAe,CACnC,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,UAAU,GACf,OAAO,CAAC,sBAAsB,CAAC,CAejC;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAK7E;AAaD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAWnD"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chatman-media/kb",
3
- "version": "1.3.0",
3
+ "version": "1.3.1",
4
4
  "description": "Tenant-scoped Knowledge Base: hybrid retrieval (pgvector + BM25), ingest, answer pipeline, persona/skill composition. LLM I/O живёт в @chatman-media/llm-router.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -56,7 +56,7 @@
56
56
  "url": "https://github.com/chatman-media/lead-engine/issues"
57
57
  },
58
58
  "dependencies": {
59
- "@chatman-media/llm-router": "1.0.0",
59
+ "@chatman-media/llm-router": "1.1.0",
60
60
  "unpdf": "^1.6.2",
61
61
  "zod": "^4.4.1"
62
62
  },
package/src/chunk.ts CHANGED
@@ -137,7 +137,7 @@ export function chunkBySections(text: string, opts: Partial<ChunkOptions> = {}):
137
137
  };
138
138
 
139
139
  for (const line of lines) {
140
- const m = line.match(/^(#{1,6})\s+(.+)$/);
140
+ const m = line.match(/^(#{1,6})\s+(\S.*)$/);
141
141
  if (m) {
142
142
  flush();
143
143
  currentHeading = m[2] ?? null;
package/src/ingest.ts CHANGED
@@ -187,7 +187,12 @@ function* walk(dir: string): Generator<string> {
187
187
  export function stripNonContent(raw: string): string {
188
188
  let s = raw;
189
189
  s = s.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, "");
190
- s = s.replace(/<!--[\s\S]*?-->/g, "");
190
+ // Strip HTML comments. Bounded quantifier avoids polynomial backtracking;
191
+ // loop until stable so a leftover `<!--` can't survive a single pass.
192
+ for (let prev = ""; prev !== s; ) {
193
+ prev = s;
194
+ s = s.replace(/<!--[\s\S]{0,10000}?-->/g, "");
195
+ }
191
196
  s = s.replace(/\n{3,}/g, "\n\n").trim();
192
197
  return `${s}\n`;
193
198
  }
package/src/reranker.ts CHANGED
@@ -49,7 +49,7 @@ export class CohereReranker implements Reranker {
49
49
  if (!opts.apiKey) throw new Error("CohereReranker: apiKey required");
50
50
  this.apiKey = opts.apiKey;
51
51
  this.model = opts.model ?? "rerank-v3.5";
52
- this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(/\/+$/, "");
52
+ this.baseUrl = (opts.baseUrl ?? "https://api.cohere.com/v2").replace(/\/{1,512}$/, "");
53
53
  this.timeoutMs = opts.timeoutMs ?? 30_000;
54
54
  this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
55
55
  }
@@ -128,7 +128,7 @@ export class JinaReranker implements Reranker {
128
128
  if (!opts.apiKey) throw new Error("JinaReranker: apiKey required");
129
129
  this.apiKey = opts.apiKey;
130
130
  this.model = opts.model ?? "jina-reranker-v2-base-multilingual";
131
- this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(/\/+$/, "");
131
+ this.baseUrl = (opts.baseUrl ?? "https://api.jina.ai/v1").replace(/\/{1,512}$/, "");
132
132
  this.timeoutMs = opts.timeoutMs ?? 30_000;
133
133
  this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
134
134
  }
@@ -24,7 +24,7 @@ export function parseStructuredOutput<T extends z.ZodTypeAny>(
24
24
  const cleaned = raw
25
25
  .trim()
26
26
  .replace(/^```(?:json)?\s*/i, "")
27
- .replace(/\s*```$/, "")
27
+ .replace(/\s{0,1000}```$/, "")
28
28
  .trim();
29
29
 
30
30
  let parsed: unknown;
@@ -37,7 +37,7 @@ export interface TextStyleRule {
37
37
  export const replaceEmDash: TextStyleRule = {
38
38
  name: "replace-em-dash",
39
39
  description: "U+2014 «—» → «-» (с нормализацией пробелов)",
40
- apply: (s) => s.replace(/\s*—\s*/g, " - ").replace(/ {2,}/g, " "),
40
+ apply: (s) => s.replace(/\s{0,200}—\s{0,200}/g, " - ").replace(/ {2,}/g, " "),
41
41
  };
42
42
 
43
43
  /**
package/src/vision.ts CHANGED
@@ -74,7 +74,7 @@ export async function classifyPhoto(opts: ClassifyPhotoOptions): Promise<PhotoCl
74
74
  if (!opts.apiKey || opts.apiKey.trim().length === 0) {
75
75
  throw new Error("classifyPhoto: apiKey required");
76
76
  }
77
- const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
77
+ const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/{1,512}$/, "");
78
78
  const fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
79
79
  const mime = opts.mimeType?.trim() ? opts.mimeType : "image/jpeg";
80
80
  const base64 = Buffer.from(opts.bytes).toString("base64");
@@ -210,7 +210,7 @@ export async function extractPassportIdentity(
210
210
  if (!opts.apiKey || opts.apiKey.trim().length === 0) {
211
211
  throw new Error("extractPassportIdentity: apiKey required");
212
212
  }
213
- const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
213
+ const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/{1,512}$/, "");
214
214
  const fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
215
215
  const mime = opts.mimeType?.trim() ? opts.mimeType : "image/jpeg";
216
216
  const base64 = Buffer.from(opts.bytes).toString("base64");