gencow 0.1.140 → 0.1.141

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gencow",
3
- "version": "0.1.140",
3
+ "version": "0.1.141",
4
4
  "description": "Gencow — AI Backend Engine",
5
5
  "type": "module",
6
6
  "bin": {
package/server/index.js CHANGED
@@ -130352,6 +130352,7 @@ var MAX_SEARCH_LIMIT = 100;
130352
130352
  var RRF_K = 60;
130353
130353
  var IDENTIFIER_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
130354
130354
  var DEFAULT_FUSION_WEIGHT = 1;
130355
+ var RELAXED_LEXICAL_TERM_RE = /[\p{L}\p{N}]+/gu;
130355
130356
  var INTERNAL_SEARCH_ROW_KEYS = ["score", "keyword_score", "vector_score", "fused_score"];
130356
130357
  var SearchRuntimeError = class extends Error {
130357
130358
  status;
@@ -130438,20 +130439,45 @@ function buildFieldConcat(fields) {
130438
130439
  const chunks = fields.map((field) => sql10.raw(`coalesce(${requireSafeIdentifier(field)}::text, '')`));
130439
130440
  return joinSql2(chunks, ` || ' ' || `);
130440
130441
  }
130442
/**
 * Decides whether a query should also be matched with the relaxed
 * (substring-based) lexical fallback in addition to English full-text search.
 *
 * The fallback is enabled only when the query contains at least one non-ASCII
 * letter, combining mark or number. Non-ASCII punctuation and symbols alone
 * (typographic quotes, dashes, emoji, currency signs) do not enable it, so an
 * otherwise plain-ASCII query is not silently widened into an OR-of-substrings
 * match just because it was pasted with curly quotes.
 *
 * @param {string} query2 - Raw search query.
 * @returns {boolean} True when the query contains a non-ASCII word character.
 */
function shouldUseRelaxedLexicalFallback(query2) {
  // The negative lookahead excludes the ASCII range; the class then requires
  // the character to be a letter, mark or number.
  return /(?![\x00-\x7F])[\p{L}\p{M}\p{N}]/u.test(query2);
}
130445
/**
 * Splits a query into the distinct, lower-cased terms used by the relaxed
 * lexical fallback.
 *
 * Terms are the matches of RELAXED_LEXICAL_TERM_RE in the lower-cased query.
 * One-character terms are discarded as noise, except when the character is a
 * Han, Hangul, Hiragana or Katakana character: in those scripts a single
 * character is routinely a complete word, and dropping it would leave such
 * queries with no terms at all.
 *
 * @param {string} query2 - Raw search query.
 * @returns {string[]} Unique terms in first-occurrence order; empty when the
 *   query yields no usable term.
 */
function extractRelaxedLexicalTerms(query2) {
  const singleCjkCharacter = /^[\p{Script=Han}\p{Script=Hangul}\p{Script=Hiragana}\p{Script=Katakana}]$/u;
  const matches = query2.toLowerCase().match(RELAXED_LEXICAL_TERM_RE) ?? [];
  // `length` counts UTF-16 units, so the first test keeps every term the
  // previous filter kept; the second only adds single CJK characters.
  const usable = matches.filter((term) => term.length > 1 || singleCjkCharacter.test(term));
  return [...new Set(usable)];
}
130449
/**
 * Builds the relaxed lexical WHERE fragment: the concatenated search fields
 * must contain at least one query term (case-insensitive substring match).
 *
 * @param {string[]} fields - Field names to search across.
 * @param {string} query2 - Raw search query.
 * @returns The SQL fragment `false` when the query yields no terms, otherwise
 *   the per-term `ILIKE` predicates joined with " OR ".
 */
function buildRelaxedLexicalClause(fields, query2) {
  const needles = extractRelaxedLexicalTerms(query2);
  if (needles.length === 0) {
    return sql10`false`;
  }
  const haystack = buildFieldConcat(fields);
  const predicates = needles.map((needle) => {
    // The pattern is handed to the sql10 tag as an interpolated value, not
    // spliced into the SQL text.
    const pattern = `%${needle}%`;
    return sql10`${haystack} ILIKE ${pattern}`;
  });
  return joinSql2(predicates, " OR ");
}
130456
/**
 * Builds the relaxed lexical score expression: the fraction of query terms
 * found (case-insensitively, as substrings) in the concatenated search fields.
 *
 * @param {string[]} fields - Field names to search across.
 * @param {string} query2 - Raw search query.
 * @returns The SQL fragment `0::double precision` when the query yields no
 *   terms, otherwise (sum of per-term 0/1 hits) divided by the term count.
 */
function buildRelaxedLexicalScore(fields, query2) {
  const needles = extractRelaxedLexicalTerms(query2);
  const total = needles.length;
  if (total === 0) {
    return sql10`0::double precision`;
  }
  const haystack = buildFieldConcat(fields);
  // One 0/1 indicator per term.
  const hits = needles.map((needle) => {
    const pattern = `%${needle}%`;
    return sql10`CASE WHEN ${haystack} ILIKE ${pattern} THEN 1 ELSE 0 END`;
  });
  const hitSum = joinSql2(hits, " + ");
  return sql10`((${hitSum})::double precision / ${total})`;
}
130441
130463
  function buildLexicalClause(engine, fields, query2) {
130442
130464
  if (engine === "pgroonga") {
130443
130465
  const fieldClauses = fields.map((field) => sql10`${sql10.raw(requireSafeIdentifier(field))} &@~ ${query2}`);
130444
130466
  return joinSql2(fieldClauses, " OR ");
130445
130467
  }
130446
130468
  const fieldConcat = buildFieldConcat(fields);
130447
- return sql10`to_tsvector('english', ${fieldConcat}) @@ plainto_tsquery('english', ${query2})`;
130469
+ const strictClause = sql10`to_tsvector('english', ${fieldConcat}) @@ plainto_tsquery('english', ${query2})`;
130470
+ if (!shouldUseRelaxedLexicalFallback(query2)) return strictClause;
130471
+ return sql10`(${strictClause} OR ${buildRelaxedLexicalClause(fields, query2)})`;
130448
130472
  }
130449
130473
  function buildLexicalScore(engine, fields, query2) {
130450
130474
  if (engine === "pgroonga") {
130451
130475
  return sql10`pgroonga_score(tableoid, ctid)::double precision`;
130452
130476
  }
130453
130477
  const fieldConcat = buildFieldConcat(fields);
130454
- return sql10`ts_rank(to_tsvector('english', ${fieldConcat}), plainto_tsquery('english', ${query2}))::double precision`;
130478
+ const strictScore = sql10`ts_rank(to_tsvector('english', ${fieldConcat}), plainto_tsquery('english', ${query2}))::double precision`;
130479
+ if (!shouldUseRelaxedLexicalFallback(query2)) return strictScore;
130480
+ return sql10`GREATEST(${strictScore}, ${buildRelaxedLexicalScore(fields, query2)})::double precision`;
130455
130481
  }
130456
130482
  function buildScopeClauses(auth, scope) {
130457
130483
  if (!scope.corpus.trim()) {