searchsocket 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3,12 +3,12 @@ import path from 'path';
3
3
  import { createJiti } from 'jiti';
4
4
  import { z } from 'zod';
5
5
  import { execSync, spawn } from 'child_process';
6
- import pLimit2 from 'p-limit';
7
6
  import { createHash } from 'crypto';
8
7
  import { load } from 'cheerio';
9
8
  import matter from 'gray-matter';
10
- import fs4 from 'fs/promises';
11
9
  import fg from 'fast-glob';
10
+ import pLimit from 'p-limit';
11
+ import fs3 from 'fs/promises';
12
12
  import net from 'net';
13
13
  import { gunzipSync } from 'zlib';
14
14
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
@@ -2759,12 +2759,12 @@ var require_ChildNode = __commonJS({
2759
2759
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/ChildNode.js"(exports$1, module) {
2760
2760
  var Node2 = require_Node();
2761
2761
  var LinkedList = require_LinkedList();
2762
- var createDocumentFragmentFromArguments = function(document, args) {
2763
- var docFrag = document.createDocumentFragment();
2762
+ var createDocumentFragmentFromArguments = function(document2, args) {
2763
+ var docFrag = document2.createDocumentFragment();
2764
2764
  for (var i = 0; i < args.length; i++) {
2765
2765
  var argItem = args[i];
2766
2766
  var isNode = argItem instanceof Node2;
2767
- docFrag.appendChild(isNode ? argItem : document.createTextNode(String(argItem)));
2767
+ docFrag.appendChild(isNode ? argItem : document2.createTextNode(String(argItem)));
2768
2768
  }
2769
2769
  return docFrag;
2770
2770
  };
@@ -2922,7 +2922,7 @@ var require_NamedNodeMap = __commonJS({
2922
2922
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js
2923
2923
  var require_Element = __commonJS({
2924
2924
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js"(exports$1, module) {
2925
- module.exports = Element;
2925
+ module.exports = Element2;
2926
2926
  var xml = require_xmlnames();
2927
2927
  var utils = require_utils();
2928
2928
  var NAMESPACE = utils.NAMESPACE;
@@ -2939,7 +2939,7 @@ var require_Element = __commonJS({
2939
2939
  var NonDocumentTypeChildNode = require_NonDocumentTypeChildNode();
2940
2940
  var NamedNodeMap = require_NamedNodeMap();
2941
2941
  var uppercaseCache = /* @__PURE__ */ Object.create(null);
2942
- function Element(doc, localName, namespaceURI, prefix) {
2942
+ function Element2(doc, localName, namespaceURI, prefix) {
2943
2943
  ContainerNode.call(this);
2944
2944
  this.nodeType = Node2.ELEMENT_NODE;
2945
2945
  this.ownerDocument = doc;
@@ -2959,7 +2959,7 @@ var require_Element = __commonJS({
2959
2959
  recursiveGetText(node.childNodes[i], a);
2960
2960
  }
2961
2961
  }
2962
- Element.prototype = Object.create(ContainerNode.prototype, {
2962
+ Element2.prototype = Object.create(ContainerNode.prototype, {
2963
2963
  isHTML: { get: function isHTML() {
2964
2964
  return this.namespaceURI === NAMESPACE.HTML && this.ownerDocument.isHTML;
2965
2965
  } },
@@ -3029,7 +3029,7 @@ var require_Element = __commonJS({
3029
3029
  return NodeUtils.serializeOne(this, { nodeType: 0 });
3030
3030
  },
3031
3031
  set: function(v) {
3032
- var document = this.ownerDocument;
3032
+ var document2 = this.ownerDocument;
3033
3033
  var parent = this.parentNode;
3034
3034
  if (parent === null) {
3035
3035
  return;
@@ -3040,8 +3040,8 @@ var require_Element = __commonJS({
3040
3040
  if (parent.nodeType === Node2.DOCUMENT_FRAGMENT_NODE) {
3041
3041
  parent = parent.ownerDocument.createElement("body");
3042
3042
  }
3043
- var parser = document.implementation.mozHTMLParser(
3044
- document._address,
3043
+ var parser = document2.implementation.mozHTMLParser(
3044
+ document2._address,
3045
3045
  parent
3046
3046
  );
3047
3047
  parser.parse(v === null ? "" : String(v), true);
@@ -3100,7 +3100,7 @@ var require_Element = __commonJS({
3100
3100
  default:
3101
3101
  utils.SyntaxError();
3102
3102
  }
3103
- if (!(context instanceof Element) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
3103
+ if (!(context instanceof Element2) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
3104
3104
  context = context.ownerDocument.createElementNS(NAMESPACE.HTML, "body");
3105
3105
  }
3106
3106
  var parser = this.ownerDocument.implementation.mozHTMLParser(
@@ -3708,10 +3708,10 @@ var require_Element = __commonJS({
3708
3708
  return nodes.item ? nodes : new NodeList(nodes);
3709
3709
  } }
3710
3710
  });
3711
- Object.defineProperties(Element.prototype, ChildNode);
3712
- Object.defineProperties(Element.prototype, NonDocumentTypeChildNode);
3711
+ Object.defineProperties(Element2.prototype, ChildNode);
3712
+ Object.defineProperties(Element2.prototype, NonDocumentTypeChildNode);
3713
3713
  attributes.registerChangeHandler(
3714
- Element,
3714
+ Element2,
3715
3715
  "id",
3716
3716
  function(element, lname, oldval, newval) {
3717
3717
  if (element.rooted) {
@@ -3725,7 +3725,7 @@ var require_Element = __commonJS({
3725
3725
  }
3726
3726
  );
3727
3727
  attributes.registerChangeHandler(
3728
- Element,
3728
+ Element2,
3729
3729
  "class",
3730
3730
  function(element, lname, oldval, newval) {
3731
3731
  if (element._classList) {
@@ -3824,7 +3824,7 @@ var require_Element = __commonJS({
3824
3824
  }
3825
3825
  }
3826
3826
  });
3827
- Element._Attr = Attr;
3827
+ Element2._Attr = Attr;
3828
3828
  function AttributesArray(elt) {
3829
3829
  NamedNodeMap.call(this, elt);
3830
3830
  for (var name in elt._attrsByQName) {
@@ -4226,7 +4226,7 @@ var require_DocumentFragment = __commonJS({
4226
4226
  var Node2 = require_Node();
4227
4227
  var NodeList = require_NodeList();
4228
4228
  var ContainerNode = require_ContainerNode();
4229
- var Element = require_Element();
4229
+ var Element2 = require_Element();
4230
4230
  var select = require_select();
4231
4231
  var utils = require_utils();
4232
4232
  function DocumentFragment(doc) {
@@ -4244,9 +4244,9 @@ var require_DocumentFragment = __commonJS({
4244
4244
  }
4245
4245
  },
4246
4246
  // Copy the text content getter/setter from Element
4247
- textContent: Object.getOwnPropertyDescriptor(Element.prototype, "textContent"),
4247
+ textContent: Object.getOwnPropertyDescriptor(Element2.prototype, "textContent"),
4248
4248
  // Copy the text content getter/setter from Element
4249
- innerText: Object.getOwnPropertyDescriptor(Element.prototype, "innerText"),
4249
+ innerText: Object.getOwnPropertyDescriptor(Element2.prototype, "innerText"),
4250
4250
  querySelector: { value: function(selector) {
4251
4251
  var nodes = this.querySelectorAll(selector);
4252
4252
  return nodes.length ? nodes[0] : null;
@@ -4254,8 +4254,8 @@ var require_DocumentFragment = __commonJS({
4254
4254
  querySelectorAll: { value: function(selector) {
4255
4255
  var context = Object.create(this);
4256
4256
  context.isHTML = true;
4257
- context.getElementsByTagName = Element.prototype.getElementsByTagName;
4258
- context.nextElement = Object.getOwnPropertyDescriptor(Element.prototype, "firstElementChild").get;
4257
+ context.getElementsByTagName = Element2.prototype.getElementsByTagName;
4258
+ context.nextElement = Object.getOwnPropertyDescriptor(Element2.prototype, "firstElementChild").get;
4259
4259
  var nodes = select(selector, context);
4260
4260
  return nodes.item ? nodes : new NodeList(nodes);
4261
4261
  } },
@@ -4337,7 +4337,7 @@ var require_ProcessingInstruction = __commonJS({
4337
4337
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js
4338
4338
  var require_NodeFilter = __commonJS({
4339
4339
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js"(exports$1, module) {
4340
- var NodeFilter = {
4340
+ var NodeFilter2 = {
4341
4341
  // Constants for acceptNode()
4342
4342
  FILTER_ACCEPT: 1,
4343
4343
  FILTER_REJECT: 2,
@@ -4362,7 +4362,7 @@ var require_NodeFilter = __commonJS({
4362
4362
  SHOW_NOTATION: 2048
4363
4363
  // historical
4364
4364
  };
4365
- module.exports = NodeFilter.constructor = NodeFilter.prototype = NodeFilter;
4365
+ module.exports = NodeFilter2.constructor = NodeFilter2.prototype = NodeFilter2;
4366
4366
  }
4367
4367
  });
4368
4368
 
@@ -4437,7 +4437,7 @@ var require_TreeWalker = __commonJS({
4437
4437
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/TreeWalker.js"(exports$1, module) {
4438
4438
  module.exports = TreeWalker;
4439
4439
  var Node2 = require_Node();
4440
- var NodeFilter = require_NodeFilter();
4440
+ var NodeFilter2 = require_NodeFilter();
4441
4441
  var NodeTraversal = require_NodeTraversal();
4442
4442
  var utils = require_utils();
4443
4443
  var mapChild = {
@@ -4457,11 +4457,11 @@ var require_TreeWalker = __commonJS({
4457
4457
  node = tw._currentNode[mapChild[type]];
4458
4458
  while (node !== null) {
4459
4459
  result = tw._internalFilter(node);
4460
- if (result === NodeFilter.FILTER_ACCEPT) {
4460
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4461
4461
  tw._currentNode = node;
4462
4462
  return node;
4463
4463
  }
4464
- if (result === NodeFilter.FILTER_SKIP) {
4464
+ if (result === NodeFilter2.FILTER_SKIP) {
4465
4465
  child = node[mapChild[type]];
4466
4466
  if (child !== null) {
4467
4467
  node = child;
@@ -4495,12 +4495,12 @@ var require_TreeWalker = __commonJS({
4495
4495
  while (sibling !== null) {
4496
4496
  node = sibling;
4497
4497
  result = tw._internalFilter(node);
4498
- if (result === NodeFilter.FILTER_ACCEPT) {
4498
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4499
4499
  tw._currentNode = node;
4500
4500
  return node;
4501
4501
  }
4502
4502
  sibling = node[mapChild[type]];
4503
- if (result === NodeFilter.FILTER_REJECT || sibling === null) {
4503
+ if (result === NodeFilter2.FILTER_REJECT || sibling === null) {
4504
4504
  sibling = node[mapSibling[type]];
4505
4505
  }
4506
4506
  }
@@ -4508,7 +4508,7 @@ var require_TreeWalker = __commonJS({
4508
4508
  if (node === null || node === tw.root) {
4509
4509
  return null;
4510
4510
  }
4511
- if (tw._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4511
+ if (tw._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4512
4512
  return null;
4513
4513
  }
4514
4514
  }
@@ -4556,11 +4556,11 @@ var require_TreeWalker = __commonJS({
4556
4556
  utils.InvalidStateError();
4557
4557
  }
4558
4558
  if (!(1 << node.nodeType - 1 & this._whatToShow)) {
4559
- return NodeFilter.FILTER_SKIP;
4559
+ return NodeFilter2.FILTER_SKIP;
4560
4560
  }
4561
4561
  filter = this._filter;
4562
4562
  if (filter === null) {
4563
- result = NodeFilter.FILTER_ACCEPT;
4563
+ result = NodeFilter2.FILTER_ACCEPT;
4564
4564
  } else {
4565
4565
  this._active = true;
4566
4566
  try {
@@ -4589,7 +4589,7 @@ var require_TreeWalker = __commonJS({
4589
4589
  if (node === null) {
4590
4590
  return null;
4591
4591
  }
4592
- if (this._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4592
+ if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4593
4593
  this._currentNode = node;
4594
4594
  return node;
4595
4595
  }
@@ -4642,17 +4642,17 @@ var require_TreeWalker = __commonJS({
4642
4642
  for (previousSibling = node.previousSibling; previousSibling; previousSibling = node.previousSibling) {
4643
4643
  node = previousSibling;
4644
4644
  result = this._internalFilter(node);
4645
- if (result === NodeFilter.FILTER_REJECT) {
4645
+ if (result === NodeFilter2.FILTER_REJECT) {
4646
4646
  continue;
4647
4647
  }
4648
4648
  for (lastChild = node.lastChild; lastChild; lastChild = node.lastChild) {
4649
4649
  node = lastChild;
4650
4650
  result = this._internalFilter(node);
4651
- if (result === NodeFilter.FILTER_REJECT) {
4651
+ if (result === NodeFilter2.FILTER_REJECT) {
4652
4652
  break;
4653
4653
  }
4654
4654
  }
4655
- if (result === NodeFilter.FILTER_ACCEPT) {
4655
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4656
4656
  this._currentNode = node;
4657
4657
  return node;
4658
4658
  }
@@ -4661,7 +4661,7 @@ var require_TreeWalker = __commonJS({
4661
4661
  return null;
4662
4662
  }
4663
4663
  node = node.parentNode;
4664
- if (this._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4664
+ if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4665
4665
  this._currentNode = node;
4666
4666
  return node;
4667
4667
  }
@@ -4678,26 +4678,26 @@ var require_TreeWalker = __commonJS({
4678
4678
  nextNode: { value: function nextNode() {
4679
4679
  var node, result, firstChild, nextSibling;
4680
4680
  node = this._currentNode;
4681
- result = NodeFilter.FILTER_ACCEPT;
4681
+ result = NodeFilter2.FILTER_ACCEPT;
4682
4682
  CHILDREN:
4683
4683
  while (true) {
4684
4684
  for (firstChild = node.firstChild; firstChild; firstChild = node.firstChild) {
4685
4685
  node = firstChild;
4686
4686
  result = this._internalFilter(node);
4687
- if (result === NodeFilter.FILTER_ACCEPT) {
4687
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4688
4688
  this._currentNode = node;
4689
4689
  return node;
4690
- } else if (result === NodeFilter.FILTER_REJECT) {
4690
+ } else if (result === NodeFilter2.FILTER_REJECT) {
4691
4691
  break;
4692
4692
  }
4693
4693
  }
4694
4694
  for (nextSibling = NodeTraversal.nextSkippingChildren(node, this.root); nextSibling; nextSibling = NodeTraversal.nextSkippingChildren(node, this.root)) {
4695
4695
  node = nextSibling;
4696
4696
  result = this._internalFilter(node);
4697
- if (result === NodeFilter.FILTER_ACCEPT) {
4697
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4698
4698
  this._currentNode = node;
4699
4699
  return node;
4700
- } else if (result === NodeFilter.FILTER_SKIP) {
4700
+ } else if (result === NodeFilter2.FILTER_SKIP) {
4701
4701
  continue CHILDREN;
4702
4702
  }
4703
4703
  }
@@ -4716,7 +4716,7 @@ var require_TreeWalker = __commonJS({
4716
4716
  var require_NodeIterator = __commonJS({
4717
4717
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeIterator.js"(exports$1, module) {
4718
4718
  module.exports = NodeIterator;
4719
- var NodeFilter = require_NodeFilter();
4719
+ var NodeFilter2 = require_NodeFilter();
4720
4720
  var NodeTraversal = require_NodeTraversal();
4721
4721
  var utils = require_utils();
4722
4722
  function move(node, stayWithin, directionIsNext) {
@@ -4751,7 +4751,7 @@ var require_NodeIterator = __commonJS({
4751
4751
  }
4752
4752
  }
4753
4753
  var result = ni._internalFilter(node);
4754
- if (result === NodeFilter.FILTER_ACCEPT) {
4754
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4755
4755
  break;
4756
4756
  }
4757
4757
  }
@@ -4799,11 +4799,11 @@ var require_NodeIterator = __commonJS({
4799
4799
  utils.InvalidStateError();
4800
4800
  }
4801
4801
  if (!(1 << node.nodeType - 1 & this._whatToShow)) {
4802
- return NodeFilter.FILTER_SKIP;
4802
+ return NodeFilter2.FILTER_SKIP;
4803
4803
  }
4804
4804
  filter = this._filter;
4805
4805
  if (filter === null) {
4806
- result = NodeFilter.FILTER_ACCEPT;
4806
+ result = NodeFilter2.FILTER_ACCEPT;
4807
4807
  } else {
4808
4808
  this._active = true;
4809
4809
  try {
@@ -5013,32 +5013,32 @@ var require_URL = __commonJS({
5013
5013
  else
5014
5014
  return basepath.substring(0, lastslash + 1) + refpath;
5015
5015
  }
5016
- function remove_dot_segments(path15) {
5017
- if (!path15) return path15;
5016
+ function remove_dot_segments(path13) {
5017
+ if (!path13) return path13;
5018
5018
  var output = "";
5019
- while (path15.length > 0) {
5020
- if (path15 === "." || path15 === "..") {
5021
- path15 = "";
5019
+ while (path13.length > 0) {
5020
+ if (path13 === "." || path13 === "..") {
5021
+ path13 = "";
5022
5022
  break;
5023
5023
  }
5024
- var twochars = path15.substring(0, 2);
5025
- var threechars = path15.substring(0, 3);
5026
- var fourchars = path15.substring(0, 4);
5024
+ var twochars = path13.substring(0, 2);
5025
+ var threechars = path13.substring(0, 3);
5026
+ var fourchars = path13.substring(0, 4);
5027
5027
  if (threechars === "../") {
5028
- path15 = path15.substring(3);
5028
+ path13 = path13.substring(3);
5029
5029
  } else if (twochars === "./") {
5030
- path15 = path15.substring(2);
5030
+ path13 = path13.substring(2);
5031
5031
  } else if (threechars === "/./") {
5032
- path15 = "/" + path15.substring(3);
5033
- } else if (twochars === "/." && path15.length === 2) {
5034
- path15 = "/";
5035
- } else if (fourchars === "/../" || threechars === "/.." && path15.length === 3) {
5036
- path15 = "/" + path15.substring(4);
5032
+ path13 = "/" + path13.substring(3);
5033
+ } else if (twochars === "/." && path13.length === 2) {
5034
+ path13 = "/";
5035
+ } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5036
+ path13 = "/" + path13.substring(4);
5037
5037
  output = output.replace(/\/?[^\/]*$/, "");
5038
5038
  } else {
5039
- var segment = path15.match(/(\/?([^\/]*))/)[0];
5039
+ var segment = path13.match(/(\/?([^\/]*))/)[0];
5040
5040
  output += segment;
5041
- path15 = path15.substring(segment.length);
5041
+ path13 = path13.substring(segment.length);
5042
5042
  }
5043
5043
  }
5044
5044
  return output;
@@ -5603,9 +5603,9 @@ var require_defineElement = __commonJS({
5603
5603
  });
5604
5604
  return c;
5605
5605
  };
5606
- function EventHandlerBuilder(body, document, form, element) {
5606
+ function EventHandlerBuilder(body, document2, form, element) {
5607
5607
  this.body = body;
5608
- this.document = document;
5608
+ this.document = document2;
5609
5609
  this.form = form;
5610
5610
  this.element = element;
5611
5611
  }
@@ -5639,7 +5639,7 @@ var require_defineElement = __commonJS({
5639
5639
  var require_htmlelts = __commonJS({
5640
5640
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/htmlelts.js"(exports$1) {
5641
5641
  var Node2 = require_Node();
5642
- var Element = require_Element();
5642
+ var Element2 = require_Element();
5643
5643
  var CSSStyleDeclaration = require_CSSStyleDeclaration();
5644
5644
  var utils = require_utils();
5645
5645
  var URLUtils = require_URLUtils();
@@ -5707,10 +5707,10 @@ var require_htmlelts = __commonJS({
5707
5707
  this._form = null;
5708
5708
  };
5709
5709
  var HTMLElement = exports$1.HTMLElement = define({
5710
- superclass: Element,
5710
+ superclass: Element2,
5711
5711
  name: "HTMLElement",
5712
5712
  ctor: function HTMLElement2(doc, localName, prefix) {
5713
- Element.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
5713
+ Element2.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
5714
5714
  },
5715
5715
  props: {
5716
5716
  dangerouslySetInnerHTML: {
@@ -7192,7 +7192,7 @@ var require_htmlelts = __commonJS({
7192
7192
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js
7193
7193
  var require_svg = __commonJS({
7194
7194
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js"(exports$1) {
7195
- var Element = require_Element();
7195
+ var Element2 = require_Element();
7196
7196
  var defineElement = require_defineElement();
7197
7197
  var utils = require_utils();
7198
7198
  var CSSStyleDeclaration = require_CSSStyleDeclaration();
@@ -7206,10 +7206,10 @@ var require_svg = __commonJS({
7206
7206
  return defineElement(spec, SVGElement, svgElements, svgNameToImpl);
7207
7207
  }
7208
7208
  var SVGElement = define({
7209
- superclass: Element,
7209
+ superclass: Element2,
7210
7210
  name: "SVGElement",
7211
7211
  ctor: function SVGElement2(doc, localName, prefix) {
7212
- Element.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
7212
+ Element2.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
7213
7213
  },
7214
7214
  props: {
7215
7215
  style: { get: function() {
@@ -7344,7 +7344,7 @@ var require_Document = __commonJS({
7344
7344
  var Node2 = require_Node();
7345
7345
  var NodeList = require_NodeList();
7346
7346
  var ContainerNode = require_ContainerNode();
7347
- var Element = require_Element();
7347
+ var Element2 = require_Element();
7348
7348
  var Text = require_Text();
7349
7349
  var Comment = require_Comment();
7350
7350
  var Event = require_Event();
@@ -7353,7 +7353,7 @@ var require_Document = __commonJS({
7353
7353
  var DOMImplementation = require_DOMImplementation();
7354
7354
  var TreeWalker = require_TreeWalker();
7355
7355
  var NodeIterator = require_NodeIterator();
7356
- var NodeFilter = require_NodeFilter();
7356
+ var NodeFilter2 = require_NodeFilter();
7357
7357
  var URL2 = require_URL();
7358
7358
  var select = require_select();
7359
7359
  var events = require_events();
@@ -7492,13 +7492,13 @@ var require_Document = __commonJS({
7492
7492
  if (this.isHTML) {
7493
7493
  localName = utils.toASCIILowerCase(localName);
7494
7494
  }
7495
- return new Element._Attr(null, localName, null, null, "");
7495
+ return new Element2._Attr(null, localName, null, null, "");
7496
7496
  } },
7497
7497
  createAttributeNS: { value: function(namespace, qualifiedName) {
7498
7498
  namespace = namespace === null || namespace === void 0 || namespace === "" ? null : String(namespace);
7499
7499
  qualifiedName = String(qualifiedName);
7500
7500
  var ve = validateAndExtract(namespace, qualifiedName);
7501
- return new Element._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
7501
+ return new Element2._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
7502
7502
  } },
7503
7503
  createElement: { value: function(localName) {
7504
7504
  localName = String(localName);
@@ -7510,7 +7510,7 @@ var require_Document = __commonJS({
7510
7510
  } else if (this.contentType === "application/xhtml+xml") {
7511
7511
  return html.createElement(this, localName, null);
7512
7512
  } else {
7513
- return new Element(this, localName, null, null);
7513
+ return new Element2(this, localName, null, null);
7514
7514
  }
7515
7515
  }, writable: isApiWritable },
7516
7516
  createElementNS: { value: function(namespace, qualifiedName) {
@@ -7527,7 +7527,7 @@ var require_Document = __commonJS({
7527
7527
  } else if (namespace === NAMESPACE.SVG) {
7528
7528
  return svg.createElement(this, localName, prefix);
7529
7529
  }
7530
- return new Element(this, localName, namespace, prefix);
7530
+ return new Element2(this, localName, namespace, prefix);
7531
7531
  } },
7532
7532
  createEvent: { value: function createEvent(interfaceName) {
7533
7533
  interfaceName = interfaceName.toLowerCase();
@@ -7549,7 +7549,7 @@ var require_Document = __commonJS({
7549
7549
  if (!(root3 instanceof Node2)) {
7550
7550
  throw new TypeError("root not a node");
7551
7551
  }
7552
- whatToShow = whatToShow === void 0 ? NodeFilter.SHOW_ALL : +whatToShow;
7552
+ whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
7553
7553
  filter = filter === void 0 ? null : filter;
7554
7554
  return new TreeWalker(root3, whatToShow, filter);
7555
7555
  } },
@@ -7561,7 +7561,7 @@ var require_Document = __commonJS({
7561
7561
  if (!(root3 instanceof Node2)) {
7562
7562
  throw new TypeError("root not a node");
7563
7563
  }
7564
- whatToShow = whatToShow === void 0 ? NodeFilter.SHOW_ALL : +whatToShow;
7564
+ whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
7565
7565
  filter = filter === void 0 ? null : filter;
7566
7566
  return new NodeIterator(root3, whatToShow, filter);
7567
7567
  } },
@@ -7622,10 +7622,10 @@ var require_Document = __commonJS({
7622
7622
  return this.byId[id] instanceof MultiId;
7623
7623
  } },
7624
7624
  // Just copy this method from the Element prototype
7625
- getElementsByName: { value: Element.prototype.getElementsByName },
7626
- getElementsByTagName: { value: Element.prototype.getElementsByTagName },
7627
- getElementsByTagNameNS: { value: Element.prototype.getElementsByTagNameNS },
7628
- getElementsByClassName: { value: Element.prototype.getElementsByClassName },
7625
+ getElementsByName: { value: Element2.prototype.getElementsByName },
7626
+ getElementsByTagName: { value: Element2.prototype.getElementsByTagName },
7627
+ getElementsByTagNameNS: { value: Element2.prototype.getElementsByTagNameNS },
7628
+ getElementsByClassName: { value: Element2.prototype.getElementsByClassName },
7629
7629
  adoptNode: { value: function adoptNode(node) {
7630
7630
  if (node.nodeType === Node2.DOCUMENT_NODE) utils.NotSupportedError();
7631
7631
  if (node.nodeType === Node2.ATTRIBUTE_NODE) {
@@ -16451,8 +16451,8 @@ var require_Window = __commonJS({
16451
16451
  var Location = require_Location();
16452
16452
  var utils = require_utils();
16453
16453
  module.exports = Window;
16454
- function Window(document) {
16455
- this.document = document || new DOMImplementation(null).createHTMLDocument("");
16454
+ function Window(document2) {
16455
+ this.document = document2 || new DOMImplementation(null).createHTMLDocument("");
16456
16456
  this.document._scripting_enabled = true;
16457
16457
  this.document.defaultView = this;
16458
16458
  this.location = new Location(this, this.document._address || "about:blank");
@@ -16582,11 +16582,11 @@ var require_lib = __commonJS({
16582
16582
  };
16583
16583
  };
16584
16584
  exports$1.createWindow = function(html, address) {
16585
- var document = exports$1.createDocument(html);
16585
+ var document2 = exports$1.createDocument(html);
16586
16586
  if (address !== void 0) {
16587
- document._address = address;
16587
+ document2._address = address;
16588
16588
  }
16589
- return new impl.Window(document);
16589
+ return new impl.Window(document2);
16590
16590
  };
16591
16591
  exports$1.impl = impl;
16592
16592
  }
@@ -16651,29 +16651,18 @@ var searchSocketConfigSchema = z.object({
16651
16651
  prependTitle: z.boolean().optional(),
16652
16652
  pageSummaryChunk: z.boolean().optional()
16653
16653
  }).optional(),
16654
- embeddings: z.object({
16655
- provider: z.literal("jina").optional(),
16656
- model: z.string().min(1).optional(),
16657
- apiKey: z.string().min(1).optional(),
16658
- apiKeyEnv: z.string().min(1).optional(),
16659
- batchSize: z.number().int().positive().optional(),
16660
- concurrency: z.number().int().positive().optional(),
16661
- pricePer1kTokens: z.number().positive().optional()
16654
+ upstash: z.object({
16655
+ url: z.string().url().optional(),
16656
+ token: z.string().min(1).optional(),
16657
+ urlEnv: z.string().min(1).optional(),
16658
+ tokenEnv: z.string().min(1).optional()
16662
16659
  }).optional(),
16663
- vector: z.object({
16664
- dimension: z.number().int().positive().optional(),
16665
- turso: z.object({
16666
- url: z.string().url().optional(),
16667
- authToken: z.string().min(1).optional(),
16668
- urlEnv: z.string().optional(),
16669
- authTokenEnv: z.string().optional(),
16670
- localPath: z.string().optional()
16671
- }).optional()
16672
- }).optional(),
16673
- rerank: z.object({
16674
- enabled: z.boolean().optional(),
16675
- topN: z.number().int().positive().optional(),
16676
- model: z.string().optional()
16660
+ search: z.object({
16661
+ semanticWeight: z.number().min(0).max(1).optional(),
16662
+ inputEnrichment: z.boolean().optional(),
16663
+ reranking: z.boolean().optional(),
16664
+ dualSearch: z.boolean().optional(),
16665
+ pageSearchWeight: z.number().min(0).max(1).optional()
16677
16666
  }).optional(),
16678
16667
  ranking: z.object({
16679
16668
  enableIncomingLinkBoost: z.boolean().optional(),
@@ -16683,11 +16672,12 @@ var searchSocketConfigSchema = z.object({
16683
16672
  aggregationDecay: z.number().min(0).max(1).optional(),
16684
16673
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
16685
16674
  minScore: z.number().min(0).max(1).optional(),
16675
+ scoreGapThreshold: z.number().min(0).max(1).optional(),
16686
16676
  weights: z.object({
16687
16677
  incomingLinks: z.number().optional(),
16688
16678
  depth: z.number().optional(),
16689
- rerank: z.number().optional(),
16690
- aggregation: z.number().optional()
16679
+ aggregation: z.number().optional(),
16680
+ titleMatch: z.number().optional()
16691
16681
  }).optional()
16692
16682
  }).optional(),
16693
16683
  api: z.object({
@@ -16709,8 +16699,7 @@ var searchSocketConfigSchema = z.object({
16709
16699
  }).optional()
16710
16700
  }).optional(),
16711
16701
  state: z.object({
16712
- dir: z.string().optional(),
16713
- writeMirror: z.boolean().optional()
16702
+ dir: z.string().optional()
16714
16703
  }).optional()
16715
16704
  });
16716
16705
 
@@ -16764,24 +16753,16 @@ function createDefaultConfig(projectId) {
16764
16753
  prependTitle: true,
16765
16754
  pageSummaryChunk: true
16766
16755
  },
16767
- embeddings: {
16768
- provider: "jina",
16769
- model: "jina-embeddings-v5-text-small",
16770
- apiKeyEnv: "JINA_API_KEY",
16771
- batchSize: 64,
16772
- concurrency: 4
16773
- },
16774
- vector: {
16775
- turso: {
16776
- urlEnv: "TURSO_DATABASE_URL",
16777
- authTokenEnv: "TURSO_AUTH_TOKEN",
16778
- localPath: ".searchsocket/vectors.db"
16779
- }
16756
+ upstash: {
16757
+ urlEnv: "UPSTASH_SEARCH_REST_URL",
16758
+ tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16780
16759
  },
16781
- rerank: {
16782
- enabled: true,
16783
- topN: 20,
16784
- model: "jina-reranker-v3"
16760
+ search: {
16761
+ semanticWeight: 0.75,
16762
+ inputEnrichment: true,
16763
+ reranking: true,
16764
+ dualSearch: true,
16765
+ pageSearchWeight: 0.3
16785
16766
  },
16786
16767
  ranking: {
16787
16768
  enableIncomingLinkBoost: true,
@@ -16790,12 +16771,13 @@ function createDefaultConfig(projectId) {
16790
16771
  aggregationCap: 5,
16791
16772
  aggregationDecay: 0.5,
16792
16773
  minChunkScoreRatio: 0.5,
16793
- minScore: 0,
16774
+ minScore: 0.3,
16775
+ scoreGapThreshold: 0.4,
16794
16776
  weights: {
16795
16777
  incomingLinks: 0.05,
16796
16778
  depth: 0.03,
16797
- rerank: 1,
16798
- aggregation: 0.1
16779
+ aggregation: 0.1,
16780
+ titleMatch: 0.15
16799
16781
  }
16800
16782
  },
16801
16783
  api: {
@@ -16813,8 +16795,7 @@ function createDefaultConfig(projectId) {
16813
16795
  }
16814
16796
  },
16815
16797
  state: {
16816
- dir: ".searchsocket",
16817
- writeMirror: false
16798
+ dir: ".searchsocket"
16818
16799
  }
16819
16800
  };
16820
16801
  }
@@ -16938,21 +16919,13 @@ ${issues}`
16938
16919
  ...defaults.chunking,
16939
16920
  ...parsed.chunking
16940
16921
  },
16941
- embeddings: {
16942
- ...defaults.embeddings,
16943
- ...parsed.embeddings
16922
+ upstash: {
16923
+ ...defaults.upstash,
16924
+ ...parsed.upstash
16944
16925
  },
16945
- vector: {
16946
- ...defaults.vector,
16947
- ...parsed.vector,
16948
- turso: {
16949
- ...defaults.vector.turso,
16950
- ...parsed.vector?.turso
16951
- }
16952
- },
16953
- rerank: {
16954
- ...defaults.rerank,
16955
- ...parsed.rerank
16926
+ search: {
16927
+ ...defaults.search,
16928
+ ...parsed.search
16956
16929
  },
16957
16930
  ranking: {
16958
16931
  ...defaults.ranking,
@@ -17131,660 +17104,245 @@ function resolveScope(config, override) {
17131
17104
  scopeId: `${config.project.id}:${scopeName}`
17132
17105
  };
17133
17106
  }
17134
- function sleep(ms) {
17135
- return new Promise((resolve) => {
17136
- setTimeout(resolve, ms);
17137
- });
17138
- }
17139
- var JinaEmbeddingsProvider = class {
17140
- apiKey;
17141
- batchSize;
17142
- concurrency;
17143
- defaultTask;
17144
- constructor(options) {
17145
- if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
17146
- throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
17147
- }
17148
- if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
17149
- throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
17150
- }
17151
- this.apiKey = options.apiKey;
17152
- this.batchSize = options.batchSize;
17153
- this.concurrency = options.concurrency;
17154
- this.defaultTask = options.task ?? "retrieval.passage";
17155
- }
17156
- estimateTokens(text) {
17157
- const normalized = text.trim();
17158
- if (!normalized) {
17159
- return 0;
17160
- }
17161
- const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
17162
- const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
17163
- const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
17164
- const charEstimate = Math.ceil(normalized.length / 4);
17165
- const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
17166
- return Math.max(1, Math.max(charEstimate, lexicalEstimate));
17167
- }
17168
- async embedTexts(texts, modelId, task) {
17169
- if (texts.length === 0) {
17170
- return [];
17171
- }
17172
- const batches = [];
17173
- for (let i = 0; i < texts.length; i += this.batchSize) {
17174
- batches.push({
17175
- index: i,
17176
- values: texts.slice(i, i + this.batchSize)
17177
- });
17178
- }
17179
- const outputs = new Array(batches.length);
17180
- const limit = pLimit2(this.concurrency);
17181
- await Promise.all(
17182
- batches.map(
17183
- (batch, position) => limit(async () => {
17184
- outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
17185
- })
17186
- )
17187
- );
17188
- return outputs.flat();
17189
- }
17190
- async embedWithRetry(texts, modelId, task) {
17191
- const maxAttempts = 5;
17192
- let attempt = 0;
17193
- while (attempt < maxAttempts) {
17194
- attempt += 1;
17195
- let response;
17196
- try {
17197
- response = await fetch("https://api.jina.ai/v1/embeddings", {
17198
- method: "POST",
17199
- headers: {
17200
- "content-type": "application/json",
17201
- authorization: `Bearer ${this.apiKey}`
17202
- },
17203
- body: JSON.stringify({
17204
- model: modelId,
17205
- input: texts,
17206
- task
17207
- })
17208
- });
17209
- } catch (error) {
17210
- if (attempt >= maxAttempts) {
17211
- throw error;
17212
- }
17213
- await sleep(Math.min(2 ** attempt * 300, 5e3));
17214
- continue;
17215
- }
17216
- if (!response.ok) {
17217
- const retryable = response.status === 429 || response.status >= 500;
17218
- if (!retryable || attempt >= maxAttempts) {
17219
- const errorBody = await response.text();
17220
- throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
17221
- }
17222
- await sleep(Math.min(2 ** attempt * 300, 5e3));
17223
- continue;
17224
- }
17225
- const payload = await response.json();
17226
- if (!payload.data || !Array.isArray(payload.data)) {
17227
- throw new Error("Invalid Jina embeddings response format");
17228
- }
17229
- return payload.data.map((entry) => entry.embedding);
17230
- }
17231
- throw new Error("Unreachable retry state");
17232
- }
17233
- };
17234
-
17235
- // src/embeddings/factory.ts
17236
- function createEmbeddingsProvider(config) {
17237
- if (config.embeddings.provider !== "jina") {
17238
- throw new SearchSocketError(
17239
- "CONFIG_MISSING",
17240
- `Unsupported embeddings provider ${config.embeddings.provider}`
17241
- );
17242
- }
17243
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17244
- if (!apiKey) {
17245
- throw new SearchSocketError(
17246
- "CONFIG_MISSING",
17247
- `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
17248
- );
17249
- }
17250
- return new JinaEmbeddingsProvider({
17251
- apiKey,
17252
- batchSize: config.embeddings.batchSize,
17253
- concurrency: config.embeddings.concurrency
17254
- });
17255
- }
17256
-
17257
- // src/rerank/jina.ts
17258
- function sleep2(ms) {
17259
- return new Promise((resolve) => {
17260
- setTimeout(resolve, ms);
17261
- });
17262
- }
17263
- var JinaReranker = class {
17264
- apiKey;
17265
- model;
17266
- maxRetries;
17267
- constructor(options) {
17268
- this.apiKey = options.apiKey;
17269
- this.model = options.model;
17270
- this.maxRetries = options.maxRetries ?? 2;
17271
- }
17272
- async rerank(query, candidates, topN) {
17273
- if (candidates.length === 0) {
17274
- return [];
17275
- }
17276
- const body = {
17277
- model: this.model,
17278
- query,
17279
- documents: candidates.map((candidate) => candidate.text),
17280
- top_n: topN ?? candidates.length,
17281
- return_documents: false
17282
- };
17283
- let attempt = 0;
17284
- while (attempt <= this.maxRetries) {
17285
- attempt += 1;
17286
- let response;
17287
- try {
17288
- response = await fetch("https://api.jina.ai/v1/rerank", {
17289
- method: "POST",
17290
- headers: {
17291
- "content-type": "application/json",
17292
- authorization: `Bearer ${this.apiKey}`
17293
- },
17294
- body: JSON.stringify(body)
17295
- });
17296
- } catch (error) {
17297
- if (attempt <= this.maxRetries) {
17298
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
17299
- continue;
17300
- }
17301
- throw error;
17302
- }
17303
- if (!response.ok) {
17304
- const retryable = response.status === 429 || response.status >= 500;
17305
- if (retryable && attempt <= this.maxRetries) {
17306
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
17307
- continue;
17308
- }
17309
- const errorBody = await response.text();
17310
- throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
17311
- }
17312
- const payload = await response.json();
17313
- const rawResults = payload.results ?? payload.data ?? [];
17314
- if (!Array.isArray(rawResults)) {
17315
- throw new Error("Invalid Jina rerank response format");
17316
- }
17317
- return rawResults.flatMap((item) => {
17318
- const index = item.index;
17319
- if (typeof index !== "number" || index < 0 || index >= candidates.length) {
17320
- return [];
17321
- }
17322
- const candidate = candidates[index];
17323
- if (!candidate) {
17324
- return [];
17325
- }
17326
- const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
17327
- return [
17328
- {
17329
- id: candidate.id,
17330
- score
17331
- }
17332
- ];
17333
- }).sort((a, b) => b.score - a.score);
17334
- }
17335
- throw new Error("Jina rerank request failed after retries");
17336
- }
17337
- };
17338
-
17339
- // src/rerank/factory.ts
17340
- function createReranker(config) {
17341
- if (!config.rerank.enabled) {
17342
- return null;
17343
- }
17344
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17345
- if (!apiKey) {
17346
- return null;
17347
- }
17348
- return new JinaReranker({
17349
- apiKey,
17350
- model: config.rerank.model
17351
- });
17352
- }
/**
 * Make sure the state directory exists and report its absolute location.
 *
 * @param {string} cwd - Base directory that `stateDir` is resolved against.
 * @param {string} stateDir - State directory, relative to `cwd` or absolute.
 * @param {object} scope - Kept in the signature for callers; not read here.
 * @returns {{ statePath: string }} Absolute path of the state directory.
 */
function ensureStateDirs(cwd, stateDir, scope) {
  const resolved = path.resolve(cwd, stateDir);
  // `recursive: true` creates missing parents and tolerates an existing directory.
  fs.mkdirSync(resolved, { recursive: true });
  return { statePath: resolved };
}
17359
17112
 
17360
- // src/vector/turso.ts
17361
- var TursoVectorStore = class {
17113
+ // src/vector/upstash.ts
/**
 * Build the name of the index that holds a scope's chunks.
 *
 * @param {{ projectId: string, scopeName: string }} scope
 * @returns {string} `<projectId>--<scopeName>`
 */
function chunkIndexName(scope) {
  const { projectId, scopeName } = scope;
  return `${projectId}--${scopeName}`;
}
/**
 * Build the name of the index that holds a scope's page-level documents.
 *
 * @param {{ projectId: string, scopeName: string }} scope
 * @returns {string} `<projectId>--<scopeName>--pages`
 */
function pageIndexName(scope) {
  const { projectId, scopeName } = scope;
  return `${projectId}--${scopeName}--pages`;
}
17120
+ var UpstashSearchStore = class {
17362
17121
  client;
17363
- dimension;
17364
- chunksReady = false;
17365
- registryReady = false;
17366
- pagesReady = false;
17367
17122
  constructor(opts) {
17368
17123
  this.client = opts.client;
17369
- this.dimension = opts.dimension;
17370
17124
  }
17371
- async ensureRegistry() {
17372
- if (this.registryReady) return;
17373
- await this.client.execute(`
17374
- CREATE TABLE IF NOT EXISTS registry (
17375
- scope_key TEXT PRIMARY KEY,
17376
- project_id TEXT NOT NULL,
17377
- scope_name TEXT NOT NULL,
17378
- model_id TEXT NOT NULL,
17379
- last_indexed_at TEXT NOT NULL,
17380
- vector_count INTEGER,
17381
- last_estimate_tokens INTEGER,
17382
- last_estimate_cost_usd REAL,
17383
- last_estimate_changed_chunks INTEGER
17384
- )
17385
- `);
17386
- const estimateCols = [
17387
- { name: "last_estimate_tokens", def: "INTEGER" },
17388
- { name: "last_estimate_cost_usd", def: "REAL" },
17389
- { name: "last_estimate_changed_chunks", def: "INTEGER" }
17390
- ];
17391
- for (const col of estimateCols) {
17392
- try {
17393
- await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
17394
- } catch (error) {
17395
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17396
- throw error;
17397
- }
17398
- }
17399
- }
17400
- this.registryReady = true;
17401
- }
17402
- async ensureChunks(dim) {
17403
- if (this.chunksReady) return;
17404
- const exists = await this.chunksTableExists();
17405
- if (exists) {
17406
- const currentDim = await this.getChunksDimension();
17407
- if (currentDim !== null && currentDim !== dim) {
17408
- await this.client.batch([
17409
- "DROP INDEX IF EXISTS idx",
17410
- "DROP TABLE IF EXISTS chunks"
17411
- ]);
17412
- }
17413
- }
17414
- await this.client.batch([
17415
- `CREATE TABLE IF NOT EXISTS chunks (
17416
- id TEXT PRIMARY KEY,
17417
- project_id TEXT NOT NULL,
17418
- scope_name TEXT NOT NULL,
17419
- url TEXT NOT NULL,
17420
- path TEXT NOT NULL,
17421
- title TEXT NOT NULL,
17422
- section_title TEXT NOT NULL DEFAULT '',
17423
- heading_path TEXT NOT NULL DEFAULT '[]',
17424
- snippet TEXT NOT NULL DEFAULT '',
17425
- chunk_text TEXT NOT NULL DEFAULT '',
17426
- ordinal INTEGER NOT NULL DEFAULT 0,
17427
- content_hash TEXT NOT NULL DEFAULT '',
17428
- model_id TEXT NOT NULL DEFAULT '',
17429
- depth INTEGER NOT NULL DEFAULT 0,
17430
- incoming_links INTEGER NOT NULL DEFAULT 0,
17431
- route_file TEXT NOT NULL DEFAULT '',
17432
- tags TEXT NOT NULL DEFAULT '[]',
17433
- description TEXT NOT NULL DEFAULT '',
17434
- keywords TEXT NOT NULL DEFAULT '[]',
17435
- embedding F32_BLOB(${dim})
17436
- )`,
17437
- `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17438
- ]);
17439
- this.chunksReady = true;
17125
+ chunkIndex(scope) {
17126
+ return this.client.index(chunkIndexName(scope));
17440
17127
  }
17441
- async ensurePages() {
17442
- if (this.pagesReady) return;
17443
- await this.client.execute(`
17444
- CREATE TABLE IF NOT EXISTS pages (
17445
- project_id TEXT NOT NULL,
17446
- scope_name TEXT NOT NULL,
17447
- url TEXT NOT NULL,
17448
- title TEXT NOT NULL,
17449
- markdown TEXT NOT NULL,
17450
- route_file TEXT NOT NULL DEFAULT '',
17451
- route_resolution TEXT NOT NULL DEFAULT 'exact',
17452
- incoming_links INTEGER NOT NULL DEFAULT 0,
17453
- outgoing_links INTEGER NOT NULL DEFAULT 0,
17454
- depth INTEGER NOT NULL DEFAULT 0,
17455
- tags TEXT NOT NULL DEFAULT '[]',
17456
- indexed_at TEXT NOT NULL,
17457
- PRIMARY KEY (project_id, scope_name, url)
17458
- )
17459
- `);
17460
- this.pagesReady = true;
17128
+ pageIndex(scope) {
17129
+ return this.client.index(pageIndexName(scope));
17461
17130
  }
17462
- async chunksTableExists() {
17463
- try {
17464
- await this.client.execute("SELECT 1 FROM chunks LIMIT 0");
17465
- return true;
17466
- } catch (error) {
17467
- if (error instanceof Error && error.message.includes("no such table")) {
17468
- return false;
17469
- }
17470
- throw error;
17471
- }
17472
- }
17473
- /**
17474
- * Read the current F32_BLOB dimension from the chunks table schema.
17475
- * Returns null if the table doesn't exist or the dimension can't be parsed.
17476
- */
17477
- async getChunksDimension() {
17478
- try {
17479
- const rs = await this.client.execute(
17480
- "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
17481
- );
17482
- if (rs.rows.length === 0) return null;
17483
- const sql = rs.rows[0].sql;
17484
- const match = sql.match(/F32_BLOB\((\d+)\)/i);
17485
- return match ? parseInt(match[1], 10) : null;
17486
- } catch {
17487
- return null;
17488
- }
17489
- }
17490
- /**
17491
- * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
17492
- * Used by `clean --remote` for a full reset.
17493
- */
17494
- async dropAllTables() {
17495
- await this.client.batch([
17496
- "DROP INDEX IF EXISTS idx",
17497
- "DROP TABLE IF EXISTS chunks",
17498
- "DROP TABLE IF EXISTS registry",
17499
- "DROP TABLE IF EXISTS pages"
17500
- ]);
17501
- this.chunksReady = false;
17502
- this.registryReady = false;
17503
- this.pagesReady = false;
17504
- }
17505
- async upsert(records, _scope) {
17506
- if (records.length === 0) return;
17507
- const dim = this.dimension ?? records[0].vector.length;
17508
- await this.ensureChunks(dim);
17131
+ async upsertChunks(chunks, scope) {
17132
+ if (chunks.length === 0) return;
17133
+ const index = this.chunkIndex(scope);
17509
17134
  const BATCH_SIZE = 100;
17510
- for (let i = 0; i < records.length; i += BATCH_SIZE) {
17511
- const batch = records.slice(i, i + BATCH_SIZE);
17512
- const stmts = batch.map((r) => ({
17513
- sql: `INSERT OR REPLACE INTO chunks
17514
- (id, project_id, scope_name, url, path, title, section_title,
17515
- heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17516
- incoming_links, route_file, tags, description, keywords, embedding)
17517
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17518
- args: [
17519
- r.id,
17520
- r.metadata.projectId,
17521
- r.metadata.scopeName,
17522
- r.metadata.url,
17523
- r.metadata.path,
17524
- r.metadata.title,
17525
- r.metadata.sectionTitle,
17526
- JSON.stringify(r.metadata.headingPath),
17527
- r.metadata.snippet,
17528
- r.metadata.chunkText,
17529
- r.metadata.ordinal,
17530
- r.metadata.contentHash,
17531
- r.metadata.modelId,
17532
- r.metadata.depth,
17533
- r.metadata.incomingLinks,
17534
- r.metadata.routeFile,
17535
- JSON.stringify(r.metadata.tags),
17536
- r.metadata.description ?? "",
17537
- JSON.stringify(r.metadata.keywords ?? []),
17538
- JSON.stringify(r.vector)
17539
- ]
17540
- }));
17541
- await this.client.batch(stmts);
17135
+ for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17136
+ const batch = chunks.slice(i, i + BATCH_SIZE);
17137
+ await index.upsert(batch);
17542
17138
  }
17543
17139
  }
17544
- async query(queryVector, opts, scope) {
17545
- const dim = this.dimension ?? queryVector.length;
17546
- await this.ensureChunks(dim);
17547
- const queryJson = JSON.stringify(queryVector);
17548
- const rs = await this.client.execute({
17549
- sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
17550
- c.section_title, c.heading_path, c.snippet, c.chunk_text,
17551
- c.ordinal, c.content_hash,
17552
- c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17553
- c.description, c.keywords,
17554
- vector_distance_cos(c.embedding, vector(?)) AS distance
17555
- FROM vector_top_k('idx', vector(?), ?) AS v
17556
- JOIN chunks AS c ON c.rowid = v.id`,
17557
- args: [queryJson, queryJson, opts.topK]
17140
+ async search(query, opts, scope) {
17141
+ const index = this.chunkIndex(scope);
17142
+ const results = await index.search({
17143
+ query,
17144
+ limit: opts.limit,
17145
+ semanticWeight: opts.semanticWeight,
17146
+ inputEnrichment: opts.inputEnrichment,
17147
+ reranking: opts.reranking,
17148
+ filter: opts.filter
17558
17149
  });
17559
- let hits = [];
17560
- for (const row of rs.rows) {
17561
- const projectId = row.project_id;
17562
- const scopeName = row.scope_name;
17563
- if (projectId !== scope.projectId || scopeName !== scope.scopeName) {
17564
- continue;
17150
+ return results.map((doc) => ({
17151
+ id: doc.id,
17152
+ score: doc.score,
17153
+ metadata: {
17154
+ projectId: doc.metadata?.projectId ?? "",
17155
+ scopeName: doc.metadata?.scopeName ?? "",
17156
+ url: doc.content.url,
17157
+ path: doc.metadata?.path ?? "",
17158
+ title: doc.content.title,
17159
+ sectionTitle: doc.content.sectionTitle,
17160
+ headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17161
+ snippet: doc.metadata?.snippet ?? "",
17162
+ chunkText: doc.content.text,
17163
+ ordinal: doc.metadata?.ordinal ?? 0,
17164
+ contentHash: doc.metadata?.contentHash ?? "",
17165
+ depth: doc.metadata?.depth ?? 0,
17166
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17167
+ routeFile: doc.metadata?.routeFile ?? "",
17168
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17169
+ description: doc.metadata?.description || void 0,
17170
+ keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17565
17171
  }
17566
- const rowPath = row.path;
17567
- if (opts.pathPrefix) {
17568
- const rawPrefix = opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}`;
17569
- const prefix = rawPrefix.endsWith("/") ? rawPrefix : `${rawPrefix}/`;
17570
- const normalizedPath = rowPath.replace(/\/$/, "");
17571
- const normalizedPrefix = rawPrefix.replace(/\/$/, "");
17572
- if (normalizedPath !== normalizedPrefix && !rowPath.startsWith(prefix)) {
17573
- continue;
17574
- }
17575
- }
17576
- const tags = JSON.parse(row.tags || "[]");
17577
- if (opts.tags && opts.tags.length > 0) {
17578
- if (!opts.tags.every((t) => tags.includes(t))) {
17579
- continue;
17580
- }
17581
- }
17582
- const distance = row.distance;
17583
- const score = 1 - distance;
17584
- const description = row.description || void 0;
17585
- const keywords = (() => {
17586
- const raw = row.keywords || "[]";
17587
- const parsed = JSON.parse(raw);
17588
- return parsed.length > 0 ? parsed : void 0;
17589
- })();
17590
- hits.push({
17591
- id: row.id,
17592
- score,
17593
- metadata: {
17594
- projectId,
17595
- scopeName,
17596
- url: row.url,
17597
- path: rowPath,
17598
- title: row.title,
17599
- sectionTitle: row.section_title,
17600
- headingPath: JSON.parse(row.heading_path || "[]"),
17601
- snippet: row.snippet,
17602
- chunkText: row.chunk_text || "",
17603
- ordinal: row.ordinal || 0,
17604
- contentHash: row.content_hash,
17605
- modelId: row.model_id,
17606
- depth: row.depth,
17607
- incomingLinks: row.incoming_links,
17608
- routeFile: row.route_file,
17609
- tags,
17610
- description,
17611
- keywords
17612
- }
17172
+ }));
17173
+ }
17174
+ async searchPages(query, opts, scope) {
17175
+ const index = this.pageIndex(scope);
17176
+ let results;
17177
+ try {
17178
+ results = await index.search({
17179
+ query,
17180
+ limit: opts.limit,
17181
+ semanticWeight: opts.semanticWeight,
17182
+ inputEnrichment: opts.inputEnrichment,
17183
+ reranking: true,
17184
+ filter: opts.filter
17613
17185
  });
17186
+ } catch {
17187
+ return [];
17614
17188
  }
17615
- hits.sort((a, b) => b.score - a.score);
17616
- return hits;
17189
+ return results.map((doc) => ({
17190
+ id: doc.id,
17191
+ score: doc.score,
17192
+ title: doc.content.title,
17193
+ url: doc.content.url,
17194
+ description: doc.content.description ?? "",
17195
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17196
+ depth: doc.metadata?.depth ?? 0,
17197
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17198
+ routeFile: doc.metadata?.routeFile ?? ""
17199
+ }));
17617
17200
  }
17618
17201
  async deleteByIds(ids, scope) {
17619
17202
  if (ids.length === 0) return;
17203
+ const index = this.chunkIndex(scope);
17620
17204
  const BATCH_SIZE = 500;
17621
17205
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17622
17206
  const batch = ids.slice(i, i + BATCH_SIZE);
17623
- const placeholders = batch.map(() => "?").join(", ");
17624
- await this.client.execute({
17625
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
17626
- args: [scope.projectId, scope.scopeName, ...batch]
17627
- });
17207
+ await index.delete(batch);
17628
17208
  }
17629
17209
  }
17630
17210
  async deleteScope(scope) {
17631
- await this.ensureRegistry();
17632
17211
  try {
17633
- await this.client.execute({
17634
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ?`,
17635
- args: [scope.projectId, scope.scopeName]
17636
- });
17637
- } catch (error) {
17638
- if (error instanceof Error && !error.message.includes("no such table")) {
17639
- throw error;
17640
- }
17212
+ const chunkIdx = this.chunkIndex(scope);
17213
+ await chunkIdx.deleteIndex();
17214
+ } catch {
17641
17215
  }
17642
17216
  try {
17643
- await this.client.execute({
17644
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
17645
- args: [scope.projectId, scope.scopeName]
17646
- });
17647
- } catch (error) {
17648
- if (error instanceof Error && !error.message.includes("no such table")) {
17649
- throw error;
17650
- }
17217
+ const pageIdx = this.pageIndex(scope);
17218
+ await pageIdx.deleteIndex();
17219
+ } catch {
17651
17220
  }
17652
- await this.client.execute({
17653
- sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
17654
- args: [scope.projectId, scope.scopeName]
17655
- });
17656
- }
17657
- async listScopes(scopeProjectId) {
17658
- await this.ensureRegistry();
17659
- const rs = await this.client.execute({
17660
- sql: `SELECT project_id, scope_name, model_id, last_indexed_at, vector_count,
17661
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks
17662
- FROM registry WHERE project_id = ?`,
17663
- args: [scopeProjectId]
17664
- });
17665
- return rs.rows.map((row) => ({
17666
- projectId: row.project_id,
17667
- scopeName: row.scope_name,
17668
- modelId: row.model_id,
17669
- lastIndexedAt: row.last_indexed_at,
17670
- vectorCount: row.vector_count,
17671
- lastEstimateTokens: row.last_estimate_tokens,
17672
- lastEstimateCostUSD: row.last_estimate_cost_usd,
17673
- lastEstimateChangedChunks: row.last_estimate_changed_chunks
17674
- }));
17675
17221
  }
17676
- async recordScope(info) {
17677
- await this.ensureRegistry();
17678
- const key = `${info.projectId}:${info.scopeName}`;
17679
- await this.client.execute({
17680
- sql: `INSERT OR REPLACE INTO registry
17681
- (scope_key, project_id, scope_name, model_id, last_indexed_at, vector_count,
17682
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks)
17683
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
17684
- args: [
17685
- key,
17686
- info.projectId,
17687
- info.scopeName,
17688
- info.modelId,
17689
- info.lastIndexedAt,
17690
- info.vectorCount ?? null,
17691
- info.lastEstimateTokens ?? null,
17692
- info.lastEstimateCostUSD ?? null,
17693
- info.lastEstimateChangedChunks ?? null
17694
- ]
17695
- });
17222
+ async listScopes(projectId) {
17223
+ const allIndexes = await this.client.listIndexes();
17224
+ const prefix = `${projectId}--`;
17225
+ const scopeNames = /* @__PURE__ */ new Set();
17226
+ for (const name of allIndexes) {
17227
+ if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17228
+ const scopeName = name.slice(prefix.length);
17229
+ scopeNames.add(scopeName);
17230
+ }
17231
+ }
17232
+ const scopes = [];
17233
+ for (const scopeName of scopeNames) {
17234
+ const scope = {
17235
+ projectId,
17236
+ scopeName,
17237
+ scopeId: `${projectId}:${scopeName}`
17238
+ };
17239
+ try {
17240
+ const info = await this.chunkIndex(scope).info();
17241
+ scopes.push({
17242
+ projectId,
17243
+ scopeName,
17244
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17245
+ documentCount: info.documentCount
17246
+ });
17247
+ } catch {
17248
+ scopes.push({
17249
+ projectId,
17250
+ scopeName,
17251
+ lastIndexedAt: "unknown",
17252
+ documentCount: 0
17253
+ });
17254
+ }
17255
+ }
17256
+ return scopes;
17696
17257
  }
17697
17258
  async getContentHashes(scope) {
17698
- const exists = await this.chunksTableExists();
17699
- if (!exists) return /* @__PURE__ */ new Map();
17700
- const rs = await this.client.execute({
17701
- sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
17702
- args: [scope.projectId, scope.scopeName]
17703
- });
17704
17259
  const map = /* @__PURE__ */ new Map();
17705
- for (const row of rs.rows) {
17706
- map.set(row.id, row.content_hash);
17260
+ const index = this.chunkIndex(scope);
17261
+ let cursor = "0";
17262
+ try {
17263
+ for (; ; ) {
17264
+ const result = await index.range({ cursor, limit: 100 });
17265
+ for (const doc of result.documents) {
17266
+ if (doc.metadata?.contentHash) {
17267
+ map.set(doc.id, doc.metadata.contentHash);
17268
+ }
17269
+ }
17270
+ if (!result.nextCursor || result.nextCursor === "0") break;
17271
+ cursor = result.nextCursor;
17272
+ }
17273
+ } catch {
17707
17274
  }
17708
17275
  return map;
17709
17276
  }
17710
17277
  async upsertPages(pages, scope) {
17711
17278
  if (pages.length === 0) return;
17712
- await this.ensurePages();
17713
- for (const page of pages) {
17714
- if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
17715
- throw new Error(
17716
- `Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
17717
- );
17718
- }
17719
- }
17720
- const BATCH_SIZE = 100;
17279
+ const index = this.pageIndex(scope);
17280
+ const BATCH_SIZE = 50;
17721
17281
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17722
17282
  const batch = pages.slice(i, i + BATCH_SIZE);
17723
- const stmts = batch.map((p) => ({
17724
- sql: `INSERT OR REPLACE INTO pages
17725
- (project_id, scope_name, url, title, markdown, route_file,
17726
- route_resolution, incoming_links, outgoing_links, depth, tags, indexed_at)
17727
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
17728
- args: [
17729
- p.projectId,
17730
- p.scopeName,
17731
- p.url,
17732
- p.title,
17733
- p.markdown,
17734
- p.routeFile,
17735
- p.routeResolution,
17736
- p.incomingLinks,
17737
- p.outgoingLinks,
17738
- p.depth,
17739
- JSON.stringify(p.tags),
17740
- p.indexedAt
17741
- ]
17283
+ const docs = batch.map((p) => ({
17284
+ id: p.url,
17285
+ content: {
17286
+ title: p.title,
17287
+ url: p.url,
17288
+ type: "page",
17289
+ description: p.description ?? "",
17290
+ keywords: (p.keywords ?? []).join(","),
17291
+ summary: p.summary ?? "",
17292
+ tags: p.tags.join(",")
17293
+ },
17294
+ metadata: {
17295
+ markdown: p.markdown,
17296
+ projectId: p.projectId,
17297
+ scopeName: p.scopeName,
17298
+ routeFile: p.routeFile,
17299
+ routeResolution: p.routeResolution,
17300
+ incomingLinks: p.incomingLinks,
17301
+ outgoingLinks: p.outgoingLinks,
17302
+ depth: p.depth,
17303
+ indexedAt: p.indexedAt
17304
+ }
17742
17305
  }));
17743
- await this.client.batch(stmts);
17306
+ await index.upsert(docs);
17744
17307
  }
17745
17308
  }
17746
17309
  async getPage(url, scope) {
17747
- await this.ensurePages();
17748
- const rs = await this.client.execute({
17749
- sql: `SELECT * FROM pages WHERE project_id = ? AND scope_name = ? AND url = ?`,
17750
- args: [scope.projectId, scope.scopeName, url]
17751
- });
17752
- if (rs.rows.length === 0) return null;
17753
- const row = rs.rows[0];
17754
- return {
17755
- url: row.url,
17756
- title: row.title,
17757
- markdown: row.markdown,
17758
- projectId: row.project_id,
17759
- scopeName: row.scope_name,
17760
- routeFile: row.route_file,
17761
- routeResolution: row.route_resolution,
17762
- incomingLinks: row.incoming_links,
17763
- outgoingLinks: row.outgoing_links,
17764
- depth: row.depth,
17765
- tags: JSON.parse(row.tags || "[]"),
17766
- indexedAt: row.indexed_at
17767
- };
17310
+ const index = this.pageIndex(scope);
17311
+ try {
17312
+ const results = await index.fetch([url]);
17313
+ const doc = results[0];
17314
+ if (!doc) return null;
17315
+ return {
17316
+ url: doc.content.url,
17317
+ title: doc.content.title,
17318
+ markdown: doc.metadata.markdown,
17319
+ projectId: doc.metadata.projectId,
17320
+ scopeName: doc.metadata.scopeName,
17321
+ routeFile: doc.metadata.routeFile,
17322
+ routeResolution: doc.metadata.routeResolution,
17323
+ incomingLinks: doc.metadata.incomingLinks,
17324
+ outgoingLinks: doc.metadata.outgoingLinks,
17325
+ depth: doc.metadata.depth,
17326
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17327
+ indexedAt: doc.metadata.indexedAt,
17328
+ summary: doc.content.summary || void 0,
17329
+ description: doc.content.description || void 0,
17330
+ keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17331
+ };
17332
+ } catch {
17333
+ return null;
17334
+ }
17768
17335
  }
17769
17336
  async deletePages(scope) {
17770
- await this.ensurePages();
17771
- await this.client.execute({
17772
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
17773
- args: [scope.projectId, scope.scopeName]
17774
- });
17775
- }
17776
- async getScopeModelId(scope) {
17777
- await this.ensureRegistry();
17778
- const rs = await this.client.execute({
17779
- sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
17780
- args: [scope.projectId, scope.scopeName]
17781
- });
17782
- if (rs.rows.length === 0) return null;
17783
- return rs.rows[0].model_id;
17337
+ try {
17338
+ const index = this.pageIndex(scope);
17339
+ await index.reset();
17340
+ } catch {
17341
+ }
17784
17342
  }
17785
17343
  async health() {
17786
17344
  try {
17787
- await this.client.execute("SELECT 1");
17345
+ await this.client.info();
17788
17346
  return { ok: true };
17789
17347
  } catch (error) {
17790
17348
  return {
@@ -17793,40 +17351,34 @@ var TursoVectorStore = class {
17793
17351
  };
17794
17352
  }
17795
17353
  }
17354
+ async dropAllIndexes(projectId) {
17355
+ const allIndexes = await this.client.listIndexes();
17356
+ const prefix = `${projectId}--`;
17357
+ for (const name of allIndexes) {
17358
+ if (name.startsWith(prefix)) {
17359
+ try {
17360
+ const index = this.client.index(name);
17361
+ await index.deleteIndex();
17362
+ } catch {
17363
+ }
17364
+ }
17365
+ }
17366
+ }
17796
17367
  };
17797
17368
 
17798
17369
  // src/vector/factory.ts
/**
 * Build an `UpstashSearchStore` from configuration.
 *
 * The URL and token are each taken from the config value when it is not
 * null/undefined, otherwise from the environment variable named by
 * `upstash.urlEnv` / `upstash.tokenEnv`. `@upstash/search` is imported
 * lazily, only after both credentials have been found.
 *
 * @param {object} config - Resolved config; only `config.upstash` is read.
 * @returns {Promise<UpstashSearchStore>}
 * @throws {SearchSocketError} `VECTOR_BACKEND_UNAVAILABLE` when either the
 *   URL or the token is missing or empty.
 */
async function createUpstashStore(config) {
  const settings = config.upstash;
  const url = settings.url ?? process.env[settings.urlEnv];
  const token = settings.token ?? process.env[settings.tokenEnv];
  if (!(url && token)) {
    throw new SearchSocketError(
      "VECTOR_BACKEND_UNAVAILABLE",
      `Missing Upstash Search credentials. Set ${settings.urlEnv} and ${settings.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
    );
  }
  const { Search } = await import('@upstash/search');
  return new UpstashSearchStore({ client: new Search({ url, token }) });
}
17831
17383
  function sha1(input) {
17832
17384
  return createHash("sha1").update(input).digest("hex");
@@ -17845,13 +17397,6 @@ function normalizeUrlPath(rawPath) {
17845
17397
  }
17846
17398
  return out;
17847
17399
  }
17848
- function urlPathToMirrorRelative(urlPath) {
17849
- const normalized = normalizeUrlPath(urlPath);
17850
- if (normalized === "/") {
17851
- return "index.md";
17852
- }
17853
- return `${normalized.slice(1)}.md`;
17854
- }
17855
17400
  function staticHtmlFileToUrl(filePath, rootDir) {
17856
17401
  const relative = path.relative(rootDir, filePath).replace(/\\/g, "/");
17857
17402
  if (relative === "index.html") {
@@ -18126,7 +17671,7 @@ function buildEmbeddingText(chunk, prependTitle) {
18126
17671
 
18127
17672
  ${chunk.chunkText}`;
18128
17673
  }
18129
- function chunkMirrorPage(page, config, scope) {
17674
+ function chunkPage(page, config, scope) {
18130
17675
  const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
18131
17676
  const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
18132
17677
  const chunks = [];
@@ -19157,53 +18702,6 @@ function extractFromMarkdown(url, markdown, title) {
19157
18702
  weight: mdWeight
19158
18703
  };
19159
18704
  }
19160
- function yamlString(value) {
19161
- return JSON.stringify(value);
19162
- }
19163
- function yamlArray(values) {
19164
- return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
19165
- }
19166
- function buildMirrorMarkdown(page) {
19167
- const frontmatterLines = [
19168
- "---",
19169
- `url: ${yamlString(page.url)}`,
19170
- `title: ${yamlString(page.title)}`,
19171
- `scope: ${yamlString(page.scope)}`,
19172
- `routeFile: ${yamlString(page.routeFile)}`,
19173
- `routeResolution: ${yamlString(page.routeResolution)}`,
19174
- `generatedAt: ${yamlString(page.generatedAt)}`,
19175
- `incomingLinks: ${page.incomingLinks}`,
19176
- `outgoingLinks: ${page.outgoingLinks}`,
19177
- `depth: ${page.depth}`,
19178
- `tags: ${yamlArray(page.tags)}`,
19179
- "---",
19180
- ""
19181
- ];
19182
- return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
19183
- }
19184
- function stripGeneratedAt(content) {
19185
- return content.replace(/^generatedAt: .*$/m, "");
19186
- }
19187
- async function writeMirrorPage(statePath, scope, page) {
19188
- const relative = urlPathToMirrorRelative(page.url);
19189
- const outputPath = path.join(statePath, "pages", scope.scopeName, relative);
19190
- await fs4.mkdir(path.dirname(outputPath), { recursive: true });
19191
- const newContent = buildMirrorMarkdown(page);
19192
- try {
19193
- const existing = await fs4.readFile(outputPath, "utf8");
19194
- if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
19195
- return outputPath;
19196
- }
19197
- } catch {
19198
- }
19199
- await fs4.writeFile(outputPath, newContent, "utf8");
19200
- return outputPath;
19201
- }
19202
- async function cleanMirrorForScope(statePath, scope) {
19203
- const target = path.join(statePath, "pages", scope.scopeName);
19204
- await fs4.rm(target, { recursive: true, force: true });
19205
- await fs4.mkdir(target, { recursive: true });
19206
- }
19207
18705
  function segmentToRegex(segment) {
19208
18706
  if (segment.startsWith("(") && segment.endsWith(")")) {
19209
18707
  return { regex: "", score: 0 };
@@ -19396,7 +18894,7 @@ async function parseManifest(cwd, outputDir) {
19396
18894
  const manifestPath = path.resolve(cwd, outputDir, "server", "manifest-full.js");
19397
18895
  let content;
19398
18896
  try {
19399
- content = await fs4.readFile(manifestPath, "utf8");
18897
+ content = await fs3.readFile(manifestPath, "utf8");
19400
18898
  } catch {
19401
18899
  throw new SearchSocketError(
19402
18900
  "BUILD_MANIFEST_NOT_FOUND",
@@ -19569,7 +19067,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
19569
19067
  const visited = /* @__PURE__ */ new Set();
19570
19068
  const pages = [];
19571
19069
  const queue = [];
19572
- const limit = pLimit2(8);
19070
+ const limit = pLimit(8);
19573
19071
  for (const seed of seedUrls) {
19574
19072
  const normalized = normalizeUrlPath(seed);
19575
19073
  if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
@@ -19651,7 +19149,7 @@ async function loadBuildPages(cwd, config, maxPages) {
19651
19149
  const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
19652
19150
  const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
19653
19151
  try {
19654
- const concurrencyLimit = pLimit2(8);
19152
+ const concurrencyLimit = pLimit(8);
19655
19153
  const results = await Promise.allSettled(
19656
19154
  selected.map(
19657
19155
  (route) => concurrencyLimit(async () => {
@@ -19725,7 +19223,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
19725
19223
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
19726
19224
  const pages = [];
19727
19225
  for (const filePath of selected) {
19728
- const raw = await fs4.readFile(filePath, "utf8");
19226
+ const raw = await fs3.readFile(filePath, "utf8");
19729
19227
  const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
19730
19228
  pages.push({
19731
19229
  url: filePathToUrl(filePath, baseDir),
@@ -19820,7 +19318,7 @@ async function loadCrawledPages(config, maxPages) {
19820
19318
  const routes = await resolveRoutes(config);
19821
19319
  const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
19822
19320
  const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
19823
- const concurrencyLimit = pLimit2(8);
19321
+ const concurrencyLimit = pLimit(8);
19824
19322
  const results = await Promise.allSettled(
19825
19323
  selected.map(
19826
19324
  (route) => concurrencyLimit(async () => {
@@ -19861,7 +19359,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
19861
19359
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
19862
19360
  const pages = [];
19863
19361
  for (const filePath of selected) {
19864
- const html = await fs4.readFile(filePath, "utf8");
19362
+ const html = await fs3.readFile(filePath, "utf8");
19865
19363
  pages.push({
19866
19364
  url: staticHtmlFileToUrl(filePath, outputDir),
19867
19365
  html,
@@ -19924,7 +19422,7 @@ function isBlockedByRobots(urlPath, rules3) {
19924
19422
  }
19925
19423
  async function loadRobotsTxtFromDir(dir) {
19926
19424
  try {
19927
- const content = await fs4.readFile(path.join(dir, "robots.txt"), "utf8");
19425
+ const content = await fs3.readFile(path.join(dir, "robots.txt"), "utf8");
19928
19426
  return parseRobotsTxt(content);
19929
19427
  } catch {
19930
19428
  return null;
@@ -19949,7 +19447,12 @@ function nonNegativeOrZero(value) {
19949
19447
  }
19950
19448
  return Math.max(0, value);
19951
19449
  }
19952
- function rankHits(hits, config) {
19450
+ function normalizeForTitleMatch(text) {
19451
+ return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
19452
+ }
19453
+ function rankHits(hits, config, query) {
19454
+ const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
19455
+ const titleMatchWeight = config.ranking.weights.titleMatch;
19953
19456
  return hits.map((hit) => {
19954
19457
  let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
19955
19458
  if (config.ranking.enableIncomingLinkBoost) {
@@ -19960,6 +19463,12 @@ function rankHits(hits, config) {
19960
19463
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
19961
19464
  score += depthBoost * config.ranking.weights.depth;
19962
19465
  }
19466
+ if (normalizedQuery && titleMatchWeight > 0) {
19467
+ const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
19468
+ if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
19469
+ score += titleMatchWeight;
19470
+ }
19471
+ }
19963
19472
  return {
19964
19473
  hit,
19965
19474
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
@@ -19969,6 +19478,30 @@ function rankHits(hits, config) {
19969
19478
  return Number.isNaN(delta) ? 0 : delta;
19970
19479
  });
19971
19480
  }
19481
+ function trimByScoreGap(results, config) {
19482
+ if (results.length === 0) return results;
19483
+ const threshold = config.ranking.scoreGapThreshold;
19484
+ const minScore = config.ranking.minScore;
19485
+ if (minScore > 0 && results.length > 0) {
19486
+ const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
19487
+ const mid = Math.floor(sortedScores.length / 2);
19488
+ const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
19489
+ if (median < minScore) return [];
19490
+ }
19491
+ if (threshold > 0 && results.length > 1) {
19492
+ for (let i = 1; i < results.length; i++) {
19493
+ const prev = results[i - 1].pageScore;
19494
+ const current = results[i].pageScore;
19495
+ if (prev > 0) {
19496
+ const gap = (prev - current) / prev;
19497
+ if (gap >= threshold) {
19498
+ return results.slice(0, i);
19499
+ }
19500
+ }
19501
+ }
19502
+ }
19503
+ return results;
19504
+ }
19972
19505
  function findPageWeight(url, pageWeights) {
19973
19506
  let bestPattern = "";
19974
19507
  let bestWeight = 1;
@@ -20023,6 +19556,61 @@ function aggregateByPage(ranked, config) {
20023
19556
  return Number.isNaN(delta) ? 0 : delta;
20024
19557
  });
20025
19558
  }
19559
+ function mergePageAndChunkResults(pageHits, rankedChunks, config) {
19560
+ if (pageHits.length === 0) return rankedChunks;
19561
+ const w = config.search.pageSearchWeight;
19562
+ const pageScoreMap = /* @__PURE__ */ new Map();
19563
+ for (const ph of pageHits) {
19564
+ pageScoreMap.set(ph.url, ph);
19565
+ }
19566
+ const pagesWithChunks = /* @__PURE__ */ new Set();
19567
+ const merged = rankedChunks.map((ranked) => {
19568
+ const url = ranked.hit.metadata.url;
19569
+ const pageHit = pageScoreMap.get(url);
19570
+ if (pageHit) {
19571
+ pagesWithChunks.add(url);
19572
+ const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
19573
+ return {
19574
+ hit: ranked.hit,
19575
+ finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
19576
+ };
19577
+ }
19578
+ return ranked;
19579
+ });
19580
+ for (const [url, pageHit] of pageScoreMap) {
19581
+ if (pagesWithChunks.has(url)) continue;
19582
+ const syntheticScore = pageHit.score * w;
19583
+ const syntheticHit = {
19584
+ id: `page:${url}`,
19585
+ score: pageHit.score,
19586
+ metadata: {
19587
+ projectId: "",
19588
+ scopeName: "",
19589
+ url: pageHit.url,
19590
+ path: pageHit.url,
19591
+ title: pageHit.title,
19592
+ sectionTitle: "",
19593
+ headingPath: [],
19594
+ snippet: pageHit.description || pageHit.title,
19595
+ chunkText: pageHit.description || pageHit.title,
19596
+ ordinal: 0,
19597
+ contentHash: "",
19598
+ depth: pageHit.depth,
19599
+ incomingLinks: pageHit.incomingLinks,
19600
+ routeFile: pageHit.routeFile,
19601
+ tags: pageHit.tags
19602
+ }
19603
+ };
19604
+ merged.push({
19605
+ hit: syntheticHit,
19606
+ finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
19607
+ });
19608
+ }
19609
+ return merged.sort((a, b) => {
19610
+ const delta = b.finalScore - a.finalScore;
19611
+ return Number.isNaN(delta) ? 0 : delta;
19612
+ });
19613
+ }
20026
19614
 
20027
19615
  // src/utils/time.ts
20028
19616
  function nowIso() {
@@ -20033,34 +19621,41 @@ function hrTimeMs(start) {
20033
19621
  }
20034
19622
 
20035
19623
  // src/indexing/pipeline.ts
20036
- var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
20037
- "jina-embeddings-v3": 2e-5,
20038
- "jina-embeddings-v5-text-small": 5e-5
20039
- };
20040
- var DEFAULT_EMBEDDING_PRICE_PER_1K = 5e-5;
19624
+ function buildPageSummary(page, maxChars = 3500) {
19625
+ const parts = [page.title];
19626
+ if (page.description) {
19627
+ parts.push(page.description);
19628
+ }
19629
+ if (page.keywords && page.keywords.length > 0) {
19630
+ parts.push(page.keywords.join(", "));
19631
+ }
19632
+ const plainBody = page.markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/^#{1,6}\s+/gm, "").replace(/[>*_|~\-]/g, " ").replace(/\s+/g, " ").trim();
19633
+ if (plainBody) {
19634
+ parts.push(plainBody);
19635
+ }
19636
+ const joined = parts.join("\n\n");
19637
+ if (joined.length <= maxChars) return joined;
19638
+ return joined.slice(0, maxChars).trim();
19639
+ }
20041
19640
  var IndexPipeline = class _IndexPipeline {
20042
19641
  cwd;
20043
19642
  config;
20044
- embeddings;
20045
- vectorStore;
19643
+ store;
20046
19644
  logger;
20047
19645
  constructor(options) {
20048
19646
  this.cwd = options.cwd;
20049
19647
  this.config = options.config;
20050
- this.embeddings = options.embeddings;
20051
- this.vectorStore = options.vectorStore;
19648
+ this.store = options.store;
20052
19649
  this.logger = options.logger;
20053
19650
  }
20054
19651
  static async create(options = {}) {
20055
19652
  const cwd = path.resolve(options.cwd ?? process.cwd());
20056
19653
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
20057
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
20058
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
19654
+ const store = options.store ?? await createUpstashStore(config);
20059
19655
  return new _IndexPipeline({
20060
19656
  cwd,
20061
19657
  config,
20062
- embeddings,
20063
- vectorStore,
19658
+ store,
20064
19659
  logger: options.logger ?? new Logger()
20065
19660
  });
20066
19661
  }
@@ -20080,25 +19675,17 @@ var IndexPipeline = class _IndexPipeline {
20080
19675
  stageTimingsMs[name] = Math.round(hrTimeMs(start));
20081
19676
  };
20082
19677
  const scope = resolveScope(this.config, options.scopeOverride);
20083
- const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
19678
+ ensureStateDirs(this.cwd, this.config.state.dir);
20084
19679
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
20085
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
19680
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
20086
19681
  if (options.force) {
20087
19682
  this.logger.info("Force mode enabled \u2014 full rebuild");
20088
- await cleanMirrorForScope(statePath, scope);
20089
19683
  }
20090
19684
  if (options.dryRun) {
20091
19685
  this.logger.info("Dry run \u2014 no writes will be performed");
20092
19686
  }
20093
19687
  const manifestStart = stageStart();
20094
- const existingHashes = await this.vectorStore.getContentHashes(scope);
20095
- const existingModelId = await this.vectorStore.getScopeModelId(scope);
20096
- if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
20097
- throw new SearchSocketError(
20098
- "EMBEDDING_MODEL_MISMATCH",
20099
- `Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
20100
- );
20101
- }
19688
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
20102
19689
  stageEnd("manifest", manifestStart);
20103
19690
  this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20104
19691
  const sourceStart = stageStart();
@@ -20227,9 +19814,9 @@ var IndexPipeline = class _IndexPipeline {
20227
19814
  }
20228
19815
  stageEnd("links", linkStart);
20229
19816
  this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
20230
- const mirrorStart = stageStart();
20231
- this.logger.info("Writing mirror pages...");
20232
- const mirrorPages = [];
19817
+ const pagesStart = stageStart();
19818
+ this.logger.info("Building indexed pages...");
19819
+ const pages = [];
20233
19820
  let routeExact = 0;
20234
19821
  let routeBestEffort = 0;
20235
19822
  const precomputedRoutes = /* @__PURE__ */ new Map();
@@ -20258,7 +19845,7 @@ var IndexPipeline = class _IndexPipeline {
20258
19845
  } else {
20259
19846
  routeExact += 1;
20260
19847
  }
20261
- const mirror = {
19848
+ const indexedPage = {
20262
19849
  url: page.url,
20263
19850
  title: page.title,
20264
19851
  scope: scope.scopeName,
@@ -20273,35 +19860,38 @@ var IndexPipeline = class _IndexPipeline {
20273
19860
  description: page.description,
20274
19861
  keywords: page.keywords
20275
19862
  };
20276
- mirrorPages.push(mirror);
20277
- if (this.config.state.writeMirror) {
20278
- await writeMirrorPage(statePath, scope, mirror);
20279
- }
20280
- this.logger.event("markdown_written", { url: page.url });
19863
+ pages.push(indexedPage);
19864
+ this.logger.event("page_indexed", { url: page.url });
20281
19865
  }
20282
19866
  if (!options.dryRun) {
20283
- const pageRecords = mirrorPages.map((mp) => ({
20284
- url: mp.url,
20285
- title: mp.title,
20286
- markdown: mp.markdown,
20287
- projectId: scope.projectId,
20288
- scopeName: scope.scopeName,
20289
- routeFile: mp.routeFile,
20290
- routeResolution: mp.routeResolution,
20291
- incomingLinks: mp.incomingLinks,
20292
- outgoingLinks: mp.outgoingLinks,
20293
- depth: mp.depth,
20294
- tags: mp.tags,
20295
- indexedAt: mp.generatedAt
20296
- }));
20297
- await this.vectorStore.deletePages(scope);
20298
- await this.vectorStore.upsertPages(pageRecords, scope);
19867
+ const pageRecords = pages.map((p) => {
19868
+ const summary = buildPageSummary(p);
19869
+ return {
19870
+ url: p.url,
19871
+ title: p.title,
19872
+ markdown: p.markdown,
19873
+ projectId: scope.projectId,
19874
+ scopeName: scope.scopeName,
19875
+ routeFile: p.routeFile,
19876
+ routeResolution: p.routeResolution,
19877
+ incomingLinks: p.incomingLinks,
19878
+ outgoingLinks: p.outgoingLinks,
19879
+ depth: p.depth,
19880
+ tags: p.tags,
19881
+ indexedAt: p.generatedAt,
19882
+ summary,
19883
+ description: p.description,
19884
+ keywords: p.keywords
19885
+ };
19886
+ });
19887
+ await this.store.deletePages(scope);
19888
+ await this.store.upsertPages(pageRecords, scope);
20299
19889
  }
20300
- stageEnd("mirror", mirrorStart);
20301
- this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
19890
+ stageEnd("pages", pagesStart);
19891
+ this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
20302
19892
  const chunkStart = stageStart();
20303
19893
  this.logger.info("Chunking pages...");
20304
- let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
19894
+ let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
20305
19895
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
20306
19896
  if (typeof maxChunks === "number") {
20307
19897
  chunks = chunks.slice(0, maxChunks);
@@ -20333,125 +19923,59 @@ var IndexPipeline = class _IndexPipeline {
20333
19923
  });
20334
19924
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
20335
19925
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
20336
- const embedStart = stageStart();
20337
- const chunkTokenEstimates = /* @__PURE__ */ new Map();
20338
- for (const chunk of changedChunks) {
20339
- chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
20340
- }
20341
- const estimatedTokens = changedChunks.reduce(
20342
- (sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
20343
- 0
20344
- );
20345
- const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
20346
- const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
20347
- let newEmbeddings = 0;
20348
- const vectorsByChunk = /* @__PURE__ */ new Map();
19926
+ const upsertStart = stageStart();
19927
+ let documentsUpserted = 0;
20349
19928
  if (!options.dryRun && changedChunks.length > 0) {
20350
- this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
20351
- const embeddings = await this.embeddings.embedTexts(
20352
- changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
20353
- this.config.embeddings.model,
20354
- "retrieval.passage"
20355
- );
20356
- if (embeddings.length !== changedChunks.length) {
20357
- throw new SearchSocketError(
20358
- "VECTOR_BACKEND_UNAVAILABLE",
20359
- `Embedding provider returned ${embeddings.length} vectors for ${changedChunks.length} chunks.`
20360
- );
20361
- }
20362
- for (let i = 0; i < changedChunks.length; i += 1) {
20363
- const chunk = changedChunks[i];
20364
- const embedding = embeddings[i];
20365
- if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
20366
- throw new SearchSocketError(
20367
- "VECTOR_BACKEND_UNAVAILABLE",
20368
- `Embedding provider returned an invalid vector for chunk index ${i}.`
20369
- );
20370
- }
20371
- vectorsByChunk.set(chunk.chunkKey, embedding);
20372
- newEmbeddings += 1;
20373
- this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
20374
- }
20375
- }
20376
- stageEnd("embedding", embedStart);
20377
- if (changedChunks.length > 0) {
20378
- this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
20379
- } else {
20380
- this.logger.info("No chunks to embed \u2014 all up to date");
20381
- }
20382
- const syncStart = stageStart();
20383
- if (!options.dryRun) {
20384
- this.logger.info("Syncing vectors...");
20385
- const upserts = [];
20386
- for (const chunk of changedChunks) {
20387
- const vector = vectorsByChunk.get(chunk.chunkKey);
20388
- if (!vector) {
20389
- continue;
20390
- }
20391
- upserts.push({
19929
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
19930
+ const UPSTASH_CONTENT_LIMIT = 4096;
19931
+ const docs = changedChunks.map((chunk) => {
19932
+ const title = chunk.title;
19933
+ const sectionTitle = chunk.sectionTitle ?? "";
19934
+ const url = chunk.url;
19935
+ const tags = chunk.tags.join(",");
19936
+ const headingPath = chunk.headingPath.join(" > ");
19937
+ const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
19938
+ const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
19939
+ const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
19940
+ return {
20392
19941
  id: chunk.chunkKey,
20393
- vector,
19942
+ content: { title, sectionTitle, text, url, tags, headingPath },
20394
19943
  metadata: {
20395
19944
  projectId: scope.projectId,
20396
19945
  scopeName: scope.scopeName,
20397
- url: chunk.url,
20398
19946
  path: chunk.path,
20399
- title: chunk.title,
20400
- sectionTitle: chunk.sectionTitle ?? "",
20401
- headingPath: chunk.headingPath,
20402
19947
  snippet: chunk.snippet,
20403
- chunkText: chunk.chunkText.slice(0, 4e3),
20404
19948
  ordinal: chunk.ordinal,
20405
19949
  contentHash: chunk.contentHash,
20406
- modelId: this.config.embeddings.model,
20407
19950
  depth: chunk.depth,
20408
19951
  incomingLinks: chunk.incomingLinks,
20409
19952
  routeFile: chunk.routeFile,
20410
- tags: chunk.tags,
20411
- description: chunk.description,
20412
- keywords: chunk.keywords
19953
+ description: chunk.description ?? "",
19954
+ keywords: (chunk.keywords ?? []).join(",")
20413
19955
  }
20414
- });
20415
- }
20416
- if (upserts.length > 0) {
20417
- await this.vectorStore.upsert(upserts, scope);
20418
- this.logger.event("upserted", { count: upserts.length });
20419
- }
20420
- if (deletes.length > 0) {
20421
- await this.vectorStore.deleteByIds(deletes, scope);
20422
- this.logger.event("deleted", { count: deletes.length });
20423
- }
20424
- }
20425
- stageEnd("sync", syncStart);
20426
- this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
20427
- const finalizeStart = stageStart();
20428
- if (!options.dryRun) {
20429
- const scopeInfo = {
20430
- projectId: scope.projectId,
20431
- scopeName: scope.scopeName,
20432
- modelId: this.config.embeddings.model,
20433
- lastIndexedAt: nowIso(),
20434
- vectorCount: chunks.length,
20435
- lastEstimateTokens: estimatedTokens,
20436
- lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
20437
- lastEstimateChangedChunks: changedChunks.length
20438
- };
20439
- await this.vectorStore.recordScope(scopeInfo);
20440
- this.logger.event("registry_updated", {
20441
- scope: scope.scopeName,
20442
- vectorCount: chunks.length
19956
+ };
20443
19957
  });
19958
+ await this.store.upsertChunks(docs, scope);
19959
+ documentsUpserted = docs.length;
19960
+ this.logger.event("upserted", { count: docs.length });
19961
+ }
19962
+ if (!options.dryRun && deletes.length > 0) {
19963
+ await this.store.deleteByIds(deletes, scope);
19964
+ this.logger.event("deleted", { count: deletes.length });
19965
+ }
19966
+ stageEnd("upsert", upsertStart);
19967
+ if (changedChunks.length > 0) {
19968
+ this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
19969
+ } else {
19970
+ this.logger.info("No chunks to upsert \u2014 all up to date");
20444
19971
  }
20445
- stageEnd("finalize", finalizeStart);
20446
19972
  this.logger.info("Done.");
20447
19973
  return {
20448
- pagesProcessed: mirrorPages.length,
19974
+ pagesProcessed: pages.length,
20449
19975
  chunksTotal: chunks.length,
20450
19976
  chunksChanged: changedChunks.length,
20451
- newEmbeddings,
19977
+ documentsUpserted,
20452
19978
  deletes: deletes.length,
20453
- estimatedTokens,
20454
- estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
20455
19979
  routeExact,
20456
19980
  routeBestEffort,
20457
19981
  stageTimingsMs
@@ -20464,35 +19988,25 @@ var requestSchema = z.object({
20464
19988
  scope: z.string().optional(),
20465
19989
  pathPrefix: z.string().optional(),
20466
19990
  tags: z.array(z.string()).optional(),
20467
- rerank: z.boolean().optional(),
20468
- groupBy: z.enum(["page", "chunk"]).optional(),
20469
- stream: z.boolean().optional()
19991
+ groupBy: z.enum(["page", "chunk"]).optional()
20470
19992
  });
20471
19993
  var SearchEngine = class _SearchEngine {
20472
19994
  cwd;
20473
19995
  config;
20474
- embeddings;
20475
- vectorStore;
20476
- reranker;
19996
+ store;
20477
19997
  constructor(options) {
20478
19998
  this.cwd = options.cwd;
20479
19999
  this.config = options.config;
20480
- this.embeddings = options.embeddings;
20481
- this.vectorStore = options.vectorStore;
20482
- this.reranker = options.reranker;
20000
+ this.store = options.store;
20483
20001
  }
20484
20002
  static async create(options = {}) {
20485
20003
  const cwd = path.resolve(options.cwd ?? process.cwd());
20486
20004
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
20487
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
20488
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
20489
- const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
20005
+ const store = options.store ?? await createUpstashStore(config);
20490
20006
  return new _SearchEngine({
20491
20007
  cwd,
20492
20008
  config,
20493
- embeddings,
20494
- vectorStore,
20495
- reranker
20009
+ store
20496
20010
  });
20497
20011
  }
20498
20012
  getConfig() {
@@ -20506,142 +20020,90 @@ var SearchEngine = class _SearchEngine {
20506
20020
  const input = parsed.data;
20507
20021
  const totalStart = process.hrtime.bigint();
20508
20022
  const resolvedScope = resolveScope(this.config, input.scope);
20509
- await this.assertModelCompatibility(resolvedScope);
20510
20023
  const topK = input.topK ?? 10;
20511
- const wantsRerank = Boolean(input.rerank);
20512
20024
  const groupByPage = (input.groupBy ?? "page") === "page";
20513
20025
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20514
- const embedStart = process.hrtime.bigint();
20515
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
20516
- const queryVector = queryEmbeddings[0];
20517
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
20518
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
20519
- }
20520
- const embedMs = hrTimeMs(embedStart);
20521
- const vectorStart = process.hrtime.bigint();
20522
- const hits = await this.vectorStore.query(
20523
- queryVector,
20524
- {
20525
- topK: candidateK,
20526
- pathPrefix: input.pathPrefix,
20527
- tags: input.tags
20528
- },
20529
- resolvedScope
20530
- );
20531
- const vectorMs = hrTimeMs(vectorStart);
20532
- const ranked = rankHits(hits, this.config);
20533
- let usedRerank = false;
20534
- let rerankMs = 0;
20535
- let ordered = ranked;
20536
- if (wantsRerank) {
20537
- const rerankStart = process.hrtime.bigint();
20538
- ordered = await this.rerankHits(input.q, ranked, topK);
20539
- rerankMs = hrTimeMs(rerankStart);
20540
- usedRerank = true;
20026
+ const filterParts = [];
20027
+ if (input.pathPrefix) {
20028
+ const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
20029
+ filterParts.push(`url GLOB '${prefix}*'`);
20030
+ }
20031
+ if (input.tags && input.tags.length > 0) {
20032
+ for (const tag of input.tags) {
20033
+ filterParts.push(`tags GLOB '*${tag}*'`);
20034
+ }
20035
+ }
20036
+ const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
20037
+ const useDualSearch = this.config.search.dualSearch && groupByPage;
20038
+ const searchStart = process.hrtime.bigint();
20039
+ let ranked;
20040
+ if (useDualSearch) {
20041
+ const chunkLimit = Math.max(topK * 10, 100);
20042
+ const pageLimit = 20;
20043
+ const [pageHits, chunkHits] = await Promise.all([
20044
+ this.store.searchPages(
20045
+ input.q,
20046
+ {
20047
+ limit: pageLimit,
20048
+ semanticWeight: this.config.search.semanticWeight,
20049
+ inputEnrichment: this.config.search.inputEnrichment,
20050
+ filter
20051
+ },
20052
+ resolvedScope
20053
+ ),
20054
+ this.store.search(
20055
+ input.q,
20056
+ {
20057
+ limit: chunkLimit,
20058
+ semanticWeight: this.config.search.semanticWeight,
20059
+ inputEnrichment: this.config.search.inputEnrichment,
20060
+ reranking: false,
20061
+ filter
20062
+ },
20063
+ resolvedScope
20064
+ )
20065
+ ]);
20066
+ const rankedChunks = rankHits(chunkHits, this.config, input.q);
20067
+ ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
20068
+ } else {
20069
+ const hits = await this.store.search(
20070
+ input.q,
20071
+ {
20072
+ limit: candidateK,
20073
+ semanticWeight: this.config.search.semanticWeight,
20074
+ inputEnrichment: this.config.search.inputEnrichment,
20075
+ reranking: this.config.search.reranking,
20076
+ filter
20077
+ },
20078
+ resolvedScope
20079
+ );
20080
+ ranked = rankHits(hits, this.config, input.q);
20541
20081
  }
20542
- const results = this.buildResults(ordered, topK, groupByPage);
20082
+ const searchMs = hrTimeMs(searchStart);
20083
+ const results = this.buildResults(ranked, topK, groupByPage, input.q);
20543
20084
  return {
20544
20085
  q: input.q,
20545
20086
  scope: resolvedScope.scopeName,
20546
20087
  results,
20547
20088
  meta: {
20548
20089
  timingsMs: {
20549
- embed: Math.round(embedMs),
20550
- vector: Math.round(vectorMs),
20551
- rerank: Math.round(rerankMs),
20090
+ search: Math.round(searchMs),
20552
20091
  total: Math.round(hrTimeMs(totalStart))
20553
- },
20554
- usedRerank,
20555
- modelId: this.config.embeddings.model
20556
- }
20557
- };
20558
- }
20559
- async *searchStreaming(request) {
20560
- const parsed = requestSchema.safeParse(request);
20561
- if (!parsed.success) {
20562
- throw new SearchSocketError("INVALID_REQUEST", parsed.error.issues[0]?.message ?? "Invalid request", 400);
20563
- }
20564
- const input = parsed.data;
20565
- const wantsRerank = Boolean(input.rerank);
20566
- if (!wantsRerank) {
20567
- const response = await this.search(request);
20568
- yield { phase: "initial", data: response };
20569
- return;
20570
- }
20571
- const totalStart = process.hrtime.bigint();
20572
- const resolvedScope = resolveScope(this.config, input.scope);
20573
- await this.assertModelCompatibility(resolvedScope);
20574
- const topK = input.topK ?? 10;
20575
- const groupByPage = (input.groupBy ?? "page") === "page";
20576
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20577
- const embedStart = process.hrtime.bigint();
20578
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
20579
- const queryVector = queryEmbeddings[0];
20580
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
20581
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
20582
- }
20583
- const embedMs = hrTimeMs(embedStart);
20584
- const vectorStart = process.hrtime.bigint();
20585
- const hits = await this.vectorStore.query(
20586
- queryVector,
20587
- {
20588
- topK: candidateK,
20589
- pathPrefix: input.pathPrefix,
20590
- tags: input.tags
20591
- },
20592
- resolvedScope
20593
- );
20594
- const vectorMs = hrTimeMs(vectorStart);
20595
- const ranked = rankHits(hits, this.config);
20596
- const initialResults = this.buildResults(ranked, topK, groupByPage);
20597
- yield {
20598
- phase: "initial",
20599
- data: {
20600
- q: input.q,
20601
- scope: resolvedScope.scopeName,
20602
- results: initialResults,
20603
- meta: {
20604
- timingsMs: {
20605
- embed: Math.round(embedMs),
20606
- vector: Math.round(vectorMs),
20607
- rerank: 0,
20608
- total: Math.round(hrTimeMs(totalStart))
20609
- },
20610
- usedRerank: false,
20611
- modelId: this.config.embeddings.model
20612
- }
20613
- }
20614
- };
20615
- const rerankStart = process.hrtime.bigint();
20616
- const reranked = await this.rerankHits(input.q, ranked, topK);
20617
- const rerankMs = hrTimeMs(rerankStart);
20618
- const rerankedResults = this.buildResults(reranked, topK, groupByPage);
20619
- yield {
20620
- phase: "reranked",
20621
- data: {
20622
- q: input.q,
20623
- scope: resolvedScope.scopeName,
20624
- results: rerankedResults,
20625
- meta: {
20626
- timingsMs: {
20627
- embed: Math.round(embedMs),
20628
- vector: Math.round(vectorMs),
20629
- rerank: Math.round(rerankMs),
20630
- total: Math.round(hrTimeMs(totalStart))
20631
- },
20632
- usedRerank: true,
20633
- modelId: this.config.embeddings.model
20634
20092
  }
20635
20093
  }
20636
20094
  };
20637
20095
  }
20638
- buildResults(ordered, topK, groupByPage) {
20639
- const minScore = this.config.ranking.minScore;
20096
+ ensureSnippet(hit) {
20097
+ const snippet = hit.hit.metadata.snippet;
20098
+ if (snippet && snippet.length >= 30) return snippet;
20099
+ const chunkText = hit.hit.metadata.chunkText;
20100
+ if (chunkText) return toSnippet(chunkText);
20101
+ return snippet || "";
20102
+ }
20103
+ buildResults(ordered, topK, groupByPage, _query) {
20640
20104
  if (groupByPage) {
20641
20105
  let pages = aggregateByPage(ordered, this.config);
20642
- if (minScore > 0) {
20643
- pages = pages.filter((p) => p.pageScore >= minScore);
20644
- }
20106
+ pages = trimByScoreGap(pages, this.config);
20645
20107
  const minRatio = this.config.ranking.minChunkScoreRatio;
20646
20108
  return pages.slice(0, topK).map((page) => {
20647
20109
  const bestScore = page.bestChunk.finalScore;
@@ -20651,12 +20113,12 @@ var SearchEngine = class _SearchEngine {
20651
20113
  url: page.url,
20652
20114
  title: page.title,
20653
20115
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
20654
- snippet: page.bestChunk.hit.metadata.snippet,
20116
+ snippet: this.ensureSnippet(page.bestChunk),
20655
20117
  score: Number(page.pageScore.toFixed(6)),
20656
20118
  routeFile: page.routeFile,
20657
20119
  chunks: meaningful.length > 1 ? meaningful.map((c) => ({
20658
20120
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
20659
- snippet: c.hit.metadata.snippet,
20121
+ snippet: this.ensureSnippet(c),
20660
20122
  headingPath: c.hit.metadata.headingPath,
20661
20123
  score: Number(c.finalScore.toFixed(6))
20662
20124
  })) : void 0
@@ -20664,6 +20126,7 @@ var SearchEngine = class _SearchEngine {
20664
20126
  });
20665
20127
  } else {
20666
20128
  let filtered = ordered;
20129
+ const minScore = this.config.ranking.minScore;
20667
20130
  if (minScore > 0) {
20668
20131
  filtered = ordered.filter((entry) => entry.finalScore >= minScore);
20669
20132
  }
@@ -20671,7 +20134,7 @@ var SearchEngine = class _SearchEngine {
20671
20134
  url: hit.metadata.url,
20672
20135
  title: hit.metadata.title,
20673
20136
  sectionTitle: hit.metadata.sectionTitle || void 0,
20674
- snippet: hit.metadata.snippet,
20137
+ snippet: this.ensureSnippet({ hit, finalScore }),
20675
20138
  score: Number(finalScore.toFixed(6)),
20676
20139
  routeFile: hit.metadata.routeFile
20677
20140
  }));
@@ -20680,7 +20143,7 @@ var SearchEngine = class _SearchEngine {
20680
20143
  async getPage(pathOrUrl, scope) {
20681
20144
  const resolvedScope = resolveScope(this.config, scope);
20682
20145
  const urlPath = this.resolveInputPath(pathOrUrl);
20683
- const page = await this.vectorStore.getPage(urlPath, resolvedScope);
20146
+ const page = await this.store.getPage(urlPath, resolvedScope);
20684
20147
  if (!page) {
20685
20148
  throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
20686
20149
  }
@@ -20701,7 +20164,7 @@ var SearchEngine = class _SearchEngine {
20701
20164
  };
20702
20165
  }
20703
20166
  async health() {
20704
- return this.vectorStore.health();
20167
+ return this.store.health();
20705
20168
  }
20706
20169
  resolveInputPath(pathOrUrl) {
20707
20170
  try {
@@ -20713,92 +20176,8 @@ var SearchEngine = class _SearchEngine {
20713
20176
  const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
20714
20177
  return normalizeUrlPath(withoutQueryOrHash);
20715
20178
  }
20716
- async assertModelCompatibility(scope) {
20717
- const modelId = await this.vectorStore.getScopeModelId(scope);
20718
- if (modelId && modelId !== this.config.embeddings.model) {
20719
- throw new SearchSocketError(
20720
- "EMBEDDING_MODEL_MISMATCH",
20721
- `Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
20722
- );
20723
- }
20724
- }
20725
- async rerankHits(query, ranked, topK) {
20726
- if (!this.config.rerank.enabled) {
20727
- throw new SearchSocketError(
20728
- "INVALID_REQUEST",
20729
- "rerank=true requested but rerank.enabled is not set to true.",
20730
- 400
20731
- );
20732
- }
20733
- if (!this.reranker) {
20734
- throw new SearchSocketError(
20735
- "CONFIG_MISSING",
20736
- `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
20737
- 400
20738
- );
20739
- }
20740
- const pageGroups = /* @__PURE__ */ new Map();
20741
- for (const entry of ranked) {
20742
- const url = entry.hit.metadata.url;
20743
- const group = pageGroups.get(url);
20744
- if (group) group.push(entry);
20745
- else pageGroups.set(url, [entry]);
20746
- }
20747
- const MAX_CHUNKS_PER_PAGE = 5;
20748
- const MIN_CHUNKS_PER_PAGE = 1;
20749
- const MIN_CHUNK_SCORE_RATIO = 0.5;
20750
- const MAX_DOC_CHARS = 2e3;
20751
- const pageCandidates = [];
20752
- for (const [url, chunks] of pageGroups) {
20753
- const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
20754
- const bestScore = byScore[0].finalScore;
20755
- const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
20756
- const selected = byScore.filter(
20757
- (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
20758
- ).slice(0, MAX_CHUNKS_PER_PAGE);
20759
- selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
20760
- const first = selected[0].hit.metadata;
20761
- const parts = [first.title];
20762
- if (first.description) {
20763
- parts.push(first.description);
20764
- }
20765
- if (first.keywords && first.keywords.length > 0) {
20766
- parts.push(first.keywords.join(", "));
20767
- }
20768
- const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20769
- parts.push(body);
20770
- let text = parts.join("\n\n");
20771
- if (text.length > MAX_DOC_CHARS) {
20772
- text = text.slice(0, MAX_DOC_CHARS);
20773
- }
20774
- pageCandidates.push({ id: url, text });
20775
- }
20776
- const maxCandidates = Math.max(topK, this.config.rerank.topN);
20777
- const cappedCandidates = pageCandidates.slice(0, maxCandidates);
20778
- const reranked = await this.reranker.rerank(
20779
- query,
20780
- cappedCandidates,
20781
- maxCandidates
20782
- );
20783
- const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
20784
- return ranked.map((entry) => {
20785
- const pageScore = scoreByUrl.get(entry.hit.metadata.url);
20786
- const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
20787
- if (pageScore === void 0 || !Number.isFinite(pageScore)) {
20788
- return { ...entry, finalScore: base };
20789
- }
20790
- const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
20791
- return {
20792
- ...entry,
20793
- finalScore: Number.isFinite(combined) ? combined : base
20794
- };
20795
- }).sort((a, b) => {
20796
- const delta = b.finalScore - a.finalScore;
20797
- return Number.isNaN(delta) ? 0 : delta;
20798
- });
20799
- }
20800
20179
  };
20801
- function createServer(engine, config) {
20180
+ function createServer(engine) {
20802
20181
  const server = new McpServer({
20803
20182
  name: "searchsocket-mcp",
20804
20183
  version: "0.1.0"
@@ -20806,15 +20185,14 @@ function createServer(engine, config) {
20806
20185
  server.registerTool(
20807
20186
  "search",
20808
20187
  {
20809
- description: "Semantic site search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and rerank. Enable rerank for better relevance on natural-language queries.",
20188
+ description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
20810
20189
  inputSchema: {
20811
20190
  query: z.string().min(1),
20812
20191
  scope: z.string().optional(),
20813
20192
  topK: z.number().int().positive().max(100).optional(),
20814
20193
  pathPrefix: z.string().optional(),
20815
20194
  tags: z.array(z.string()).optional(),
20816
- groupBy: z.enum(["page", "chunk"]).optional(),
20817
- rerank: z.boolean().optional().describe("Enable reranking for better relevance (uses Jina Reranker). Defaults to true when rerank is enabled in config.")
20195
+ groupBy: z.enum(["page", "chunk"]).optional()
20818
20196
  }
20819
20197
  },
20820
20198
  async (input) => {
@@ -20824,8 +20202,7 @@ function createServer(engine, config) {
20824
20202
  scope: input.scope,
20825
20203
  pathPrefix: input.pathPrefix,
20826
20204
  tags: input.tags,
20827
- groupBy: input.groupBy,
20828
- rerank: input.rerank ?? config.rerank.enabled
20205
+ groupBy: input.groupBy
20829
20206
  });
20830
20207
  return {
20831
20208
  content: [
@@ -20951,10 +20328,10 @@ async function runMcpServer(options = {}) {
20951
20328
  config
20952
20329
  });
20953
20330
  if (resolvedTransport === "http") {
20954
- await startHttpServer(() => createServer(engine, config), config, options);
20331
+ await startHttpServer(() => createServer(engine), config, options);
20955
20332
  return;
20956
20333
  }
20957
- const server = createServer(engine, config);
20334
+ const server = createServer(engine);
20958
20335
  const stdioTransport = new StdioServerTransport();
20959
20336
  await server.connect(stdioTransport);
20960
20337
  }
@@ -21111,42 +20488,6 @@ function searchsocketHandle(options = {}) {
21111
20488
  }
21112
20489
  const engine = await getEngine();
21113
20490
  const searchRequest = body;
21114
- if (searchRequest.stream && searchRequest.rerank) {
21115
- const encoder = new TextEncoder();
21116
- const stream = new ReadableStream({
21117
- async start(controller) {
21118
- try {
21119
- for await (const event2 of engine.searchStreaming(searchRequest)) {
21120
- const line = JSON.stringify(event2) + "\n";
21121
- controller.enqueue(encoder.encode(line));
21122
- }
21123
- } catch (streamError) {
21124
- const errorEvent = {
21125
- phase: "error",
21126
- data: {
21127
- error: {
21128
- code: streamError instanceof SearchSocketError ? streamError.code : "INTERNAL_ERROR",
21129
- message: streamError instanceof Error ? streamError.message : "Unknown error"
21130
- }
21131
- }
21132
- };
21133
- controller.enqueue(encoder.encode(JSON.stringify(errorEvent) + "\n"));
21134
- } finally {
21135
- controller.close();
21136
- }
21137
- }
21138
- });
21139
- return withCors(
21140
- new Response(stream, {
21141
- status: 200,
21142
- headers: {
21143
- "content-type": "application/x-ndjson"
21144
- }
21145
- }),
21146
- event.request,
21147
- config
21148
- );
21149
- }
21150
20491
  const result = await engine.search(searchRequest);
21151
20492
  return withCors(
21152
20493
  new Response(JSON.stringify(result), {
@@ -21232,13 +20573,6 @@ function searchsocketVitePlugin(options = {}) {
21232
20573
  let running = false;
21233
20574
  return {
21234
20575
  name: "searchsocket:auto-index",
21235
- config() {
21236
- return {
21237
- ssr: {
21238
- external: ["@libsql/client", "libsql"]
21239
- }
21240
- };
21241
- },
21242
20576
  async closeBundle() {
21243
20577
  if (executed || running) {
21244
20578
  return;
@@ -21266,9 +20600,8 @@ function searchsocketVitePlugin(options = {}) {
21266
20600
  verbose: options.verbose
21267
20601
  });
21268
20602
  logger3.info(
21269
- `[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} embedded=${stats.newEmbeddings}`
20603
+ `[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} upserted=${stats.documentsUpserted}`
21270
20604
  );
21271
- logger3.info("[searchsocket] markdown mirror written under .searchsocket/pages/<scope> (safe to commit for content workflows).");
21272
20605
  executed = true;
21273
20606
  } finally {
21274
20607
  running = false;
@@ -21277,60 +20610,6 @@ function searchsocketVitePlugin(options = {}) {
21277
20610
  };
21278
20611
  }
21279
20612
 
21280
- // src/merge.ts
21281
- function mergeSearchResults(initial, reranked, options) {
21282
- const maxDisplacement = options?.maxDisplacement ?? 3;
21283
- const initialUrls = initial.results.map((r) => r.url);
21284
- const rerankedUrls = reranked.results.map((r) => r.url);
21285
- const initialPos = /* @__PURE__ */ new Map();
21286
- for (let i = 0; i < initialUrls.length; i++) {
21287
- initialPos.set(initialUrls[i], i);
21288
- }
21289
- const rerankedPos = /* @__PURE__ */ new Map();
21290
- for (let i = 0; i < rerankedUrls.length; i++) {
21291
- rerankedPos.set(rerankedUrls[i], i);
21292
- }
21293
- const displacements = [];
21294
- for (const url of initialUrls) {
21295
- const iPos = initialPos.get(url);
21296
- const rPos = rerankedPos.get(url);
21297
- const displacement = rPos !== void 0 ? Math.abs(iPos - rPos) : 0;
21298
- displacements.push({ url, displacement });
21299
- }
21300
- const totalResults = displacements.length;
21301
- if (totalResults === 0) {
21302
- return {
21303
- response: reranked,
21304
- usedRerankedOrder: true,
21305
- displacements
21306
- };
21307
- }
21308
- const hasLargeDisplacement = displacements.some((d) => d.displacement > maxDisplacement);
21309
- if (hasLargeDisplacement) {
21310
- return {
21311
- response: reranked,
21312
- usedRerankedOrder: true,
21313
- displacements
21314
- };
21315
- }
21316
- const rerankedScoreMap = /* @__PURE__ */ new Map();
21317
- for (const result of reranked.results) {
21318
- rerankedScoreMap.set(result.url, result.score);
21319
- }
21320
- const mergedResults = initial.results.map((result) => ({
21321
- ...result,
21322
- score: rerankedScoreMap.get(result.url) ?? result.score
21323
- }));
21324
- return {
21325
- response: {
21326
- ...reranked,
21327
- results: mergedResults
21328
- },
21329
- usedRerankedOrder: false,
21330
- displacements
21331
- };
21332
- }
21333
-
21334
20613
  // src/client.ts
21335
20614
  function createSearchClient(options = {}) {
21336
20615
  const endpoint = options.endpoint ?? "/api/search";
@@ -21358,72 +20637,6 @@ function createSearchClient(options = {}) {
21358
20637
  throw new Error(message);
21359
20638
  }
21360
20639
  return payload;
21361
- },
21362
- async streamSearch(request, onPhase) {
21363
- const response = await fetchImpl(endpoint, {
21364
- method: "POST",
21365
- headers: {
21366
- "content-type": "application/json"
21367
- },
21368
- body: JSON.stringify(request)
21369
- });
21370
- if (!response.ok) {
21371
- let payload;
21372
- try {
21373
- payload = await response.json();
21374
- } catch {
21375
- throw new Error("Search failed");
21376
- }
21377
- const message = payload.error?.message ?? "Search failed";
21378
- throw new Error(message);
21379
- }
21380
- const contentType = response.headers.get("content-type") ?? "";
21381
- if (contentType.includes("application/json")) {
21382
- const data = await response.json();
21383
- onPhase({ phase: "initial", data });
21384
- return data;
21385
- }
21386
- if (!response.body) {
21387
- throw new Error("Response body is not readable");
21388
- }
21389
- const reader = response.body.getReader();
21390
- const decoder = new TextDecoder();
21391
- let buffer = "";
21392
- let lastResponse = null;
21393
- for (; ; ) {
21394
- const { done, value } = await reader.read();
21395
- if (done) break;
21396
- buffer += decoder.decode(value, { stream: true });
21397
- let newlineIdx;
21398
- while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
21399
- const line = buffer.slice(0, newlineIdx).trim();
21400
- buffer = buffer.slice(newlineIdx + 1);
21401
- if (line.length === 0) continue;
21402
- const event = JSON.parse(line);
21403
- if (event.phase === "error") {
21404
- const errData = event.data;
21405
- throw new Error(errData.error.message ?? "Streaming search error");
21406
- }
21407
- const searchEvent = event;
21408
- onPhase(searchEvent);
21409
- lastResponse = searchEvent.data;
21410
- }
21411
- }
21412
- const remaining = buffer.trim();
21413
- if (remaining.length > 0) {
21414
- const event = JSON.parse(remaining);
21415
- if (event.phase === "error") {
21416
- const errData = event.data;
21417
- throw new Error(errData.error.message ?? "Streaming search error");
21418
- }
21419
- const searchEvent = event;
21420
- onPhase(searchEvent);
21421
- lastResponse = searchEvent.data;
21422
- }
21423
- if (!lastResponse) {
21424
- throw new Error("No search results received");
21425
- }
21426
- return lastResponse;
21427
20640
  }
21428
20641
  };
21429
20642
  }
@@ -21439,6 +20652,6 @@ function createSearchClient(options = {}) {
21439
20652
  *)
21440
20653
  */
21441
20654
 
21442
- export { IndexPipeline, JinaReranker, SearchEngine, createEmbeddingsProvider, createReranker, createSearchClient, createVectorStore, isServerless, loadConfig, mergeConfig, mergeConfigServerless, mergeSearchResults, resolveScope, runMcpServer, searchsocketHandle, searchsocketVitePlugin };
20655
+ export { IndexPipeline, SearchEngine, UpstashSearchStore, createSearchClient, createUpstashStore, isServerless, loadConfig, mergeConfig, mergeConfigServerless, resolveScope, runMcpServer, searchsocketHandle, searchsocketVitePlugin };
21443
20656
  //# sourceMappingURL=index.js.map
21444
20657
  //# sourceMappingURL=index.js.map